/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1999-2012 Apple Inc. All rights reserved.
*
- * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the
- * License may not be used to create, or enable the creation or
- * redistribution of, unlawful or unlicensed copies of an Apple operating
- * system, or to circumvent, violate, or enable the circumvention or
- * violation of, any terms of an Apple operating system software license
- * agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
* limitations under the License.
- *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
- * Copyright (c) 1999 Apple Computer, Inc.
- *
- * Data Link Inteface Layer
- * Author: Ted Walker
+ * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
+ * support for mandatory and extensible security protections. This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
*/
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/user.h>
+#include <sys/random.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_arp.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
+#include <sys/mcache.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
+#include <kern/zalloc.h>
+#include <net/kpi_protocol.h>
#include <net/if_types.h>
+#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
+#if INET
+#include <netinet/in_var.h>
+#include <netinet/igmp_var.h>
+#endif /* INET */
+
+#if INET6
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+#include <netinet6/mld6_var.h>
+#endif /* INET6 */
+
+#if NETAT
+#include <netat/at_var.h>
+#endif /* NETAT */
+
#include <libkern/OSAtomic.h>
#include <machine/machine_routines.h>
+#include <mach/thread_act.h>
+#include <mach/sdt.h>
+
+#if CONFIG_MACF_NET
+#include <security/mac_framework.h>
+#endif /* CONFIG_MACF_NET */
+
+#if PF
+#include <net/pfvar.h>
+#endif /* PF */
+
#define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
-#define MAX_DL_TAGS 16
-#define MAX_DLIL_FILTERS 16
#define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
#define MAX_LINKADDR 4 /* LONGWORDS */
#define M_NKE M_IFADDR
-#define PFILT(x) ((struct dlil_filterq_entry *) (x))->variants.pr_filter
-#define IFILT(x) ((struct dlil_filterq_entry *) (x))->variants.if_filter
-
-#if 0
+#if 1
#define DLIL_PRINTF printf
#else
#define DLIL_PRINTF kprintf
#endif
+#define _CASSERT(x) \
+ switch (0) { case 0: case (x): ; }
+
+#define IF_DATA_REQUIRE_ALIGNED_64(f) \
+ _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
+
+#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
+ _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
+
+#define IFNET_IF_TC_REQUIRE_ALIGNED_64(f) \
+ _CASSERT(!(offsetof(struct ifnet, if_tc.f) % sizeof (u_int64_t)))
+
enum {
- kProtoKPI_DLIL = 0,
- kProtoKPI_v1 = 1
+ kProtoKPI_v1 = 1,
+ kProtoKPI_v2 = 2
};
+/*
+ * One protocol attachment on an interface: the protocol family, the
+ * interface pointer and the set of protocol callbacks (v1 or v2 KPI).
+ *
+ * List of if_proto structures in if_proto_hash[] is protected by
+ * the ifnet lock. The rest of the fields are initialized at protocol
+ * attach time and never change, thus no lock required as long as
+ * a reference to it is valid, via if_proto_ref().
+ */
struct if_proto {
- SLIST_ENTRY(if_proto) next_hash;
- int refcount;
- int detaching;
- struct ifnet *ifp;
- struct domain *dl_domain;
+ SLIST_ENTRY(if_proto) next_hash; /* link within one if_proto_hash[] bucket */
+ u_int32_t refcount; /* adjusted atomically by if_proto_ref() and if_proto_free() */
+ u_int32_t detached; /* if_proto_free() VERIFYs this is nonzero before the final free */
+ struct ifnet *ifp; /* interface handed to the callbacks and events in if_proto_free() */
protocol_family_t protocol_family; /* key compared by find_attached_proto() */
- int proto_kpi;
+ int proto_kpi; /* kProtoKPI_v1 or kProtoKPI_v2; selects the kpi member in use */
union {
- struct {
- dl_input_func dl_input;
- dl_pre_output_func dl_pre_output;
- dl_event_func dl_event;
- dl_offer_func dl_offer;
- dl_ioctl_func dl_ioctl;
- dl_detached_func dl_detached;
- } dlil;
struct {
- proto_media_input input;
- proto_media_preout pre_output;
- proto_media_event event;
- proto_media_ioctl ioctl;
+ proto_media_input input;
+ proto_media_preout pre_output;
+ proto_media_event event;
+ proto_media_ioctl ioctl;
proto_media_detached detached;
proto_media_resolve_multi resolve_multi;
proto_media_send_arp send_arp;
} v1;
+ struct {
+ proto_media_input_v2 input; /* only member whose type differs from v1 */
+ proto_media_preout pre_output;
+ proto_media_event event;
+ proto_media_ioctl ioctl;
+ proto_media_detached detached;
+ proto_media_resolve_multi resolve_multi;
+ proto_media_send_arp send_arp;
+ } v2;
} kpi;
};
SLIST_HEAD(proto_hash_entry, if_proto);
+#define DLIL_SDLMAXLEN 64
+#define DLIL_SDLDATALEN \
+ (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
struct dlil_ifnet {
- /* ifnet and drvr_ext are used by the stack and drivers
- drvr_ext extends the public ifnet and must follow dl_if */
- struct ifnet dl_if; /* public ifnet */
-
- /* dlil private fields */
- TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet are link together */
- /* it is not the ifnet list */
- void *if_uniqueid; /* unique id identifying the interface */
- size_t if_uniqueid_len;/* length of the unique id */
- char if_namestorage[IFNAMSIZ]; /* interface name storage */
+ struct ifnet dl_if; /* public ifnet */
+ /*
+ * dlil private fields, protected by dl_if_lock
+ */
+ decl_lck_mtx_data(, dl_if_lock);
+ TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
+ u_int32_t dl_if_flags; /* flags (below) */
+ u_int32_t dl_if_refcnt; /* refcnt */
+ void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
+ void *dl_if_uniqueid; /* unique interface id */
+ size_t dl_if_uniqueid_len; /* length of the unique id */
+ char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
+ struct {
+ struct ifaddr ifa; /* lladdr ifa */
+ u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
+ u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
+ } dl_if_lladdr;
+ ctrace_t dl_if_attach; /* attach PC stacktrace */
+ ctrace_t dl_if_detach; /* detach PC stacktrace */
};
-struct ifnet_filter {
- TAILQ_ENTRY(ifnet_filter) filt_next;
- ifnet_t filt_ifp;
- int filt_detaching;
-
- const char *filt_name;
- void *filt_cookie;
- protocol_family_t filt_protocol;
- iff_input_func filt_input;
- iff_output_func filt_output;
- iff_event_func filt_event;
- iff_ioctl_func filt_ioctl;
- iff_detached_func filt_detached;
-};
+/* Values for dl_if_flags (private to DLIL) */
+#define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
+#define DLIF_REUSE 0x2 /* DLIL ifnet recycler, ifnet is not new */
+#define DLIF_DEBUG 0x4 /* has debugging info */
-struct if_family_str {
- TAILQ_ENTRY(if_family_str) if_fam_next;
- u_long if_family;
- int refcnt;
- int flags;
-
-#define DLIL_SHUTDOWN 1
-
- int (*add_if)(struct ifnet *ifp);
- int (*del_if)(struct ifnet *ifp);
- int (*init_if)(struct ifnet *ifp);
- int (*add_proto)(struct ifnet *ifp, u_long protocol_family, struct ddesc_head_str *demux_desc_head);
- ifnet_del_proto_func del_proto;
- ifnet_ioctl_func ifmod_ioctl;
- int (*shutdown)(void);
-};
+#define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
-struct proto_family_str {
- TAILQ_ENTRY(proto_family_str) proto_fam_next;
- u_long proto_family;
- u_long if_family;
- int usecnt;
+/* For gdb */
+__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
- int (*attach_proto)(struct ifnet *ifp, u_long protocol_family);
- int (*detach_proto)(struct ifnet *ifp, u_long protocol_family);
+struct dlil_ifnet_dbg {
+ struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
+ u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
+ u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
+ /*
+ * Circular lists of ifnet_{reference,release} callers.
+ */
+ ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
+ ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};
-enum {
- kIfNetUseCount_MayBeZero = 0,
- kIfNetUseCount_MustNotBeZero = 1
+#define DLIL_TO_IFP(s) (&s->dl_if)
+#define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
+
+struct ifnet_filter {
+ TAILQ_ENTRY(ifnet_filter) filt_next;
+ u_int32_t filt_skip;
+ ifnet_t filt_ifp;
+ const char *filt_name;
+ void *filt_cookie;
+ protocol_family_t filt_protocol;
+ iff_input_func filt_input;
+ iff_output_func filt_output;
+ iff_event_func filt_event;
+ iff_ioctl_func filt_ioctl;
+ iff_detached_func filt_detached;
};
+struct proto_input_entry;
+
static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
-static TAILQ_HEAD(, if_family_str) if_family_head;
-static TAILQ_HEAD(, proto_family_str) proto_family_head;
static lck_grp_t *dlil_lock_group;
-static lck_grp_t *ifnet_lock_group;
+lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
-static lck_attr_t *ifnet_lock_attr;
-static lck_mtx_t *proto_family_mutex;
-static lck_rw_t *ifnet_head_mutex;
-static lck_mtx_t *dlil_ifnet_mutex;
-static lck_mtx_t *dlil_mutex;
-static unsigned long dlil_read_count = 0;
-static unsigned long dlil_detach_waiting = 0;
+lck_attr_t *ifnet_lock_attr;
+decl_lck_rw_data(static, ifnet_head_lock);
+decl_lck_mtx_data(static, dlil_ifnet_lock);
+u_int32_t dlil_filter_count = 0;
extern u_int32_t ipv4_ll_arp_aware;
-int dlil_initialized = 0;
-lck_spin_t *dlil_input_lock;
-__private_extern__ thread_t dlil_input_thread_ptr = 0;
-int dlil_input_thread_wakeup = 0;
-__private_extern__ int dlil_output_thread_wakeup = 0;
-static struct mbuf *dlil_input_mbuf_head = NULL;
-static struct mbuf *dlil_input_mbuf_tail = NULL;
-#if NLOOP > 1
-#error dlil_input() needs to be revised to support more than on loopback interface
-#endif
-static struct mbuf *dlil_input_loop_head = NULL;
-static struct mbuf *dlil_input_loop_tail = NULL;
+#if DEBUG
+static unsigned int ifnet_debug = 1; /* debugging (enabled) */
+#else
+static unsigned int ifnet_debug; /* debugging (disabled) */
+#endif /* !DEBUG */
+static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
+static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
+static struct zone *dlif_zone; /* zone for dlil_ifnet */
-static void dlil_input_thread(void);
-static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
-struct ifnet *ifbyfamily(u_long family, short unit);
-static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
-static void dlil_call_delayed_detach_thread(void);
+#define DLIF_ZONE_MAX 64 /* maximum elements in zone */
+#define DLIF_ZONE_NAME "ifnet" /* zone name */
-static void dlil_read_begin(void);
-static void dlil_read_end(void);
-static int dlil_write_begin(void);
-static void dlil_write_end(void);
+static unsigned int dlif_filt_size; /* size of ifnet_filter */
+static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
-static int ifp_use(struct ifnet *ifp, int handle_zero);
-static int ifp_unuse(struct ifnet *ifp);
-static void ifp_use_reached_zero(struct ifnet *ifp);
+#define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
+#define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
-extern void bpfdetach(struct ifnet*);
-extern void proto_input_run(void); // new run_netisr
+static unsigned int dlif_inp_size; /* size of dlil_threading_info */
+static struct zone *dlif_inp_zone; /* zone for dlil_threading_info */
+
+#define DLIF_INP_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
+#define DLIF_INP_ZONE_NAME "ifnet_thread" /* zone name */
+
+static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
+static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
+
+#define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
+#define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
+
+static unsigned int dlif_proto_size; /* size of if_proto */
+static struct zone *dlif_proto_zone; /* zone for if_proto */
+
+#define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
+#define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
+
+/*
+ * Updating this variable should be done by first acquiring the global
+ * radix node head (rnh_lock), in tandem with setting/clearing the
+ * PR_AGGDRAIN for routedomain.
+ */
+u_int32_t ifnet_aggressive_drainers;
+static u_int32_t net_rtref;
+
+static struct dlil_threading_info dlil_lo_thread;
+__private_extern__ struct dlil_threading_info *dlil_lo_thread_ptr = &dlil_lo_thread;
+
+static struct mbuf *dlil_lo_input_mbuf_head = NULL;
+static struct mbuf *dlil_lo_input_mbuf_tail = NULL;
+static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
+static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
+static void dlil_if_trace(struct dlil_ifnet *, int);
+static void if_proto_ref(struct if_proto *);
+static void if_proto_free(struct if_proto *);
+static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
+static int dlil_ifp_proto_count(struct ifnet *);
+static void if_flt_monitor_busy(struct ifnet *);
+static void if_flt_monitor_unbusy(struct ifnet *);
+static void if_flt_monitor_enter(struct ifnet *);
+static void if_flt_monitor_leave(struct ifnet *);
+static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
+ char **, protocol_family_t);
+static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
+ protocol_family_t);
+static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
+ const struct sockaddr_dl *);
+static int ifnet_lookup(struct ifnet *);
+static void if_purgeaddrs(struct ifnet *);
+
+static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
+ struct mbuf *, char *);
+static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
+ struct mbuf *);
+static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
+ mbuf_t *, const struct sockaddr *, void *, char *, char *);
+static void ifproto_media_event(struct ifnet *, protocol_family_t,
+ const struct kev_msg *);
+static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
+ unsigned long, void *);
+static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
+ struct sockaddr_dl *, size_t);
+static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
+ const struct sockaddr_dl *, const struct sockaddr *,
+ const struct sockaddr_dl *, const struct sockaddr *);
+
+static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
+static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
+ protocol_family_t *);
+static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
+ const struct ifnet_demux_desc *, u_int32_t);
+static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
+static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
+static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
+ const struct sockaddr *, const char *, const char *);
+static errno_t ifp_if_ioctl(struct ifnet *, unsigned long, void *);
+static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
+static void ifp_if_free(struct ifnet *);
+static void ifp_if_event(struct ifnet *, const struct kev_msg *);
+
+static void dlil_input_thread_func(struct dlil_threading_info *inpthread);
+static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
+
+static void ifnet_delayed_thread_func(void);
+static void ifnet_detach_final(struct ifnet *);
+static void ifnet_detaching_enqueue(struct ifnet *);
+static struct ifnet *ifnet_detaching_dequeue(void);
+
+static void ifp_src_route_copyout(struct ifnet *, struct route *);
+static void ifp_src_route_copyin(struct ifnet *, struct route *);
+#if INET6
+static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
+static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
+#endif /* INET6 */
+
+/* The following are protected by dlil_ifnet_lock */
+static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
+static u_int32_t ifnet_detaching_cnt;
+static void *ifnet_delayed_run; /* wait channel for detaching thread */
-int dlil_input_packet(struct ifnet *ifp, struct mbuf *m, char *frame_header);
+extern void bpfdetach(struct ifnet*);
+extern void proto_input_run(void);
__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
-int dlil_expand_mcl;
+#if DEBUG
+static int dlil_verbose = 1;
+#else
+static int dlil_verbose = 0;
+#endif /* DEBUG */
+static int dlil_multithreaded_input = 1;
+static int cur_dlil_input_threads = 0;
+#if IFNET_INPUT_SANITY_CHK
+static int dlil_lo_input_mbuf_count = 0;
+/* sanity checking of input packet lists received */
+static int dlil_input_sanity_check = 0;
+#endif
-extern u_int32_t inject_buckets;
+SYSCTL_DECL(_net_link_generic_system);
-static const u_int32_t dlil_writer_waiting = 0x80000000;
+SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose, CTLFLAG_RW,
+ &dlil_verbose, 0, "Log DLIL error messages");
-static __inline__ void*
-_cast_non_const(const void * ptr) {
- union {
- const void* cval;
- void* val;
- } ret;
-
- ret.cval = ptr;
- return (ret.val);
-}
+SYSCTL_INT(_net_link_generic_system, OID_AUTO, multi_threaded_input, CTLFLAG_RW,
+ &dlil_multithreaded_input , 0, "Uses multiple input thread for DLIL input");
-/* Should these be inline? */
-static void
-dlil_read_begin(void)
-{
- unsigned long new_value;
- unsigned long old_value;
- struct uthread *uth = get_bsdthread_info(current_thread());
-
- if (uth->dlil_incremented_read == dlil_writer_waiting)
- panic("dlil_read_begin - thread is already a writer");
-
- do {
-again:
- old_value = dlil_read_count;
-
- if ((old_value & dlil_writer_waiting) != 0 && uth->dlil_incremented_read == 0)
- {
- tsleep(&dlil_read_count, PRIBIO, "dlil_read_count", 1);
- goto again;
- }
-
- new_value = old_value + 1;
- } while (!OSCompareAndSwap((UInt32)old_value, (UInt32)new_value, (UInt32*)&dlil_read_count));
-
- uth->dlil_incremented_read++;
-}
+#if IFNET_INPUT_SANITY_CHK
+SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
+ CTLFLAG_RW, &dlil_input_sanity_check , 0,
+ "Turn on sanity checking in DLIL input");
+#endif
-static void
-dlil_read_end(void)
-{
- struct uthread *uth = get_bsdthread_info(current_thread());
-
- OSDecrementAtomic((UInt32*)&dlil_read_count);
- uth->dlil_incremented_read--;
- if (dlil_read_count == dlil_writer_waiting)
- wakeup(_cast_non_const(&dlil_writer_waiting));
-}
+unsigned int net_affinity = 1;
+static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
-static int
-dlil_write_begin(void)
-{
- struct uthread *uth = get_bsdthread_info(current_thread());
-
- if (uth->dlil_incremented_read != 0) {
- return EDEADLK;
- }
- lck_mtx_lock(dlil_mutex);
- OSBitOrAtomic((UInt32)dlil_writer_waiting, (UInt32*)&dlil_read_count);
-again:
- if (dlil_read_count == dlil_writer_waiting) {
- uth->dlil_incremented_read = dlil_writer_waiting;
- return 0;
- }
- else {
- tsleep(_cast_non_const(&dlil_writer_waiting), PRIBIO, "dlil_writer_waiting", 1);
- goto again;
- }
-}
+extern u_int32_t inject_buckets;
-static void
-dlil_write_end(void)
-{
- struct uthread *uth = get_bsdthread_info(current_thread());
-
- if (uth->dlil_incremented_read != dlil_writer_waiting)
- panic("dlil_write_end - thread is not a writer");
- OSBitAndAtomic((UInt32)~dlil_writer_waiting, (UInt32*)&dlil_read_count);
- lck_mtx_unlock(dlil_mutex);
- uth->dlil_incremented_read = 0;
- wakeup(&dlil_read_count);
-}
+static lck_grp_attr_t *dlil_grp_attributes = NULL;
+static lck_attr_t *dlil_lck_attributes = NULL;
+static lck_grp_t *dlil_input_lock_grp = NULL;
#define PROTO_HASH_SLOTS 0x5
*/
/*
 * Map a protocol family to a bucket index for an interface
 * if_proto_hash[] table.  PF_INET, PF_INET6, PF_APPLETALK and PF_VLAN
 * each get their own bucket (0 through 3); every other family,
 * including PF_UNSPEC, shares bucket 4.  The five possible results
 * correspond to PROTO_HASH_SLOTS (0x5).
 */
static int
-proto_hash_value(u_long protocol_family)
+proto_hash_value(u_int32_t protocol_family)
{
+ /*
+ * dlil_proto_unplumb_all() depends on the mapping between
+ * the hash bucket index and the protocol family defined
+ * here; future changes must be applied there as well.
+ */
switch(protocol_family) {
case PF_INET:
- return 0;
+ return (0);
case PF_INET6:
- return 1;
+ return (1);
case PF_APPLETALK:
- return 2;
+ return (2);
case PF_VLAN:
- return 3;
+ return (3);
+ case PF_UNSPEC: /* listed explicitly; falls into the shared bucket below */
default:
- return 4;
+ return (4);
}
}
-static
-struct if_family_str *find_family_module(u_long if_family)
+/*
+ * Look up the if_proto attached to ifp for protocol_family.
+ *
+ * Caller must already be holding ifnet lock.
+ *
+ * Returns NULL when the interface has no hash table or no entry in
+ * the selected bucket matches.  Otherwise returns the matching entry
+ * after taking a reference on it with if_proto_ref(); presumably the
+ * caller drops that reference with if_proto_free() -- confirm.
+ */
+static struct if_proto *
+find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
- struct if_family_str *mod = NULL;
-
- TAILQ_FOREACH(mod, &if_family_head, if_fam_next) {
- if (mod->if_family == (if_family & 0xffff))
- break;
- }
+ struct if_proto *proto = NULL;
+ u_int32_t i = proto_hash_value(protocol_family); /* bucket index */
- return mod;
-}
+ ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
-static
-struct proto_family_str*
-find_proto_module(u_long proto_family, u_long if_family)
-{
- struct proto_family_str *mod = NULL;
+ if (ifp->if_proto_hash != NULL)
+ proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
- TAILQ_FOREACH(mod, &proto_family_head, proto_fam_next) {
- if ((mod->proto_family == (proto_family & 0xffff))
- && (mod->if_family == (if_family & 0xffff)))
- break;
- }
+ while (proto != NULL && proto->protocol_family != protocol_family)
+ proto = SLIST_NEXT(proto, next_hash); /* walk the bucket until a match or the end */
- return mod;
-}
+ if (proto != NULL)
+ if_proto_ref(proto); /* hand the entry back with a reference held */
-static struct if_proto*
-find_attached_proto(struct ifnet *ifp, u_long protocol_family)
-{
- struct if_proto *proto = NULL;
- u_long i = proto_hash_value(protocol_family);
- if (ifp->if_proto_hash) {
- proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
- }
-
- while(proto && proto->protocol_family != protocol_family) {
- proto = SLIST_NEXT(proto, next_hash);
- }
-
- return proto;
+ return (proto);
}
/*
 * Take one more reference on proto by atomically adding 1 to its
 * refcount.
 */
static void
if_proto_ref(struct if_proto *proto)
{
- OSAddAtomic(1, (UInt32*)&proto->refcount);
+ atomic_add_32(&proto->refcount, 1);
}
+extern void if_rtproto_del(struct ifnet *ifp, int protocol);
+
/*
 * Drop one reference on proto.  Returns immediately while the value
 * obtained from the atomic decrement is greater than 1.  Otherwise it
 * verifies the protocol is marked detached, invokes the detached
 * callback of whichever KPI version is in use (if one is set), removes
 * remaining routes with if_rtproto_del(), posts a
 * KEV_DL_PROTO_DETACHED event carrying the remaining protocol count,
 * and returns the structure to dlif_proto_zone.
 */
static void
if_proto_free(struct if_proto *proto)
{
- int oldval = OSAddAtomic(-1, (UInt32*)&proto->refcount);
-
- if (oldval == 1) { /* This was the last reference */
- FREE(proto, M_IFADDR);
+ u_int32_t oldval;
+ struct ifnet *ifp = proto->ifp;
+ u_int32_t proto_family = proto->protocol_family;
+ struct kev_dl_proto_data ev_pr_data;
+
+ oldval = atomic_add_32_ov(&proto->refcount, -1); /* presumably yields the pre-decrement value -- confirm */
+ if (oldval > 1)
+ return;
+
+ /* No more reference on this, protocol must have been detached */
+ VERIFY(proto->detached);
+
+ if (proto->proto_kpi == kProtoKPI_v1) {
+ if (proto->kpi.v1.detached)
+ proto->kpi.v1.detached(ifp, proto->protocol_family);
+ }
+ if (proto->proto_kpi == kProtoKPI_v2) {
+ if (proto->kpi.v2.detached)
+ proto->kpi.v2.detached(ifp, proto->protocol_family);
}
+
+ /*
+ * Cleanup routes that may still be in the routing table for that
+ * interface/protocol pair.
+ */
+ if_rtproto_del(ifp, proto_family);
+
+ /*
+ * The reserved field carries the number of protocols still attached
+ * (subject to change)
+ */
+ ifnet_lock_shared(ifp); /* dlil_ifp_proto_count() asserts the ifnet lock is held */
+ ev_pr_data.proto_family = proto_family;
+ ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
+ ifnet_lock_done(ifp);
+
+ dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
+ (struct net_event_data *)&ev_pr_data,
+ sizeof(struct kev_dl_proto_data)); /* dlil_post_msg() fills in the interface name, family and unit */
+
+ zfree(dlif_proto_zone, proto);
}
/*
 * Assert the state of ifp->if_lock.  The IFNET_LCK_ASSERT_* value in
 * what is translated to the matching LCK_RW_ASSERT_* type and passed
 * to lck_rw_assert().  IFNET_LCK_ASSERT_NOTOWNED performs no check at
 * all, and any unrecognized value panics.
 */
__private_extern__ void
-ifnet_lock_assert(
- __unused struct ifnet *ifp,
- __unused int what)
+ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
-#if IFNET_RW_LOCK
- /*
- * Not implemented for rw locks.
- *
- * Function exists so when/if we use mutex we can
- * enable this check.
- */
-#else
- lck_mtx_assert(ifp->if_lock, what);
-#endif
+ unsigned int type = 0; /* LCK_RW_ASSERT_* value handed to lck_rw_assert() */
+ int ass = 1; /* cleared when the assertion must be skipped */
+
+ switch (what) {
+ case IFNET_LCK_ASSERT_EXCLUSIVE:
+ type = LCK_RW_ASSERT_EXCLUSIVE;
+ break;
+
+ case IFNET_LCK_ASSERT_SHARED:
+ type = LCK_RW_ASSERT_SHARED;
+ break;
+
+ case IFNET_LCK_ASSERT_OWNED:
+ type = LCK_RW_ASSERT_HELD;
+ break;
+
+ case IFNET_LCK_ASSERT_NOTOWNED:
+ /* nothing to do here for RW lock; bypass assert */
+ ass = 0;
+ break;
+
+ default:
+ panic("bad ifnet assert type: %d", what);
+ /* NOTREACHED */
+ }
+ if (ass)
+ lck_rw_assert(&ifp->if_lock, type);
}
/*
 * Acquire ifp->if_lock in shared mode with lck_rw_lock_shared().
 */
__private_extern__ void
-ifnet_lock_shared(
- struct ifnet *ifp)
+ifnet_lock_shared(struct ifnet *ifp)
{
-#if IFNET_RW_LOCK
- lck_rw_lock_shared(ifp->if_lock);
-#else
- lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_NOTOWNED);
- lck_mtx_lock(ifp->if_lock);
-#endif
+ lck_rw_lock_shared(&ifp->if_lock);
}
/*
 * Acquire ifp->if_lock in exclusive mode with lck_rw_lock_exclusive().
 */
__private_extern__ void
-ifnet_lock_exclusive(
- struct ifnet *ifp)
+ifnet_lock_exclusive(struct ifnet *ifp)
{
-#if IFNET_RW_LOCK
- lck_rw_lock_exclusive(ifp->if_lock);
-#else
- lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_NOTOWNED);
- lck_mtx_lock(ifp->if_lock);
-#endif
+ lck_rw_lock_exclusive(&ifp->if_lock);
}
/*
 * Release ifp->if_lock with lck_rw_done().  if_proto_free() pairs this
 * with ifnet_lock_shared(); presumably it also undoes
 * ifnet_lock_exclusive() -- confirm against the lck_rw KPI.
 */
__private_extern__ void
-ifnet_lock_done(
- struct ifnet *ifp)
+ifnet_lock_done(struct ifnet *ifp)
{
-#if IFNET_RW_LOCK
- lck_rw_done(ifp->if_lock);
-#else
- lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_OWNED);
- lck_mtx_unlock(ifp->if_lock);
-#endif
+ lck_rw_done(&ifp->if_lock);
}
/*
 * Acquire the file-scope ifnet_head_lock in shared mode.
 */
__private_extern__ void
-ifnet_head_lock_shared()
+ifnet_head_lock_shared(void)
{
- lck_rw_lock_shared(ifnet_head_mutex);
+ lck_rw_lock_shared(&ifnet_head_lock);
}
/*
 * Acquire the file-scope ifnet_head_lock in exclusive mode.
 */
__private_extern__ void
-ifnet_head_lock_exclusive()
+ifnet_head_lock_exclusive(void)
{
- lck_rw_lock_exclusive(ifnet_head_mutex);
+ lck_rw_lock_exclusive(&ifnet_head_lock);
}
/*
 * Release the file-scope ifnet_head_lock with lck_rw_done().
 */
__private_extern__ void
-ifnet_head_done()
+ifnet_head_done(void)
{
- lck_rw_done(ifnet_head_mutex);
+ lck_rw_done(&ifnet_head_lock);
}
/*
- * Public functions.
+ * Caller must already be holding ifnet lock.
+ *
+ * Count the if_proto entries linked into all PROTO_HASH_SLOTS buckets
+ * of ifp->if_proto_hash.  Returns 0 when the interface has no hash
+ * table.
*/
-struct ifnet *ifbyfamily(u_long family, short unit)
+static int
+dlil_ifp_proto_count(struct ifnet * ifp)
{
- struct ifnet *ifp;
+ int i, count = 0;
- ifnet_head_lock_shared();
- TAILQ_FOREACH(ifp, &ifnet_head, if_link)
- if ((family == ifp->if_family) && (ifp->if_unit == unit))
- break;
- ifnet_head_done();
+ ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
- return ifp;
-}
+ if (ifp->if_proto_hash == NULL)
+ goto done; /* nothing attached; count stays 0 */
-static int dlil_ifp_proto_count(struct ifnet * ifp)
-{
- int count = 0;
- int i;
-
- if (ifp->if_proto_hash != NULL) {
- for (i = 0; i < PROTO_HASH_SLOTS; i++) {
- struct if_proto *proto;
- SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
- count++;
- }
+ for (i = 0; i < PROTO_HASH_SLOTS; i++) {
+ struct if_proto *proto;
+ SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
+ count++;
}
}
-
- return count;
+done:
+ return (count);
}
/*
 * Build a KEV_VENDOR_APPLE / KEV_NETWORK_CLASS kernel event with the
 * given subclass and code and hand it to dlil_event_internal().
 *
 * event_data may be NULL, in which case a zeroed local net_event_data
 * is used and event_data_len is replaced by its size.  Otherwise the
 * caller-supplied buffer is used as is.  In both cases the if_name,
 * if_family and if_unit fields at the start of the buffer are
 * overwritten from ifp before the event is posted.
 */
__private_extern__ void
-dlil_post_msg(struct ifnet *ifp, u_long event_subclass, u_long event_code,
- struct net_event_data *event_data, u_long event_data_len)
-{
- struct net_event_data ev_data;
- struct kev_msg ev_msg;
-
- /*
- * a net event always start with a net_event_data structure
+dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
+ u_int32_t event_code, struct net_event_data *event_data,
+ u_int32_t event_data_len)
+{
+ struct net_event_data ev_data;
+ struct kev_msg ev_msg;
+
+ bzero(&ev_msg, sizeof (ev_msg));
+ bzero(&ev_data, sizeof (ev_data));
+ /*
+ * a net event always starts with a net_event_data structure
* but the caller can generate a simple net event or
* provide a longer event structure to post
*/
-
- ev_msg.vendor_code = KEV_VENDOR_APPLE;
- ev_msg.kev_class = KEV_NETWORK_CLASS;
- ev_msg.kev_subclass = event_subclass;
- ev_msg.event_code = event_code;
-
- if (event_data == 0) {
+ ev_msg.vendor_code = KEV_VENDOR_APPLE;
+ ev_msg.kev_class = KEV_NETWORK_CLASS;
+ ev_msg.kev_subclass = event_subclass;
+ ev_msg.event_code = event_code;
+
+ if (event_data == NULL) { /* simple event: fall back to the local header-only buffer */
event_data = &ev_data;
event_data_len = sizeof(struct net_event_data);
}
-
+
/*
 * NOTE(review): strncpy() does not append a terminating NUL when the
 * source is IFNAMSIZ bytes or longer -- confirm ifp->if_name is
 * always shorter than that.
 */
strncpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
event_data->if_family = ifp->if_family;
- event_data->if_unit = (unsigned long) ifp->if_unit;
-
+ event_data->if_unit = (u_int32_t) ifp->if_unit;
+
ev_msg.dv[0].data_length = event_data_len;
- ev_msg.dv[0].data_ptr = event_data;
+ ev_msg.dv[0].data_ptr = event_data;
ev_msg.dv[1].data_length = 0; /* presumably marks the end of the data vector -- confirm */
-
+
dlil_event_internal(ifp, &ev_msg); /* the int result is not checked */
}
-void dlil_init(void);
/*
 * Initialize inputthread and start a kernel thread running
 * dlil_input_thread_func() with inputthread as its argument.
 *
 * The structure is zeroed, given a name derived from the interface
 * (a fixed name when ifp is lo_ifp), and its lock group and mutex are
 * set up before the thread is started.  On success the thread is put
 * in the network machine group and, when net_affinity is set and this
 * is not the loopback thread info, a randomly tagged affinity policy
 * is applied.  Failure to start the thread panics, so the function
 * only returns when kernel_thread_start() yielded 0.
 */
+static int
+dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inputthread)
+{
+ int error; /* result of kernel_thread_start(); also the return value */
+
+ bzero(inputthread, sizeof(*inputthread)); /* input_name is empty afterwards, so strlcat below acts as a copy */
+ /* loopback ifp may not be configured at dlil_init time. */
+ if (ifp == lo_ifp) {
+ (void) strlcat(inputthread->input_name,
+ "dlil_input_main_thread_mtx", DLIL_THREADNAME_LEN);
+ } else {
+ (void) snprintf(inputthread->input_name, DLIL_THREADNAME_LEN,
+ "dlil_input_%s%d_mtx", ifp->if_name, ifp->if_unit);
+ }
+
+ inputthread->lck_grp = lck_grp_alloc_init(inputthread->input_name,
+ dlil_grp_attributes);
+ lck_mtx_init(&inputthread->input_lck, inputthread->lck_grp,
+ dlil_lck_attributes);
+
+ error= kernel_thread_start((thread_continue_t)dlil_input_thread_func,
+ inputthread, &inputthread->input_thread);
+ if (error == 0) {
+ ml_thread_policy(inputthread->input_thread, MACHINE_GROUP,
+ (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
+ /*
+ * Except for the loopback dlil input thread, we create
+ * an affinity set so that the matching workloop thread
+ * can be scheduled on the same processor set.
+ */
+ if (net_affinity && inputthread != dlil_lo_thread_ptr) {
+ struct thread *tp = inputthread->input_thread;
+ u_int32_t tag;
+ /*
+ * Randomize to reduce the probability
+ * of affinity tag namespace collision.
+ */
+ read_random(&tag, sizeof (tag));
+ if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
+ thread_reference(tp); /* extra thread reference kept only when the affinity tag was applied */
+ inputthread->tag = tag;
+ inputthread->net_affinity = TRUE;
+ }
+ }
+ } else {
+ panic("%s: couldn't create thread", __func__);
+ /* NOTREACHED */
+ }
+ OSAddAtomic(1, &cur_dlil_input_threads); /* one more DLIL input thread exists */
+#if DLIL_DEBUG
+ printf("%s: threadinfo: %p input_thread=%p threads: cur=%d max=%d\n",
+ __func__, inputthread, inputthread->input_thread,
+ dlil_multithreaded_input, cur_dlil_input_threads); /* NOTE(review): cur= prints dlil_multithreaded_input and max= prints cur_dlil_input_threads; labels look mismatched -- confirm */
+#endif
+ return (error);
+}
+
/*
 * Apply a THREAD_AFFINITY_POLICY carrying the given affinity tag to
 * thread tp and return the thread_policy_set() result unchanged.
 */
+static kern_return_t
+dlil_affinity_set(struct thread *tp, u_int32_t tag)
+{
+ thread_affinity_policy_data_t policy;
+
+ bzero(&policy, sizeof (policy)); /* clear every field before setting the tag */
+ policy.affinity_tag = tag;
+ return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
+ (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
+}
+
void
dlil_init(void)
{
- lck_grp_attr_t *grp_attributes = 0;
- lck_attr_t *lck_attributes = 0;
- lck_grp_t *input_lock_grp = 0;
-
+ thread_t thread = THREAD_NULL;
+
+ /*
+ * The following fields must be 64-bit aligned for atomic operations.
+ */
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
+
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
+
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ibkpackets);
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ibkbytes);
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_obkpackets);
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_obkbytes);
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivipackets);
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivibytes);
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovipackets);
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovibytes);
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivopackets);
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivobytes);
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovopackets);
+ IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovobytes);
+
+ /*
+ * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
+ */
+ _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
+ _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
+ _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
+ _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
+ _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
+ _CASSERT(IF_HWASSIST_CSUM_TCP_SUM16 == IFNET_CSUM_SUM16);
+ _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
+ _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
+ _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
+ _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
+
+ /*
+ * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
+ */
+ _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
+
+ PE_parse_boot_argn("net_affinity", &net_affinity,
+ sizeof (net_affinity));
+
+ PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
+
+ PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
+
+ dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
+ sizeof (struct dlil_ifnet_dbg);
+ /* Enforce 64-bit alignment for dlil_ifnet structure */
+ dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
+ dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
+ dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
+ 0, DLIF_ZONE_NAME);
+ if (dlif_zone == NULL) {
+ panic("%s: failed allocating %s", __func__, DLIF_ZONE_NAME);
+ /* NOTREACHED */
+ }
+ zone_change(dlif_zone, Z_EXPAND, TRUE);
+ zone_change(dlif_zone, Z_CALLERACCT, FALSE);
+
+ dlif_filt_size = sizeof (struct ifnet_filter);
+ dlif_filt_zone = zinit(dlif_filt_size,
+ DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
+ if (dlif_filt_zone == NULL) {
+ panic("%s: failed allocating %s", __func__,
+ DLIF_FILT_ZONE_NAME);
+ /* NOTREACHED */
+ }
+ zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
+ zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
+
+ dlif_inp_size = sizeof (struct dlil_threading_info);
+ dlif_inp_zone = zinit(dlif_inp_size,
+ DLIF_INP_ZONE_MAX * dlif_inp_size, 0, DLIF_INP_ZONE_NAME);
+ if (dlif_inp_zone == NULL) {
+ panic("%s: failed allocating %s", __func__, DLIF_INP_ZONE_NAME);
+ /* NOTREACHED */
+ }
+ zone_change(dlif_inp_zone, Z_EXPAND, TRUE);
+ zone_change(dlif_inp_zone, Z_CALLERACCT, FALSE);
+
+ dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
+ dlif_phash_zone = zinit(dlif_phash_size,
+ DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
+ if (dlif_phash_zone == NULL) {
+ panic("%s: failed allocating %s", __func__,
+ DLIF_PHASH_ZONE_NAME);
+ /* NOTREACHED */
+ }
+ zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
+ zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
+
+ dlif_proto_size = sizeof (struct if_proto);
+ dlif_proto_zone = zinit(dlif_proto_size,
+ DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
+ if (dlif_proto_zone == NULL) {
+ panic("%s: failed allocating %s", __func__,
+ DLIF_PROTO_ZONE_NAME);
+ /* NOTREACHED */
+ }
+ zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
+ zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
+
+ ifnet_llreach_init();
+
TAILQ_INIT(&dlil_ifnet_head);
- TAILQ_INIT(&if_family_head);
- TAILQ_INIT(&proto_family_head);
TAILQ_INIT(&ifnet_head);
-
+ TAILQ_INIT(&ifnet_detaching_head);
+
/* Setup the lock groups we will use */
- grp_attributes = lck_grp_attr_alloc_init();
+ dlil_grp_attributes = lck_grp_attr_alloc_init();
+
+ dlil_lock_group = lck_grp_alloc_init("dlil internal locks",
+ dlil_grp_attributes);
+ ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
+ dlil_grp_attributes);
+ ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
+ dlil_grp_attributes);
+ dlil_input_lock_grp = lck_grp_alloc_init("dlil input lock",
+ dlil_grp_attributes);
- dlil_lock_group = lck_grp_alloc_init("dlil internal locks", grp_attributes);
-#if IFNET_RW_LOCK
- ifnet_lock_group = lck_grp_alloc_init("ifnet locks", grp_attributes);
-#else
- ifnet_lock_group = lck_grp_alloc_init("ifnet locks", grp_attributes);
-#endif
- ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock", grp_attributes);
- input_lock_grp = lck_grp_alloc_init("dlil input lock", grp_attributes);
- lck_grp_attr_free(grp_attributes);
- grp_attributes = 0;
-
/* Setup the lock attributes we will use */
- lck_attributes = lck_attr_alloc_init();
-
+ dlil_lck_attributes = lck_attr_alloc_init();
+
ifnet_lock_attr = lck_attr_alloc_init();
-
- dlil_input_lock = lck_spin_alloc_init(input_lock_grp, lck_attributes);
- input_lock_grp = 0;
-
- ifnet_head_mutex = lck_rw_alloc_init(ifnet_head_lock_group, lck_attributes);
- proto_family_mutex = lck_mtx_alloc_init(dlil_lock_group, lck_attributes);
- dlil_ifnet_mutex = lck_mtx_alloc_init(dlil_lock_group, lck_attributes);
- dlil_mutex = lck_mtx_alloc_init(dlil_lock_group, lck_attributes);
-
- lck_attr_free(lck_attributes);
- lck_attributes = 0;
-
+
+ lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
+ dlil_lck_attributes);
+ lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
+
+ lck_attr_free(dlil_lck_attributes);
+ dlil_lck_attributes = NULL;
+
+ ifa_init();
+
/*
- * Start up the dlil input thread once everything is initialized
+ * Create and start up the first dlil input thread once everything
+ * is initialized.
*/
- (void) kernel_thread(kernel_task, dlil_input_thread);
- (void) kernel_thread(kernel_task, dlil_call_delayed_detach_thread);
+ dlil_create_input_thread(lo_ifp, dlil_lo_thread_ptr);
+
+ if (kernel_thread_start((thread_continue_t)ifnet_delayed_thread_func,
+ NULL, &thread) != 0) {
+ panic("%s: couldn't create detach thread", __func__);
+ /* NOTREACHED */
+ }
+ thread_deallocate(thread);
+
+#if PF
+ /* Initialize the packet filter */
+ pfinit();
+#endif /* PF */
}
-int
-dlil_attach_filter(
- struct ifnet *ifp,
- const struct iff_filter *if_filter,
- interface_filter_t *filter_ref)
-{
- int retval = 0;
- struct ifnet_filter *filter;
-
- MALLOC(filter, struct ifnet_filter *, sizeof(*filter), M_NKE, M_WAITOK);
- if (filter == NULL)
- return ENOMEM;
- bzero(filter, sizeof(*filter));
+/*
+ * Take one busy reference on ifp's interface filter list.  The caller must
+ * own if_flt_lock; the counter is verified not to have wrapped to zero.
+ */
+static void
+if_flt_monitor_busy(struct ifnet *ifp)
+{
+	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+
+	ifp->if_flt_busy++;
+	VERIFY(ifp->if_flt_busy != 0);
+}
+
+/*
+ * Drop a busy reference taken with if_flt_monitor_busy(); this is simply
+ * if_flt_monitor_leave() under another name.
+ */
+static void
+if_flt_monitor_unbusy(struct ifnet *ifp)
+{
+	if_flt_monitor_leave(ifp);
+	return;
+}
+
+/*
+ * Enter the filter list monitor: sleep (releasing if_flt_lock through
+ * msleep) for as long as the list is busy, then mark it busy ourselves.
+ * The caller must own if_flt_lock.
+ */
+static void
+if_flt_monitor_enter(struct ifnet *ifp)
+{
+	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+
+	while (ifp->if_flt_busy != 0) {
+		/* record that somebody needs the wakeup in monitor_leave */
+		ifp->if_flt_waiters++;
+		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
+		    (PZERO - 1), "if_flt_monitor", NULL);
+	}
+
+	if_flt_monitor_busy(ifp);
+}
+
+/*
+ * Leave the filter list monitor: drop one busy reference and, when that
+ * was the last one and threads are waiting, reset the waiter count and
+ * wake them all.  The caller must own if_flt_lock.
+ */
+static void
+if_flt_monitor_leave(struct ifnet *ifp)
+{
+	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+
+	VERIFY(ifp->if_flt_busy != 0);
+	ifp->if_flt_busy--;
+
+	/* nothing more to do while still busy or when nobody is waiting */
+	if (ifp->if_flt_busy != 0 || !(ifp->if_flt_waiters > 0))
+		return;
+
+	ifp->if_flt_waiters = 0;
+	wakeup(&ifp->if_flt_head);
+}
+
+__private_extern__ int
+dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
+ interface_filter_t *filter_ref)
+{ /*
+ * Allocate an interface filter from dlif_filt_zone, fill it in from
+ * if_filter, append it to ifp's filter list and hand it back through
+ * *filter_ref.  Returns 0 on success, ENXIO when ifnet_lookup() does
+ * not find ifp, or ENOMEM when the zone allocation fails; on failure
+ * any allocated filter is returned to the zone.
+ */
+ int retval = 0;
+ struct ifnet_filter *filter = NULL;
+
+ ifnet_head_lock_shared(); /* released at done: via ifnet_head_done() */
+ /* Check that the interface is in the global list */
+ if (!ifnet_lookup(ifp)) {
+ retval = ENXIO;
+ goto done;
+ }
+
+ filter = zalloc(dlif_filt_zone);
+ if (filter == NULL) {
+ retval = ENOMEM;
+ goto done;
+ }
+ bzero(filter, dlif_filt_size);
-
+ /*
+ * refcnt held above during lookup
+ *
+ * NOTE(review): filt_protocol, filt_input and filt_output are not
+ * assigned in the lines shown here although the input/output paths
+ * read them; presumably they are set in lines this patch leaves
+ * untouched -- confirm.
+ */
filter->filt_ifp = ifp;
filter->filt_cookie = if_filter->iff_cookie;
filter->filt_name = if_filter->iff_name;
filter->filt_event = if_filter->iff_event;
filter->filt_ioctl = if_filter->iff_ioctl;
filter->filt_detached = if_filter->iff_detached;
-
- if ((retval = dlil_write_begin()) != 0) {
- /* Failed to acquire the write lock */
- FREE(filter, M_NKE);
- return retval;
- }
+
+ lck_mtx_lock(&ifp->if_flt_lock);
+ if_flt_monitor_enter(ifp); /* sleeps while the list is marked busy */
+
+ lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
- dlil_write_end();
+
+ if_flt_monitor_leave(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
*filter_ref = filter;
- return retval;
+
+ /*
+ * Bump filter count and route_generation ID to let TCP
+ * know it shouldn't do TSO on this connection
+ */
+ OSAddAtomic(1, &dlil_filter_count);
+ if (use_routegenid)
+ routegenid_update();
+
+ if (dlil_verbose) {
+ printf("%s%d: %s filter attached\n", ifp->if_name,
+ ifp->if_unit, if_filter->iff_name);
+ }
+done:
+ ifnet_head_done();
+ if (retval != 0 && ifp != NULL) { /* NOTE(review): on ENXIO ifp was not found by ifnet_lookup() but is still dereferenced below */
+ DLIL_PRINTF("%s%d: failed to attach %s (err=%d)\n",
+ ifp->if_name, ifp->if_unit, if_filter->iff_name, retval);
+ }
+ if (retval != 0 && filter != NULL) /* undo the allocation on failure */
+ zfree(dlif_filt_zone, filter);
+
+ return (retval);
}
static int
-dlil_detach_filter_internal(interface_filter_t filter, int detached)
+dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
int retval = 0;
-
+
if (detached == 0) {
- ifnet_t ifp = NULL;
- interface_filter_t entry = NULL;
-
- /* Take the write lock */
- retval = dlil_write_begin();
- if (retval != 0 && retval != EDEADLK)
- return retval;
-
- /*
- * At this point either we have the write lock (retval == 0)
- * or we couldn't get it (retval == EDEADLK) because someone
- * else up the stack is holding the read lock. It is safe to
- * read, either the read or write is held. Verify the filter
- * parameter before proceeding.
- */
+ ifnet_t ifp = NULL;
+
ifnet_head_lock_shared();
TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+ interface_filter_t entry = NULL;
+
+ lck_mtx_lock(&ifp->if_flt_lock);
TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
- if (entry == filter)
- break;
+ if (entry != filter || entry->filt_skip)
+ continue;
+ /*
+ * We've found a match; since it's possible
+ * that the thread gets blocked in the monitor,
+ * we do the lock dance. Interface should
+ * not be detached since we still have a use
+ * count held during filter attach.
+ */
+ entry->filt_skip = 1; /* skip input/output */
+ lck_mtx_unlock(&ifp->if_flt_lock);
+ ifnet_head_done();
+
+ lck_mtx_lock(&ifp->if_flt_lock);
+ if_flt_monitor_enter(ifp);
+ lck_mtx_assert(&ifp->if_flt_lock,
+ LCK_MTX_ASSERT_OWNED);
+
+ /* Remove the filter from the list */
+ TAILQ_REMOVE(&ifp->if_flt_head, filter,
+ filt_next);
+
+ if_flt_monitor_leave(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+ if (dlil_verbose) {
+ printf("%s%d: %s filter detached\n",
+ ifp->if_name, ifp->if_unit,
+ filter->filt_name);
+ }
+ goto destroy;
}
- if (entry == filter)
- break;
+ lck_mtx_unlock(&ifp->if_flt_lock);
}
ifnet_head_done();
-
- if (entry != filter) {
- /* filter parameter is not a valid filter ref */
- if (retval == 0) {
- dlil_write_end();
- }
- return EINVAL;
- }
-
- if (retval == EDEADLK) {
- /* Perform a delayed detach */
- filter->filt_detaching = 1;
- dlil_detach_waiting = 1;
- wakeup(&dlil_detach_waiting);
- return 0;
- }
-
- /* Remove the filter from the list */
- TAILQ_REMOVE(&ifp->if_flt_head, filter, filt_next);
- dlil_write_end();
+
+ /* filter parameter is not a valid filter ref */
+ retval = EINVAL;
+ goto done;
}
-
- /* Call the detached funciton if there is one */
+
+ if (dlil_verbose)
+ printf("%s filter detached\n", filter->filt_name);
+
+destroy:
+
+ /* Call the detached function if there is one */
if (filter->filt_detached)
filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
/* Free the filter */
- FREE(filter, M_NKE);
-
- return retval;
+ zfree(dlif_filt_zone, filter);
+
+ /*
+ * Decrease filter count and route_generation ID to let TCP
+ * know it should reevalute doing TSO or not
+ */
+ OSAddAtomic(-1, &dlil_filter_count);
+ if (use_routegenid)
+ routegenid_update();
+
+done:
+ if (retval != 0) {
+ DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
+ filter->filt_name, retval);
+ }
+ return (retval);
}
-void
+__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
if (filter == NULL)
}
static void
-dlil_input_thread_continue(
- __unused void* foo,
- __unused wait_result_t wait)
+dlil_input_thread_func(struct dlil_threading_info *inputthread) /*
+ * Main loop of a DLIL input thread: sleep until work is posted in
+ * inputthread->input_waiting, take over the queued packet chain(s), feed
+ * them to dlil_input_packet_list(), then call proto_input_run() if protocol
+ * work was flagged.  The loop only ends when DLIL_INPUT_TERMINATE is seen,
+ * in which case the thread releases its own state and terminates.
+ */
{
while (1) {
- struct mbuf *m, *m_loop;
-
- lck_spin_lock(dlil_input_lock);
- m = dlil_input_mbuf_head;
- dlil_input_mbuf_head = NULL;
- dlil_input_mbuf_tail = NULL;
- m_loop = dlil_input_loop_head;
- dlil_input_loop_head = NULL;
- dlil_input_loop_tail = NULL;
- lck_spin_unlock(dlil_input_lock);
-
+ struct mbuf *m = NULL, *m_loop = NULL;
+#if IFNET_INPUT_SANITY_CHK
+ int loop_cnt = 0, mbuf_cnt;
+ int count;
+ struct mbuf *m1;
+#endif /* IFNET_INPUT_SANITY_CHK */
+
+ lck_mtx_lock_spin(&inputthread->input_lck);
+
+ /*
+ * Wait until there is work to be done, i.e. until some bit other
+ * than DLIL_INPUT_RUNNING is set.  RUNNING is cleared before
+ * sleeping; ifnet_input() only issues a wakeup when it is clear.
+ */
+ while (!(inputthread->input_waiting & ~DLIL_INPUT_RUNNING)) {
+ inputthread->input_waiting &= ~DLIL_INPUT_RUNNING;
+ msleep(&inputthread->input_waiting,
+ &inputthread->input_lck, 0,
+ inputthread->input_name, 0);
+ }
+
+ lck_mtx_assert(&inputthread->input_lck, LCK_MTX_ASSERT_OWNED);
+
+ m = inputthread->mbuf_head; /* take over the whole pending chain */
+ inputthread->mbuf_head = NULL;
+ inputthread->mbuf_tail = NULL;
+
+ if (inputthread->input_waiting & DLIL_INPUT_TERMINATE) { /* asked to exit */
+ lck_mtx_unlock(&inputthread->input_lck);
+
+ if (m != NULL) /* packets still queued are dropped */
+ mbuf_freem_list(m);
+
+ OSAddAtomic(-1, &cur_dlil_input_threads);
+
+ lck_mtx_destroy(&inputthread->input_lck,
+ inputthread->lck_grp);
+ lck_grp_free(inputthread->lck_grp);
+
+ zfree(dlif_inp_zone, inputthread); /* inputthread must not be touched past this point */
+
+ /* for the extra refcnt from kernel_thread_start() */
+ thread_deallocate(current_thread());
+
+ /* this is the end */
+ thread_terminate(current_thread());
+ /* NOTREACHED */
+ return;
+ }
+
+ inputthread->input_waiting |= DLIL_INPUT_RUNNING;
+ inputthread->input_waiting &= ~DLIL_INPUT_WAITING;
+
+ if (inputthread == dlil_lo_thread_ptr) { /* the loopback thread also owns the loopback queue */
+ m_loop = dlil_lo_input_mbuf_head;
+ dlil_lo_input_mbuf_head = NULL;
+ dlil_lo_input_mbuf_tail = NULL;
+ }
+
+#if IFNET_INPUT_SANITY_CHK
+ if (dlil_input_sanity_check != 0) { /* cross-check chain lengths against the recorded counts */
+ mbuf_cnt = inputthread->mbuf_count;
+ inputthread->mbuf_count = 0;
+ if (inputthread == dlil_lo_thread_ptr) {
+ loop_cnt = dlil_lo_input_mbuf_count;
+ dlil_lo_input_mbuf_count = 0;
+ }
+
+ lck_mtx_unlock(&inputthread->input_lck);
+
+ for (m1 = m, count = 0; m1; m1 = mbuf_nextpkt(m1)) {
+ count++;
+ }
+ if (count != mbuf_cnt) {
+ panic("%s - thread=%p reg. loop queue "
+ "has %d packets, should have %d\n",
+ __func__, inputthread, count, mbuf_cnt);
+ /* NOTREACHED */
+ }
+
+ if (inputthread == dlil_lo_thread_ptr) {
+ for (m1 = m_loop, count = 0; m1;
+ m1 = mbuf_nextpkt(m1)) {
+ count++;
+ }
+ if (count != loop_cnt) {
+ panic("%s - thread=%p loop queue "
+ "has %d packets, should have %d\n",
+ __func__, inputthread, count,
+ loop_cnt);
+ /* NOTREACHED */
+ }
+ }
+ } else
+#endif /* IFNET_INPUT_SANITY_CHK */
+ {
+ lck_mtx_unlock(&inputthread->input_lck);
+ }
+
+
/*
* NOTE warning %%% attention !!!!
- * We should think about putting some thread starvation safeguards if
- * we deal with long chains of packets.
+ * We should think about putting some thread starvation
+ * safeguards if we deal with long chains of packets.
*/
- while (m) {
- struct mbuf *m0 = m->m_nextpkt;
- void *header = m->m_pkthdr.header;
-
- m->m_nextpkt = NULL;
- m->m_pkthdr.header = NULL;
- (void) dlil_input_packet(m->m_pkthdr.rcvif, m, header);
- m = m0;
- }
- m = m_loop;
- while (m) {
- struct mbuf *m0 = m->m_nextpkt;
- void *header = m->m_pkthdr.header;
- struct ifnet *ifp = &loif[0];
-
- m->m_nextpkt = NULL;
- m->m_pkthdr.header = NULL;
- (void) dlil_input_packet(ifp, m, header);
- m = m0;
+ if (m_loop) { /* loopback packets are processed first */
+ if (inputthread == dlil_lo_thread_ptr) {
+ dlil_input_packet_list(lo_ifp, m_loop);
+ }
+#if IFNET_INPUT_SANITY_CHK
+ else {
+ panic("%s - thread=%p loop queue has %d "
+ "packets, should have none!\n", __func__,
+ inputthread, loop_cnt);
+ /* NOTREACHED */
+ }
+#endif /* IFNET_INPUT_SANITY_CHK */
}
-
- proto_input_run();
-
- if (dlil_input_mbuf_head == NULL &&
- dlil_input_loop_head == NULL && inject_buckets == 0) {
- assert_wait(&dlil_input_thread_wakeup, THREAD_UNINT);
- (void) thread_block(dlil_input_thread_continue);
- /* NOTREACHED */
+
+ if (m != NULL)
+ dlil_input_packet_list(0, m); /* no fixed ifp: each packet's rcvif is used */
+
+ lck_mtx_lock_spin(&inputthread->input_lck);
+
+ if (inputthread->input_waiting &
+ (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)) { /* protocol work was flagged */
+ lck_mtx_unlock(&inputthread->input_lck);
+ proto_input_run();
+ } else {
+ lck_mtx_unlock(&inputthread->input_lck);
}
}
}
-void dlil_input_thread(void)
+/*
+ * Queue the packet chain m_head, received on ifp, for processing by a DLIL
+ * input thread.
+ *
+ * The chain is walked to find its tail (and, when the input sanity check
+ * is enabled, to validate every packet).  It is then appended to the
+ * loopback queue or to the queue of the selected input thread -- the
+ * interface's own if_input_thread, or the loopback thread when there is
+ * none or multithreaded input is off -- and that thread is woken unless
+ * it is already running.  If `stats' is supplied, the interface counters
+ * are updated atomically.
+ *
+ * Returns EINVAL when ifp or m_head is NULL (m_head, if any, is freed),
+ * otherwise 0.
+ */
+errno_t
+ifnet_input(ifnet_t ifp, mbuf_t m_head,
+ const struct ifnet_stat_increment_param *stats)
{
- register thread_t self = current_thread();
-
- ml_thread_policy(self, MACHINE_GROUP,
- (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
-
- dlil_initialized = 1;
- dlil_input_thread_ptr = current_thread();
- dlil_input_thread_continue(NULL, THREAD_RESTART);
-}
+ struct thread *tp = current_thread();
+ mbuf_t m_tail;
+ struct dlil_threading_info *inp;
+#if IFNET_INPUT_SANITY_CHK
+ u_int32_t pkt_count = 0;
+#endif /* IFNET_INPUT_SANITY_CHK */
-int
-dlil_input_with_stats(
- struct ifnet *ifp,
- struct mbuf *m_head,
- struct mbuf *m_tail,
- const struct ifnet_stat_increment_param *stats)
-{
- /* WARNING
+ if (ifp == NULL || m_head == NULL) {
+ if (m_head != NULL)
+ mbuf_freem_list(m_head);
+ return (EINVAL);
+ }
+
+ /* Walk the chain so that m_tail ends up on its last packet */
+ m_tail = m_head;
+ while (1) {
+#if IFNET_INPUT_SANITY_CHK
+ if (dlil_input_sanity_check != 0) {
+ ifnet_t rcvif;
+
+ rcvif = mbuf_pkthdr_rcvif(m_tail);
+ pkt_count++;
+
+ /*
+ * Validate the packet being visited (m_tail), not
+ * just the head of the chain, so that every packet
+ * is checked for a packet header.
+ */
+ if (rcvif == NULL ||
+ (ifp->if_type != IFT_LOOP && rcvif != ifp) ||
+ !(mbuf_flags(m_tail) & MBUF_PKTHDR)) {
+ panic("%s - invalid mbuf %p\n",
+ __func__, m_tail);
+ /* NOTREACHED */
+ }
+ }
+#endif /* IFNET_INPUT_SANITY_CHK */
+ if (mbuf_nextpkt(m_tail) == NULL)
+ break;
+ m_tail = mbuf_nextpkt(m_tail);
+ }
+
+ inp = ifp->if_input_thread;
+
+ if (dlil_multithreaded_input == 0 || inp == NULL)
+ inp = dlil_lo_thread_ptr;
+
+ /*
+ * If there is a matching dlil input thread associated with an
+ * affinity set, associate this workloop thread with the same set.
+ * We will only do this once.
+ */
+ lck_mtx_lock_spin(&inp->input_lck);
+ if (inp->net_affinity && inp->workloop_thread == NULL) {
+ u_int32_t tag = inp->tag;
+ inp->workloop_thread = tp;
+ lck_mtx_unlock(&inp->input_lck);
+
+ /* Associate the current thread with the new affinity tag */
+ (void) dlil_affinity_set(tp, tag);
+
+ /*
+ * Take a reference on the workloop (current) thread; during
+ * detach, we will need to refer to it in order to tear down
+ * its affinity.
+ */
+ thread_reference(tp);
+ lck_mtx_lock_spin(&inp->input_lck);
+ }
+
+ /* WARNING
* Because of loopbacked multicast we cannot stuff the ifp in
* the rcvif of the packet header: loopback has its own dlil
* input queue
*/
-
- lck_spin_lock(dlil_input_lock);
- if (ifp->if_type != IFT_LOOP) {
- if (dlil_input_mbuf_head == NULL)
- dlil_input_mbuf_head = m_head;
- else if (dlil_input_mbuf_tail != NULL)
- dlil_input_mbuf_tail->m_nextpkt = m_head;
- dlil_input_mbuf_tail = m_tail ? m_tail : m_head;
+
+ if (inp == dlil_lo_thread_ptr && ifp->if_type == IFT_LOOP) {
+ if (dlil_lo_input_mbuf_head == NULL)
+ dlil_lo_input_mbuf_head = m_head;
+ else if (dlil_lo_input_mbuf_tail != NULL)
+ dlil_lo_input_mbuf_tail->m_nextpkt = m_head;
+ dlil_lo_input_mbuf_tail = m_tail;
+#if IFNET_INPUT_SANITY_CHK
+ if (dlil_input_sanity_check != 0) {
+ dlil_lo_input_mbuf_count += pkt_count;
+ inp->input_mbuf_cnt += pkt_count;
+ inp->input_wake_cnt++;
+
+ lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
+ }
+#endif
} else {
- if (dlil_input_loop_head == NULL)
- dlil_input_loop_head = m_head;
- else if (dlil_input_loop_tail != NULL)
- dlil_input_loop_tail->m_nextpkt = m_head;
- dlil_input_loop_tail = m_tail ? m_tail : m_head;
+ if (inp->mbuf_head == NULL)
+ inp->mbuf_head = m_head;
+ else if (inp->mbuf_tail != NULL)
+ inp->mbuf_tail->m_nextpkt = m_head;
+ inp->mbuf_tail = m_tail;
+#if IFNET_INPUT_SANITY_CHK
+ if (dlil_input_sanity_check != 0) {
+ inp->mbuf_count += pkt_count;
+ inp->input_mbuf_cnt += pkt_count;
+ inp->input_wake_cnt++;
+
+ lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
+ }
+#endif
+ }
+
+ /* Post the work; a wakeup is only needed if the thread is not running */
+ inp->input_waiting |= DLIL_INPUT_WAITING;
+ if ((inp->input_waiting & DLIL_INPUT_RUNNING) == 0) {
+ wakeup((caddr_t)&inp->input_waiting);
}
+ lck_mtx_unlock(&inp->input_lck);
+
if (stats) {
- ifp->if_data.ifi_ipackets += stats->packets_in;
- ifp->if_data.ifi_ibytes += stats->bytes_in;
- ifp->if_data.ifi_ierrors += stats->errors_in;
-
- ifp->if_data.ifi_opackets += stats->packets_out;
- ifp->if_data.ifi_obytes += stats->bytes_out;
- ifp->if_data.ifi_oerrors += stats->errors_out;
-
- ifp->if_data.ifi_collisions += stats->collisions;
- ifp->if_data.ifi_iqdrops += stats->dropped;
- }
- lck_spin_unlock(dlil_input_lock);
-
- wakeup((caddr_t)&dlil_input_thread_wakeup);
-
- return 0;
-}
+ atomic_add_64(&ifp->if_data.ifi_ipackets, stats->packets_in);
+ atomic_add_64(&ifp->if_data.ifi_ibytes, stats->bytes_in);
+ atomic_add_64(&ifp->if_data.ifi_ierrors, stats->errors_in);
-int
-dlil_input(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail)
-{
- return dlil_input_with_stats(ifp, m_head, m_tail, NULL);
-}
+ atomic_add_64(&ifp->if_data.ifi_opackets, stats->packets_out);
+ atomic_add_64(&ifp->if_data.ifi_obytes, stats->bytes_out);
+ atomic_add_64(&ifp->if_data.ifi_oerrors, stats->errors_out);
-int
-dlil_input_packet(struct ifnet *ifp, struct mbuf *m,
- char *frame_header)
-{
- int retval;
- struct if_proto *ifproto = 0;
- protocol_family_t protocol_family;
- struct ifnet_filter *filter;
+ atomic_add_64(&ifp->if_data.ifi_collisions, stats->collisions);
+ atomic_add_64(&ifp->if_data.ifi_iqdrops, stats->dropped);
+ }
+ return (0);
+}
- KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);
+static int
+dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
+ char **frame_header_p, protocol_family_t protocol_family)
+{
+ struct ifnet_filter *filter;
/*
- * Lock the interface while we run through
- * the filters and the demux. This lock
- * protects the filter list and the demux list.
+ * Pass the inbound packet to the interface filters
+ *
+ * Every filter on ifp that is not marked filt_skip, has an input
+ * callback, and matches protocol_family (or has filt_protocol 0) is
+ * called in list order with if_flt_lock dropped.  The first non-zero
+ * result stops the walk and is returned as is; otherwise 0 is
+ * returned once M_PROTO1 has been cleared on the remaining packet.
*/
- dlil_read_begin();
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+ /* prevent filter list from changing in case we drop the lock */
+ if_flt_monitor_busy(ifp);
+ TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
+ int result;
+
+ if (!filter->filt_skip && filter->filt_input != NULL &&
+ (filter->filt_protocol == 0 ||
+ filter->filt_protocol == protocol_family)) {
+ lck_mtx_unlock(&ifp->if_flt_lock); /* callback runs without the lock */
+
+ result = (*filter->filt_input)(filter->filt_cookie,
+ ifp, protocol_family, m_p, frame_header_p);
+
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+ if (result != 0) {
+ /* we're done with the filter list */
+ if_flt_monitor_unbusy(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+ return (result);
+ }
+ }
+ }
+ /* we're done with the filter list */
+ if_flt_monitor_unbusy(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
/*
- * Call family demux module. If the demux module finds a match
- * for the frame it will fill-in the ifproto pointer.
+ * Strip away M_PROTO1 bit prior to sending packet up the stack as
+ * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
*/
+ if (*m_p != NULL)
+ (*m_p)->m_flags &= ~M_PROTO1;
- retval = ifp->if_demux(ifp, m, frame_header, &protocol_family);
- if (retval != 0)
- protocol_family = 0;
- if (retval == EJUSTRETURN) {
- dlil_read_end();
- return 0;
- }
+ return (0);
+}
- /* DANGER!!! */
- if (m->m_flags & (M_BCAST|M_MCAST))
- ifp->if_imcasts++;
+static int
+dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
+ protocol_family_t protocol_family)
+{
+ struct ifnet_filter *filter;
/*
- * Run interface filters
+ * Pass the outbound packet to the interface filters
+ *
+ * Every filter on ifp that is not marked filt_skip, has an output
+ * callback, and matches protocol_family (or has filt_protocol 0) is
+ * called in list order with if_flt_lock dropped.  The first non-zero
+ * result stops the walk and is returned as is; otherwise 0.
*/
-
- /* Do not pass VLAN tagged packets to filters PR-3586856 */
- if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
- TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
- int filter_result;
- if (filter->filt_input && (filter->filt_protocol == 0 ||
- filter->filt_protocol == protocol_family)) {
- filter_result = filter->filt_input(filter->filt_cookie, ifp, protocol_family, &m, &frame_header);
-
- if (filter_result) {
- dlil_read_end();
- if (filter_result == EJUSTRETURN) {
- filter_result = 0;
- }
- else {
- m_freem(m);
- }
-
- return filter_result;
- }
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+ /* prevent filter list from changing in case we drop the lock */
+ if_flt_monitor_busy(ifp);
+ TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
+ int result;
+
+ if (!filter->filt_skip && filter->filt_output != NULL &&
+ (filter->filt_protocol == 0 ||
+ filter->filt_protocol == protocol_family)) {
+ lck_mtx_unlock(&ifp->if_flt_lock); /* callback runs without the lock */
+
+ result = filter->filt_output(filter->filt_cookie, ifp,
+ protocol_family, m_p);
+
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+ if (result != 0) {
+ /* we're done with the filter list */
+ if_flt_monitor_unbusy(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+ return (result);
}
}
}
+ /* we're done with the filter list */
+ if_flt_monitor_unbusy(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
- /* Demux is done, interface filters have been processed, unlock the mutex */
- if (retval || ((m->m_flags & M_PROMISC) != 0) ) {
- dlil_read_end();
- if (retval != EJUSTRETURN) {
- m_freem(m);
- return retval;
+ return (0);
+}
+
+static void
+dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
+{ /*
+ * Hand the packet chain `m' to the protocol attached as `ifproto',
+ * one packet at a time for kProtoKPI_v1 and as a whole list for
+ * kProtoKPI_v2.  Packets are freed here when the protocol's input
+ * routine returns an error other than EJUSTRETURN.
+ */
+ int error;
+
+ if (ifproto->proto_kpi == kProtoKPI_v1) {
+ /* Version 1 protocols get one packet at a time */
+ while (m != NULL) {
+ char * frame_header;
+ mbuf_t next_packet;
+
+ next_packet = m->m_nextpkt; /* remember the rest before unlinking */
+ m->m_nextpkt = NULL;
+ frame_header = m->m_pkthdr.header; /* passed separately, so cleared in the mbuf */
+ m->m_pkthdr.header = NULL;
+ error = (*ifproto->kpi.v1.input)(ifproto->ifp,
+ ifproto->protocol_family, m, frame_header);
+ if (error != 0 && error != EJUSTRETURN)
+ m_freem(m);
+ m = next_packet;
}
- else
- return 0;
- }
-
- ifproto = find_attached_proto(ifp, protocol_family);
-
- if (ifproto == 0) {
- dlil_read_end();
- DLIL_PRINTF("ERROR - dlil_input - if_demux didn't return an if_proto pointer\n");
- m_freem(m);
- return 0;
+ } else if (ifproto->proto_kpi == kProtoKPI_v2) {
+ /* Version 2 protocols support packet lists */
+ error = (*ifproto->kpi.v2.input)(ifproto->ifp,
+ ifproto->protocol_family, m);
+ if (error != 0 && error != EJUSTRETURN)
+ m_freem_list(m);
}
-
- /*
- * Hand the packet off to the protocol.
- */
+ return;
+}
- if (ifproto->dl_domain && (ifproto->dl_domain->dom_flags & DOM_REENTRANT) == 0) {
- lck_mtx_lock(ifproto->dl_domain->dom_mtx);
- }
+__private_extern__ void
+dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
+{ /*
+ * Process the inbound packet chain `m'.  Each packet is unlinked,
+ * accounted, demuxed to a protocol family, run through the interface
+ * filters, and then collected into a list of consecutive packets for
+ * the same protocol attachment; each such list is handed to
+ * dlil_ifproto_input().  When ifp_param is NULL the receiving
+ * interface is taken from each packet's m_pkthdr.rcvif.
+ */
+ int error = 0;
+ protocol_family_t protocol_family;
+ mbuf_t next_packet;
+ ifnet_t ifp = ifp_param;
+ char * frame_header;
+ struct if_proto * last_ifproto = NULL;
+ mbuf_t pkt_first = NULL;
+ mbuf_t * pkt_next = NULL;
- if (ifproto->proto_kpi == kProtoKPI_DLIL)
- retval = (*ifproto->kpi.dlil.dl_input)(m, frame_header,
- ifp, ifproto->protocol_family,
- TRUE);
- else
- retval = ifproto->kpi.v1.input(ifp, ifproto->protocol_family, m, frame_header);
+ KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);
- if (ifproto->dl_domain && (ifproto->dl_domain->dom_flags & DOM_REENTRANT) == 0) {
- lck_mtx_unlock(ifproto->dl_domain->dom_mtx);
- }
- dlil_read_end();
+ while (m != NULL) {
+ struct if_proto *ifproto = NULL;
+ int iorefcnt = 0;
- if (retval == EJUSTRETURN)
- retval = 0;
- else
- if (retval)
+ if (ifp_param == NULL)
+ ifp = m->m_pkthdr.rcvif;
+
+ /* Check if this mbuf looks valid */
+ MBUF_INPUT_CHECK(m, ifp);
+
+ next_packet = m->m_nextpkt; /* unlink this packet from the chain */
+ m->m_nextpkt = NULL;
+ frame_header = m->m_pkthdr.header;
+ m->m_pkthdr.header = NULL;
+
+ /* Get an IO reference count if the interface is not
+ * loopback and it is attached.
+ */
+ if (ifp != lo_ifp) {
+ if (!ifnet_is_attached(ifp, 1)) {
+ m_freem(m);
+ goto next;
+ }
+ iorefcnt = 1; /* released at the bottom of the loop */
+ }
+
+ switch (m->m_pkthdr.prio) { /* per traffic class input counters */
+ case MBUF_TC_BK:
+ atomic_add_64(&ifp->if_tc.ifi_ibkpackets, 1);
+ atomic_add_64(&ifp->if_tc.ifi_ibkbytes, m->m_pkthdr.len);
+ break;
+ case MBUF_TC_VI:
+ atomic_add_64(&ifp->if_tc.ifi_ivipackets, 1);
+ atomic_add_64(&ifp->if_tc.ifi_ivibytes, m->m_pkthdr.len);
+ break;
+ case MBUF_TC_VO:
+ atomic_add_64(&ifp->if_tc.ifi_ivopackets, 1);
+ atomic_add_64(&ifp->if_tc.ifi_ivobytes, m->m_pkthdr.len);
+ break;
+ default:
+ break;
+ }
+
+ /* find which protocol family this packet is for */
+ ifnet_lock_shared(ifp);
+ error = (*ifp->if_demux)(ifp, m, frame_header,
+ &protocol_family);
+ ifnet_lock_done(ifp);
+ if (error != 0) {
+ if (error == EJUSTRETURN)
+ goto next; /* not freed here: presumably the demux routine kept the packet */
+ protocol_family = 0; /* other demux errors: the packet is freed further down */
+ }
+
+ if (m->m_flags & (M_BCAST|M_MCAST))
+ atomic_add_64(&ifp->if_imcasts, 1);
+
+ /* run interface filters, exclude VLAN packets PR-3586856 */
+ if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
+ error = dlil_interface_filters_input(ifp, &m,
+ &frame_header, protocol_family);
+ if (error != 0) {
+ if (error != EJUSTRETURN)
+ m_freem(m);
+ goto next;
+ }
+ }
+ if (error != 0 || ((m->m_flags & M_PROMISC) != 0) ) {
m_freem(m);
+ goto next;
+ }
+
+ /*
+ * Lookup the protocol attachment to this interface; the
+ * attachment used for the previous packet is reused (with an
+ * extra reference) when it is for the same ifp and family.
+ */
+ if (protocol_family == 0) {
+ ifproto = NULL;
+ } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
+ (last_ifproto->protocol_family == protocol_family)) {
+ VERIFY(ifproto == NULL);
+ ifproto = last_ifproto;
+ if_proto_ref(last_ifproto);
+ } else {
+ VERIFY(ifproto == NULL);
+ ifnet_lock_shared(ifp);
+ /* callee holds a proto refcnt upon success */
+ ifproto = find_attached_proto(ifp, protocol_family);
+ ifnet_lock_done(ifp);
+ }
+ if (ifproto == NULL) {
+ /* no protocol for this packet, discard */
+ m_freem(m);
+ goto next;
+ }
+ if (ifproto != last_ifproto) { /* protocol changed: flush the pending list */
+ if (last_ifproto != NULL) {
+ /* pass up the list for the previous protocol */
+ dlil_ifproto_input(last_ifproto, pkt_first);
+ pkt_first = NULL;
+ if_proto_free(last_ifproto);
+ }
+ last_ifproto = ifproto;
+ if_proto_ref(ifproto); /* reference owned by last_ifproto */
+ }
+ /* extend the list */
+ m->m_pkthdr.header = frame_header;
+ if (pkt_first == NULL) {
+ pkt_first = m;
+ } else {
+ *pkt_next = m;
+ }
+ pkt_next = &m->m_nextpkt;
+
+next:
+ if (next_packet == NULL && last_ifproto != NULL) {
+ /* pass up the last list of packets */
+ dlil_ifproto_input(last_ifproto, pkt_first);
+ if_proto_free(last_ifproto);
+ last_ifproto = NULL;
+ }
+ if (ifproto != NULL) { /* drop this packet's own reference */
+ if_proto_free(ifproto);
+ ifproto = NULL;
+ }
+
+ m = next_packet;
+
+ /* update the driver's multicast filter, if needed */
+ if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
+ ifp->if_updatemcasts = 0;
+ if (iorefcnt == 1)
+ ifnet_decr_iorefcnt(ifp);
+ }
KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
- return retval;
+ return;
+}
+
+/*
+ * Issue a SIOCADDMULTI ioctl with no argument on ifp and log the outcome
+ * together with ifp->if_updatemcasts.  EAFNOSUPPORT from the ioctl is
+ * treated as success.  The ioctl result is only logged; the function
+ * always returns 0.
+ */
+errno_t
+if_mcasts_update(struct ifnet *ifp)
+{
+	const char *outcome;
+	errno_t err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
+
+	if (err == EAFNOSUPPORT)
+		err = 0;
+
+	if (err == 0)
+		outcome = "successfully restored";
+	else
+		outcome = "failed to restore";
+
+	printf("%s%d: %s %d suspended link-layer multicast membership(s) "
+	    "(err=%d)\n", ifp->if_name, ifp->if_unit, outcome,
+	    ifp->if_updatemcasts, err);
+
+	/* just return success */
+	return (0);
}
+/*
+ * Deliver an interface event to its consumers and then post it as a
+ * kernel event.  While an io reference on ifp is held, the event is
+ * passed in turn to every interface filter that has a filt_event
+ * callback, to every attached protocol that has an event callback, and
+ * finally to the interface's own if_event handler.  If the interface is
+ * not attached those callbacks are skipped and the event is only posted.
+ * Returns the result of kev_post_msg().
+ */
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
{
struct ifnet_filter *filter;
-
- if (ifp_use(ifp, kIfNetUseCount_MustNotBeZero) == 0) {
- dlil_read_begin();
-
- /* Pass the event to the interface filters */
- TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
- if (filter->filt_event)
- filter->filt_event(filter->filt_cookie, ifp, filter->filt_protocol, event);
+
+ /* Get an io ref count if the interface is attached */
+ if (!ifnet_is_attached(ifp, 1))
+ goto done;
+
+ /*
+ * Pass the event to the interface filters
+ */
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+ /* prevent filter list from changing in case we drop the lock */
+ if_flt_monitor_busy(ifp);
+ TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
+ if (filter->filt_event != NULL) {
+ /* the filter callback runs without if_flt_lock held */
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
+ filter->filt_event(filter->filt_cookie, ifp,
+ filter->filt_protocol, event);
+
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
}
-
- if (ifp->if_proto_hash) {
- int i;
-
- for (i = 0; i < PROTO_HASH_SLOTS; i++) {
- struct if_proto *proto;
-
- SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
- /* Pass the event to the protocol */
- if (proto->proto_kpi == kProtoKPI_DLIL) {
- if (proto->kpi.dlil.dl_event)
- proto->kpi.dlil.dl_event(ifp, event);
- }
- else {
- if (proto->kpi.v1.event)
- proto->kpi.v1.event(ifp, proto->protocol_family, event);
- }
+ }
+ /* we're done with the filter list */
+ if_flt_monitor_unbusy(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
+ /* Pass the event to every attached protocol */
+ ifnet_lock_shared(ifp);
+ if (ifp->if_proto_hash != NULL) {
+ int i;
+
+ for (i = 0; i < PROTO_HASH_SLOTS; i++) {
+ struct if_proto *proto;
+
+ SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
+ next_hash) {
+ /* pick the callback matching the protocol's KPI version */
+ proto_media_event eventp =
+ (proto->proto_kpi == kProtoKPI_v1 ?
+ proto->kpi.v1.event :
+ proto->kpi.v2.event);
+
+ if (eventp != NULL) {
+ /*
+ * Take a proto reference, then drop the ifnet
+ * lock for the duration of the callback.
+ */
+ if_proto_ref(proto);
+ ifnet_lock_done(ifp);
+
+ eventp(ifp, proto->protocol_family,
+ event);
+
+ ifnet_lock_shared(ifp);
+ if_proto_free(proto);
}
}
}
-
- dlil_read_end();
-
- /* Pass the event to the interface */
- if (ifp->if_event)
- ifp->if_event(ifp, event);
-
- if (ifp_unuse(ifp))
- ifp_use_reached_zero(ifp);
}
-
- return kev_post_msg(event);
+ ifnet_lock_done(ifp);
+
+ /* Pass the event to the interface */
+ if (ifp->if_event != NULL)
+ ifp->if_event(ifp, event);
+
+ /* Release the io ref count */
+ ifnet_decr_iorefcnt(ifp);
+
+done:
+ /* the event is posted whether or not the interface was attached */
+ return (kev_post_msg(event));
}
-int
-dlil_event(struct ifnet *ifp, struct kern_event_msg *event)
+/*
+ * Repackage a kern_event_msg as a kev_msg and dispatch it through
+ * dlil_event_internal().
+ *
+ * Returns EINVAL when ifp or event is NULL, otherwise whatever
+ * dlil_event_internal() returns.
+ */
+errno_t
+ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
- int result = 0;
-
struct kev_msg kev_msg;
+ int result = 0;
+
+ if (ifp == NULL || event == NULL)
+ return (EINVAL);
+
+ bzero(&kev_msg, sizeof (kev_msg));
kev_msg.vendor_code = event->vendor_code;
kev_msg.kev_class = event->kev_class;
kev_msg.kev_subclass = event->kev_subclass;
kev_msg.event_code = event->event_code;
kev_msg.dv[0].data_ptr = &event->event_data[0];
+ /* the payload is whatever follows the fixed-size message header */
kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
kev_msg.dv[1].data_length = 0;
-
result = dlil_event_internal(ifp, &kev_msg);
-
- return result;
+ return (result);
}
-int
-dlil_output_list(
- struct ifnet* ifp,
- u_long proto_family,
- struct mbuf *packetlist,
- caddr_t route,
- const struct sockaddr *dest,
- int raw)
-{
- char *frame_type = 0;
- char *dst_linkaddr = 0;
- int error, retval = 0;
- char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
- char dst_linkaddr_buffer[MAX_LINKADDR * 4];
- struct ifnet_filter *filter;
- struct if_proto *proto = 0;
+#if CONFIG_MACF_NET
+#include <netinet/ip6.h>
+#include <netinet/ip.h>
+/*
+ * Classify an outbound packet as SOCK_STREAM, SOCK_DGRAM or SOCK_RAW.
+ *
+ * For non-raw PF_INET / PF_INET6 packets the IP header is pulled up and
+ * its protocol field is inspected: TCP maps to SOCK_STREAM, UDP maps to
+ * SOCK_DGRAM.  Everything else, including raw output and other families,
+ * is reported as SOCK_RAW.  On a successful pull-up *mp is updated to the
+ * mbuf returned by m_pullup().
+ *
+ * NOTE(review): when m_pullup() fails, *mp is left untouched and SOCK_RAW
+ * is returned.  m_pullup() presumably releases the chain on failure, so
+ * confirm that callers do not keep using *mp in that case.
+ */
+static int
+dlil_get_socket_type(struct mbuf **mp, int family, int raw)
+{
struct mbuf *m;
-
- KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0);
-#if BRIDGE
- if ((raw != 0) || proto_family != PF_INET || do_brige) {
-#else
- if ((raw != 0) || proto_family != PF_INET) {
-#endif
- while (packetlist) {
- m = packetlist;
- packetlist = packetlist->m_nextpkt;
- m->m_nextpkt = NULL;
- error = dlil_output(ifp, proto_family, m, route, dest, raw);
- if (error) {
- if (packetlist)
- m_freem_list(packetlist);
- return (error);
- }
+ struct ip *ip;
+ struct ip6_hdr *ip6;
+ int type = SOCK_RAW;
+
+ if (!raw) {
+ switch (family) {
+ case PF_INET:
+ m = m_pullup(*mp, sizeof(struct ip));
+ if (m == NULL)
+ break;
+ *mp = m;
+ ip = mtod(m, struct ip *);
+ if (ip->ip_p == IPPROTO_TCP)
+ type = SOCK_STREAM;
+ else if (ip->ip_p == IPPROTO_UDP)
+ type = SOCK_DGRAM;
+ break;
+ case PF_INET6:
+ m = m_pullup(*mp, sizeof(struct ip6_hdr));
+ if (m == NULL)
+ break;
+ *mp = m;
+ ip6 = mtod(m, struct ip6_hdr *);
+ /* only the first next-header value is examined */
+ if (ip6->ip6_nxt == IPPROTO_TCP)
+ type = SOCK_STREAM;
+ else if (ip6->ip6_nxt == IPPROTO_UDP)
+ type = SOCK_DGRAM;
+ break;
}
- return (0);
- }
-
- dlil_read_begin();
-
- frame_type = frame_type_buffer;
- dst_linkaddr = dst_linkaddr_buffer;
- m = packetlist;
- packetlist = packetlist->m_nextpkt;
- m->m_nextpkt = NULL;
-
- proto = find_attached_proto(ifp, proto_family);
- if (proto == NULL) {
- retval = ENXIO;
- goto cleanup;
}
- retval = 0;
- if (proto->proto_kpi == kProtoKPI_DLIL) {
- if (proto->kpi.dlil.dl_pre_output)
- retval = proto->kpi.dlil.dl_pre_output(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr);
- }
- else {
- if (proto->kpi.v1.pre_output)
- retval = proto->kpi.v1.pre_output(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr);
- }
+ return (type);
+}
+#endif
- if (retval) {
- if (retval != EJUSTRETURN) {
- m_freem(m);
- }
- goto cleanup;
+/*
+ * Account an outbound packet against the per-traffic-class counters in
+ * ifp->if_tc, selected by m->m_pkthdr.prio.  One packet and
+ * m->m_pkthdr.len bytes are added for the BK, VI and VO classes; any
+ * other priority, and any mbuf without M_PKTHDR, is not counted.
+ */
+static void
+if_inc_traffic_class_out(ifnet_t ifp, mbuf_t m)
+{
+ /* the priority and length live in the packet header */
+ if (!(m->m_flags & M_PKTHDR))
+ return;
+
+ switch (m->m_pkthdr.prio) {
+ case MBUF_TC_BK:
+ atomic_add_64(&ifp->if_tc.ifi_obkpackets, 1);
+ atomic_add_64(&ifp->if_tc.ifi_obkbytes, m->m_pkthdr.len);
+ break;
+ case MBUF_TC_VI:
+ atomic_add_64(&ifp->if_tc.ifi_ovipackets, 1);
+ atomic_add_64(&ifp->if_tc.ifi_ovibytes, m->m_pkthdr.len);
+ break;
+ case MBUF_TC_VO:
+ atomic_add_64(&ifp->if_tc.ifi_ovopackets, 1);
+ atomic_add_64(&ifp->if_tc.ifi_ovobytes, m->m_pkthdr.len);
+ break;
+ default:
+ break;
}
-
- do {
-
-
- if (ifp->if_framer) {
- retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, frame_type);
- if (retval) {
- if (retval != EJUSTRETURN) {
- m_freem(m);
- }
- goto cleanup;
- }
- }
-
- /*
- * Let interface filters (if any) do their thing ...
- */
- /* Do not pass VLAN tagged packets to filters PR-3586856 */
- if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
- TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
- if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_family)) &&
- filter->filt_output) {
- retval = filter->filt_output(filter->filt_cookie, ifp, proto_family, &m);
- if (retval) {
- if (retval == EJUSTRETURN)
- continue;
- else {
- m_freem(m);
- }
- goto cleanup;
- }
- }
- }
- }
- /*
- * Finally, call the driver.
- */
-
- KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
- retval = ifp->if_output(ifp, m);
- if (retval) {
- printf("dlil_output_list: output error retval = %x\n", retval);
- goto cleanup;
- }
- KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
-
- m = packetlist;
- if (m) {
- packetlist = packetlist->m_nextpkt;
- m->m_nextpkt = NULL;
- }
- } while (m);
-
-
- KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0);
-
-cleanup:
- dlil_read_end();
- if (packetlist) /* if any packet left, clean up */
- m_freem_list(packetlist);
- if (retval == EJUSTRETURN)
- retval = 0;
- return retval;
-}
+}
/*
* dlil_output
* because a protocol is likely to interact with an ifp while it
* is under the protocol lock.
*/
-int
-dlil_output(
- struct ifnet* ifp,
- u_long proto_family,
- struct mbuf *m,
- caddr_t route,
- const struct sockaddr *dest,
- int raw)
-{
- char *frame_type = 0;
- char *dst_linkaddr = 0;
- int retval = 0;
- char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
- char dst_linkaddr_buffer[MAX_LINKADDR * 4];
- struct ifnet_filter *filter;
-
+/*
+ * Transmit a chain of packets (linked through m_nextpkt) on ifp.
+ *
+ * An io reference on the interface is held for the whole call.  For
+ * non-raw output the attached protocol's pre_output callback is run to
+ * obtain the frame type and destination link-layer address; a packet for
+ * which it returns EJUSTRETURN is skipped and the next one is tried.
+ * Each remaining packet is then framed by if_framer (non-raw only), run
+ * through the interface output filters unless it carries a VLAN tag,
+ * normalized when the interface lacks IFNET_MULTIPAGES, checked against
+ * the interface's TSO capabilities, and handed to if_output -- one at a
+ * time, or as a single chain after the loop when IFEF_SENDLIST is set.
+ *
+ * Packets still on packetlist, or queued for a send-list driver but not
+ * yet handed over, are freed on the way out.
+ *
+ * Returns ENXIO when the interface or (non-raw) protocol is not attached,
+ * EMSGSIZE when a TSO packet meets an interface without the matching TSO
+ * capability, otherwise the status of the last step performed with
+ * EJUSTRETURN mapped to 0.
+ */
+errno_t
+dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
+ void *route, const struct sockaddr *dest, int raw)
+{
+ char *frame_type = NULL;
+ char *dst_linkaddr = NULL;
+ int retval = 0;
+ char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
+ char dst_linkaddr_buffer[MAX_LINKADDR * 4];
+ struct if_proto *proto = NULL;
+ mbuf_t m;
+ mbuf_t send_head = NULL;
+ mbuf_t *send_tail = &send_head;
+ int iorefcnt = 0;
+
KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0);
-
- dlil_read_begin();
-
- frame_type = frame_type_buffer;
- dst_linkaddr = dst_linkaddr_buffer;
-
+
+ /* Get an io refcnt if the interface is attached to prevent ifnet_detach
+ * from happening while this operation is in progress */
+ if (!ifnet_is_attached(ifp, 1)) {
+ retval = ENXIO;
+ goto cleanup;
+ }
+ iorefcnt = 1;
+
+ /* update the driver's multicast filter, if needed */
+ if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
+ ifp->if_updatemcasts = 0;
+
+ frame_type = frame_type_buffer;
+ dst_linkaddr = dst_linkaddr_buffer;
+
if (raw == 0) {
- struct if_proto *proto = 0;
-
+ ifnet_lock_shared(ifp);
+ /* callee holds a proto refcnt upon success */
proto = find_attached_proto(ifp, proto_family);
if (proto == NULL) {
- m_freem(m);
+ ifnet_lock_done(ifp);
retval = ENXIO;
goto cleanup;
}
-
+ ifnet_lock_done(ifp);
+ }
+
+preout_again:
+ if (packetlist == NULL)
+ goto cleanup;
+
+ /* detach the head packet from the list */
+ m = packetlist;
+ packetlist = packetlist->m_nextpkt;
+ m->m_nextpkt = NULL;
+
+ if (raw == 0) {
+ proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
+ proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
retval = 0;
- if (proto->proto_kpi == kProtoKPI_DLIL) {
- if (proto->kpi.dlil.dl_pre_output)
- retval = proto->kpi.dlil.dl_pre_output(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr);
- }
- else {
- if (proto->kpi.v1.pre_output)
- retval = proto->kpi.v1.pre_output(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr);
- }
-
- if (retval) {
- if (retval != EJUSTRETURN) {
+ if (preoutp != NULL) {
+ retval = preoutp(ifp, proto_family, &m, dest, route,
+ frame_type, dst_linkaddr);
+
+ if (retval != 0) {
+ /* EJUSTRETURN: skip this packet, try the next one */
+ if (retval == EJUSTRETURN)
+ goto preout_again;
m_freem(m);
+ goto cleanup;
}
- goto cleanup;
}
}
-
- /*
- * Call framing module
- */
- if ((raw == 0) && (ifp->if_framer)) {
- retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, frame_type);
- if (retval) {
- if (retval != EJUSTRETURN) {
- m_freem(m);
+
+#if CONFIG_MACF_NET
+ {
+ /*
+ * Classify the packet before making the MAC call:
+ * dlil_get_socket_type() may replace m, and the order in
+ * which call arguments are evaluated is unspecified.
+ */
+ int sock_type = dlil_get_socket_type(&m, proto_family, raw);
+
+ retval = mac_ifnet_check_transmit(ifp, m, proto_family,
+ sock_type);
+ if (retval) {
+ m_freem(m);
+ goto cleanup;
+ }
+ }
+#endif
+
+ do {
+#if CONFIG_DTRACE
+ if (proto_family == PF_INET) {
+ struct ip *ip = mtod(m, struct ip*);
+ DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
+ struct ip *, ip, struct ifnet *, ifp,
+ struct ip *, ip, struct ip6_hdr *, NULL);
+
+ } else if (proto_family == PF_INET6) {
+ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*);
+ DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL,
+ struct ip6_hdr *, ip6, struct ifnet*, ifp,
+ struct ip*, NULL, struct ip6_hdr *, ip6);
+ }
+#endif /* CONFIG_DTRACE */
+
+ if (raw == 0 && ifp->if_framer) {
+ int rcvif_set = 0;
+
+ /*
+ * If this is a broadcast packet that needs to be
+ * looped back into the system, set the inbound ifp
+ * to that of the outbound ifp. This will allow
+ * us to determine that it is a legitimate packet
+ * for the system. Only set the ifp if it's not
+ * already set, just to be safe.
+ */
+ if ((m->m_flags & (M_BCAST | M_LOOP)) &&
+ m->m_pkthdr.rcvif == NULL) {
+ m->m_pkthdr.rcvif = ifp;
+ rcvif_set = 1;
+ }
+
+ retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
+ frame_type);
+ if (retval) {
+ if (retval != EJUSTRETURN)
+ m_freem(m);
+ goto next;
+ }
+
+ /*
+ * Clear the ifp if it was set above, and to be
+ * safe, only if it is still the same as the
+ * outbound ifp we have in context. If it was
+ * looped back, then a copy of it was sent to the
+ * loopback interface with the rcvif set, and we
+ * are clearing the one that will go down to the
+ * layer below.
+ */
+ if (rcvif_set && m->m_pkthdr.rcvif == ifp)
+ m->m_pkthdr.rcvif = NULL;
+ }
+
+ /*
+ * Let interface filters (if any) do their thing ...
+ */
+ /* Do not pass VLAN tagged packets to filters PR-3586856 */
+ if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
+ retval = dlil_interface_filters_output(ifp,
+ &m, proto_family);
+ if (retval != 0) {
+ if (retval != EJUSTRETURN)
+ m_freem(m);
+ goto next;
}
+ }
+ /*
+ * Strip away M_PROTO1 bit prior to sending packet to the driver
+ * as this field may be used by the driver
+ */
+ m->m_flags &= ~M_PROTO1;
+
+ /*
+ * If the underlying interface is not capable of handling a
+ * packet whose data portion spans across physically disjoint
+ * pages, we need to "normalize" the packet so that we pass
+ * down a chain of mbufs where each mbuf points to a span that
+ * resides in the system page boundary. If the packet does
+ * not cross page(s), the following is a no-op.
+ */
+ if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
+ if ((m = m_normalize(m)) == NULL)
+ goto next;
+ }
+
+ /*
+ * If this is a TSO packet, make sure the interface still
+ * advertises TSO capability.
+ */
+
+ if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) &&
+ !(ifp->if_hwassist & IFNET_TSO_IPV4)) {
+ retval = EMSGSIZE;
+ m_freem(m);
goto cleanup;
}
- }
-
-#if BRIDGE
- /* !!!LOCKING!!!
- *
- * Need to consider how to handle this.
- */
- broken-locking
- if (do_bridge) {
- struct mbuf *m0 = m;
- struct ether_header *eh = mtod(m, struct ether_header *);
-
- if (m->m_pkthdr.rcvif)
- m->m_pkthdr.rcvif = NULL;
- ifp = bridge_dst_lookup(eh);
- bdg_forward(&m0, ifp);
- if (m0)
- m_freem(m0);
-
- return 0;
- }
-#endif
-
-
- /*
- * Let interface filters (if any) do their thing ...
- */
-
- /* Do not pass VLAN tagged packets to filters PR-3586856 */
- if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
- TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
- if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_family)) &&
- filter->filt_output) {
- retval = filter->filt_output(filter->filt_cookie, ifp, proto_family, &m);
- if (retval) {
- if (retval != EJUSTRETURN)
- m_freem(m);
- goto cleanup;
- }
+
+ if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) &&
+ !(ifp->if_hwassist & IFNET_TSO_IPV6)) {
+ retval = EMSGSIZE;
+ m_freem(m);
+ goto cleanup;
+ }
+
+ /*
+ * Finally, call the driver.
+ */
+ if ((ifp->if_eflags & IFEF_SENDLIST) != 0) {
+ /* send-list drivers get the whole chain after the loop */
+ *send_tail = m;
+ send_tail = &m->m_nextpkt;
+ } else {
+ if_inc_traffic_class_out(ifp, m);
+ KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
+ 0,0,0,0,0);
+ retval = ifp->if_output(ifp, m);
+ if (retval && dlil_verbose) {
+ printf("%s: output error on %s%d retval = %d\n",
+ __func__, ifp->if_name, ifp->if_unit,
+ retval);
}
+ KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
+ 0,0,0,0,0);
+ }
+
+next:
+ m = packetlist;
+ if (m) {
+ packetlist = packetlist->m_nextpkt;
+ m->m_nextpkt = NULL;
}
+ } while (m);
+
+ if (send_head) {
+ if_inc_traffic_class_out(ifp, send_head);
+
+ KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
+ retval = ifp->if_output(ifp, send_head);
+ /* the chain now belongs to the driver */
+ send_head = NULL;
+ if (retval && dlil_verbose) {
+ printf("%s: output error on %s%d retval = %d\n",
+ __func__, ifp->if_name, ifp->if_unit, retval);
+ }
+ KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
}
-
- /*
- * Finally, call the driver.
- */
-
- KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
- retval = ifp->if_output(ifp, m);
- KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
-
+
KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0);
cleanup:
- dlil_read_end();
+ if (proto != NULL)
+ if_proto_free(proto);
+ if (send_head != NULL) /* queued for the driver but never sent */
+ mbuf_freem_list(send_head);
+ if (packetlist) /* if any packets are left, clean up */
+ mbuf_freem_list(packetlist);
if (retval == EJUSTRETURN)
retval = 0;
- return retval;
+ if (iorefcnt == 1)
+ ifnet_decr_iorefcnt(ifp);
+
+ return (retval);
}
-int
-dlil_ioctl(u_long proto_fam,
- struct ifnet *ifp,
- u_long ioctl_code,
- caddr_t ioctl_arg)
-{
- struct ifnet_filter *filter;
- int retval = EOPNOTSUPP;
- int result = 0;
- struct if_family_str *if_family;
- int holding_read = 0;
-
- /* Attempt to increment the use count. If it's zero, bail out, the ifp is invalid */
- result = ifp_use(ifp, kIfNetUseCount_MustNotBeZero);
- if (result != 0)
- return EOPNOTSUPP;
-
- dlil_read_begin();
- holding_read = 1;
-
+/*
+ * Dispatch an ioctl on ifp, in order, to the matching interface filters,
+ * to the attached protocol proto_fam (when non-zero), and to the
+ * interface's own if_ioctl handler.
+ *
+ * retval starts as EOPNOTSUPP and is replaced by a handler's result only
+ * while nobody has handled the ioctl yet, or when a handler returns
+ * EJUSTRETURN.  ENOTSUP from a handler is treated as EOPNOTSUPP.  Any
+ * result other than 0 or EOPNOTSUPP ends the dispatch immediately.
+ *
+ * Returns EINVAL for a NULL ifp or a zero ioctl_code, EOPNOTSUPP when the
+ * interface is not attached or nobody handled the ioctl, otherwise the
+ * merged handler result with EJUSTRETURN mapped to 0.
+ */
+errno_t
+ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
+ void *ioctl_arg)
+{
+ struct ifnet_filter *filter;
+ int retval = EOPNOTSUPP;
+ int result = 0;
+
+ if (ifp == NULL || ioctl_code == 0)
+ return (EINVAL);
+
+ /* Get an io ref count if the interface is attached */
+ if (!ifnet_is_attached(ifp, 1))
+ return (EOPNOTSUPP);
+
/* Run the interface filters first.
* We want to run all filters before calling the protocol,
* interface family, or interface.
*/
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+ /* prevent filter list from changing in case we drop the lock */
+ if_flt_monitor_busy(ifp);
TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
- if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_fam)) &&
- filter->filt_ioctl != NULL) {
- result = filter->filt_ioctl(filter->filt_cookie, ifp, proto_fam, ioctl_code, ioctl_arg);
+ if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
+ filter->filt_protocol == proto_fam)) {
+ /* the filter callback runs without if_flt_lock held */
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
+ result = filter->filt_ioctl(filter->filt_cookie, ifp,
+ proto_fam, ioctl_code, ioctl_arg);
+
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+
/* Only update retval if no one has handled the ioctl */
if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
if (result == ENOTSUP)
result = EOPNOTSUPP;
retval = result;
- if (retval && retval != EOPNOTSUPP) {
+ if (retval != 0 && retval != EOPNOTSUPP) {
+ /* we're done with the filter list */
+ if_flt_monitor_unbusy(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
goto cleanup;
}
}
}
}
-
+ /* we're done with the filter list */
+ if_flt_monitor_unbusy(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
/* Allow the protocol to handle the ioctl */
- if (proto_fam) {
- struct if_proto *proto = find_attached_proto(ifp, proto_fam);
-
- if (proto != 0) {
+ if (proto_fam != 0) {
+ struct if_proto *proto;
+
+ /* callee holds a proto refcnt upon success */
+ ifnet_lock_shared(ifp);
+ proto = find_attached_proto(ifp, proto_fam);
+ ifnet_lock_done(ifp);
+ if (proto != NULL) {
+ proto_media_ioctl ioctlp =
+ (proto->proto_kpi == kProtoKPI_v1 ?
+ proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
result = EOPNOTSUPP;
- if (proto->proto_kpi == kProtoKPI_DLIL) {
- if (proto->kpi.dlil.dl_ioctl)
- result = proto->kpi.dlil.dl_ioctl(proto_fam, ifp, ioctl_code, ioctl_arg);
- }
- else {
- if (proto->kpi.v1.ioctl)
- result = proto->kpi.v1.ioctl(ifp, proto_fam, ioctl_code, ioctl_arg);
- }
-
+ if (ioctlp != NULL)
+ result = ioctlp(ifp, proto_fam, ioctl_code,
+ ioctl_arg);
+ if_proto_free(proto);
+
/* Only update retval if no one has handled the ioctl */
if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
if (result == ENOTSUP)
result = EOPNOTSUPP;
retval = result;
- if (retval && retval != EOPNOTSUPP) {
+ if (retval && retval != EOPNOTSUPP)
goto cleanup;
- }
}
}
}
-
- /*
- * Since we have incremented the use count on the ifp, we are guaranteed
- * that the ifp will not go away (the function pointers may not be changed).
- * We release the dlil read lock so the interface ioctl may trigger a
- * protocol attach. This happens with vlan and may occur with other virtual
- * interfaces.
- */
- dlil_read_end();
- holding_read = 0;
-
+
/* retval is either 0 or EOPNOTSUPP */
-
- /*
- * Let the family handle this ioctl.
- * If it returns something non-zero and not EOPNOTSUPP, we're done.
- * If it returns zero, the ioctl was handled, so set retval to zero.
- */
- if_family = find_family_module(ifp->if_family);
- if ((if_family) && (if_family->ifmod_ioctl)) {
- result = (*if_family->ifmod_ioctl)(ifp, ioctl_code, ioctl_arg);
-
- /* Only update retval if no one has handled the ioctl */
- if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
- if (result == ENOTSUP)
- result = EOPNOTSUPP;
- retval = result;
- if (retval && retval != EOPNOTSUPP) {
- goto cleanup;
- }
- }
- }
-
+
/*
* Let the interface handle this ioctl.
* If it returns EOPNOTSUPP, ignore that, we may have
* already handled this in the protocol or family.
*/
- if (ifp->if_ioctl)
+ if (ifp->if_ioctl)
result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
-
+
/* Only update retval if no one has handled the ioctl */
if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
if (result == ENOTSUP)
result = EOPNOTSUPP;
retval = result;
if (retval && retval != EOPNOTSUPP) {
goto cleanup;
}
}
-
-cleanup:
- if (holding_read)
- dlil_read_end();
- if (ifp_unuse(ifp))
- ifp_use_reached_zero(ifp);
+cleanup:
if (retval == EJUSTRETURN)
retval = 0;
- return retval;
+
+ /* release the io ref count taken on entry */
+ ifnet_decr_iorefcnt(ifp);
+
+ return (retval);
}
+/*
+ * Forward a BPF tap mode/callback change to the driver's if_set_bpf_tap
+ * hook.  Returns 0 when the driver has no such hook, ENXIO when the
+ * interface is not attached, otherwise whatever the hook returns.
+ */
__private_extern__ errno_t
-dlil_set_bpf_tap(
- ifnet_t ifp,
- bpf_tap_mode mode,
- bpf_packet_func callback)
+dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
errno_t error = 0;
-
- dlil_read_begin();
- if (ifp->if_set_bpf_tap)
+
+ /* nothing to do when the driver has no tap hook */
+ if (ifp->if_set_bpf_tap == NULL)
+ return (error);
+
+ /* hold an io reference on the interface across the driver call */
+ if (!ifnet_is_attached(ifp, 1))
+ return (ENXIO);
+
error = ifp->if_set_bpf_tap(ifp, mode, callback);
- dlil_read_end();
-
- return error;
+ ifnet_decr_iorefcnt(ifp);
+
+ return (error);
}
-__private_extern__ errno_t
-dlil_resolve_multi(
- struct ifnet *ifp,
- const struct sockaddr *proto_addr,
- struct sockaddr *ll_addr,
- size_t ll_len)
+/*
+ * Resolve a protocol-level multicast address into a link-layer address.
+ *
+ * The protocol attached for proto_addr->sa_family is asked first, through
+ * its resolve_multi callback, to fill in ll_addr (ll_len bytes, zeroed
+ * beforehand).  The interface's if_check_multi hook, when present, then
+ * verifies the resolved link-layer address if the protocol produced one,
+ * or the protocol address itself if the protocol did not handle it.
+ *
+ * Returns EOPNOTSUPP when the interface is not attached or nobody handled
+ * the address, otherwise the result of the last callback invoked.
+ */
+errno_t
+dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
+ struct sockaddr *ll_addr, size_t ll_len)
{
errno_t result = EOPNOTSUPP;
struct if_proto *proto;
const struct sockaddr *verify;
-
- dlil_read_begin();
-
+ proto_media_resolve_multi resolvep;
+
+ /* hold an io reference on the interface for the duration */
+ if (!ifnet_is_attached(ifp, 1))
+ return result;
+
bzero(ll_addr, ll_len);
-
- /* Call the protocol first */
+
+ /* Call the protocol first; callee holds a proto refcnt upon success */
+ ifnet_lock_shared(ifp);
proto = find_attached_proto(ifp, proto_addr->sa_family);
- if (proto != NULL && proto->proto_kpi != kProtoKPI_DLIL &&
- proto->kpi.v1.resolve_multi != NULL) {
- result = proto->kpi.v1.resolve_multi(ifp, proto_addr,
- (struct sockaddr_dl*)ll_addr, ll_len);
+ ifnet_lock_done(ifp);
+ if (proto != NULL) {
+ resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
+ proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
+ if (resolvep != NULL)
+ result = resolvep(ifp, proto_addr,
+ (struct sockaddr_dl*)ll_addr, ll_len);
+ if_proto_free(proto);
}
-
+
/* Let the interface verify the multicast address */
if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
if (result == 0)
verify = ll_addr;
else
verify = proto_addr;
result = ifp->if_check_multi(ifp, verify);
}
-
- dlil_read_end();
-
- return result;
+
+ ifnet_decr_iorefcnt(ifp);
+ return (result);
}
+/*
+ * Send one ARP packet on ifp through the send_arp callback of the
+ * protocol attached for target_proto->sa_family.
+ *
+ * Returns ENOTSUP when no such protocol is attached or it provides no
+ * send_arp callback, otherwise the callback's result.
+ */
__private_extern__ errno_t
-dlil_send_arp_internal(
- ifnet_t ifp,
- u_short arpop,
- const struct sockaddr_dl* sender_hw,
- const struct sockaddr* sender_proto,
- const struct sockaddr_dl* target_hw,
- const struct sockaddr* target_proto)
+dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
+ const struct sockaddr_dl* sender_hw, const struct sockaddr* sender_proto,
+ const struct sockaddr_dl* target_hw, const struct sockaddr* target_proto)
{
struct if_proto *proto;
errno_t result = 0;
-
- dlil_read_begin();
-
+
+ /* callee holds a proto refcnt upon success */
+ ifnet_lock_shared(ifp);
proto = find_attached_proto(ifp, target_proto->sa_family);
- if (proto == NULL || proto->proto_kpi == kProtoKPI_DLIL ||
- proto->kpi.v1.send_arp == NULL) {
+ ifnet_lock_done(ifp);
+ if (proto == NULL) {
result = ENOTSUP;
+ } else {
+ proto_media_send_arp arpp;
+ /* pick the callback matching the protocol's KPI version */
+ arpp = (proto->proto_kpi == kProtoKPI_v1 ?
+ proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
+ if (arpp == NULL)
+ result = ENOTSUP;
+ else
+ result = arpp(ifp, arpop, sender_hw, sender_proto,
+ target_hw, target_proto);
+ /* drop the reference taken by find_attached_proto() */
+ if_proto_free(proto);
}
- else {
- result = proto->kpi.v1.send_arp(ifp, arpop, sender_hw, sender_proto,
- target_hw, target_proto);
+
+ return (result);
+}
+
+/*
+ * An ARP announcement carries the same IPv4 address as sender and as
+ * target.  Returns non-zero when both addresses are present and equal,
+ * FALSE when either one is missing.
+ */
+static __inline__ int
+_is_announcement(const struct sockaddr_in * sender_sin,
+ const struct sockaddr_in * target_sin)
+{
+ /* either side may be absent; never dereference a NULL address */
+ if (sender_sin == NULL || target_sin == NULL) {
+ return (FALSE);
}
-
- dlil_read_end();
-
- return result;
+ return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
+/*
+ * Send an ARP packet.
+ *
+ * An ARP request for an IPv4 link-local target (with ipv4_ll_arp_aware
+ * set) that is not an announcement is sent on every interface flagged
+ * IFEF_ARPLL, using that interface's link-layer address and first
+ * AF_INET address as the sender.  Everything else is sent on ifp only,
+ * with the sender and target supplied by the caller.
+ *
+ * Returns EINVAL when target_proto is NULL or the sender and target
+ * address families differ.  In the all-interfaces case the result is
+ * ENOTSUP until some interface yields a different result, which is then
+ * kept; otherwise the result of dlil_send_arp_internal() is returned.
+ */
__private_extern__ errno_t
-dlil_send_arp(
- ifnet_t ifp,
- u_short arpop,
- const struct sockaddr_dl* sender_hw,
- const struct sockaddr* sender_proto,
- const struct sockaddr_dl* target_hw,
- const struct sockaddr* target_proto)
+dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
+ const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw,
+ const struct sockaddr* target_proto)
{
errno_t result = 0;
-
- if (target_proto == NULL || (sender_proto &&
- sender_proto->sa_family != target_proto->sa_family))
- return EINVAL;
-
+ const struct sockaddr_in * sender_sin;
+ const struct sockaddr_in * target_sin;
+
+ if (target_proto == NULL || (sender_proto != NULL &&
+ sender_proto->sa_family != target_proto->sa_family))
+ return (EINVAL);
+
/*
* If this is an ARP request and the target IP is IPv4LL,
- * send the request on all interfaces.
+ * send the request on all interfaces. The exception is
+ * an announcement, which must only appear on the specific
+ * interface.
*/
- if (IN_LINKLOCAL(((const struct sockaddr_in*)target_proto)->sin_addr.s_addr)
- && ipv4_ll_arp_aware != 0 && target_proto->sa_family == AF_INET &&
- arpop == ARPOP_REQUEST) {
+ sender_sin = (const struct sockaddr_in *)sender_proto;
+ target_sin = (const struct sockaddr_in *)target_proto;
+ /* sender_sin may be NULL here; it must go in the sender slot */
+ if (target_proto->sa_family == AF_INET &&
+ IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
+ ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
+ !_is_announcement(sender_sin, target_sin)) {
ifnet_t *ifp_list;
u_int32_t count;
u_int32_t ifp_on;
-
+
result = ENOTSUP;
if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
for (ifp_on = 0; ifp_on < count; ifp_on++) {
- errno_t new_result;
- ifaddr_t source_hw = NULL;
- ifaddr_t source_ip = NULL;
- struct sockaddr_in source_ip_copy;
-
+ errno_t new_result;
+ ifaddr_t source_hw = NULL;
+ ifaddr_t source_ip = NULL;
+ struct sockaddr_in source_ip_copy;
+ struct ifnet *cur_ifp = ifp_list[ifp_on];
+
/*
- * Only arp on interfaces marked for IPv4LL ARPing. This may
- * mean that we don't ARP on the interface the subnet route
- * points to.
+ * Only arp on interfaces marked for IPv4LL
+ * ARPing. This may mean that we don't ARP on
+ * the interface the subnet route points to.
*/
- if ((ifp_list[ifp_on]->if_eflags & IFEF_ARPLL) == 0) {
+ if (!(cur_ifp->if_eflags & IFEF_ARPLL))
continue;
- }
-
- source_hw = TAILQ_FIRST(&ifp_list[ifp_on]->if_addrhead);
-
+
/* Find the source IP address */
- ifnet_lock_shared(ifp_list[ifp_on]);
- TAILQ_FOREACH(source_ip, &ifp_list[ifp_on]->if_addrhead,
- ifa_link) {
- if (source_ip->ifa_addr &&
- source_ip->ifa_addr->sa_family == AF_INET) {
+ ifnet_lock_shared(cur_ifp);
+ source_hw = cur_ifp->if_lladdr;
+ TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
+ ifa_link) {
+ IFA_LOCK(source_ip);
+ if (source_ip->ifa_addr != NULL &&
+ source_ip->ifa_addr->sa_family ==
+ AF_INET) {
+ /* Copy the source IP address */
+ source_ip_copy =
+ *(struct sockaddr_in *)
+ source_ip->ifa_addr;
+ IFA_UNLOCK(source_ip);
break;
}
+ IFA_UNLOCK(source_ip);
}
-
+
/* No IP Source, don't arp */
if (source_ip == NULL) {
- ifnet_lock_done(ifp_list[ifp_on]);
+ ifnet_lock_done(cur_ifp);
continue;
}
-
- /* Copy the source IP address */
- source_ip_copy = *(struct sockaddr_in*)source_ip->ifa_addr;
-
- ifnet_lock_done(ifp_list[ifp_on]);
-
+
+ /* keep the link-layer ifaddr alive once the lock is dropped */
+ IFA_ADDREF(source_hw);
+ ifnet_lock_done(cur_ifp);
+
/* Send the ARP */
- new_result = dlil_send_arp_internal(ifp_list[ifp_on], arpop,
- (struct sockaddr_dl*)source_hw->ifa_addr,
- (struct sockaddr*)&source_ip_copy, NULL,
- target_proto);
-
+ new_result = dlil_send_arp_internal(cur_ifp,
+ arpop,
+ (struct sockaddr_dl *)source_hw->ifa_addr,
+ (struct sockaddr *)&source_ip_copy, NULL,
+ target_proto);
+
+ IFA_REMREF(source_hw);
if (result == ENOTSUP) {
result = new_result;
}
}
+ ifnet_list_free(ifp_list);
}
-
- ifnet_list_free(ifp_list);
- }
- else {
- result = dlil_send_arp_internal(ifp, arpop, sender_hw, sender_proto,
- target_hw, target_proto);
+ } else {
+ result = dlil_send_arp_internal(ifp, arpop, sender_hw,
+ sender_proto, target_hw, target_proto);
}
-
- return result;
+
+ return (result);
}
+/*
+ * Report whether ifp is currently linked on the global ifnet_head list.
+ * Returns non-zero when it is found, zero otherwise.  The ifnet head
+ * lock must already be held by the caller; this is asserted.
+ */
static int
-ifp_use(
- struct ifnet *ifp,
- int handle_zero)
+ifnet_lookup(struct ifnet *ifp)
{
- int old_value;
- int retval = 0;
-
- do {
- old_value = ifp->if_usecnt;
- if (old_value == 0 && handle_zero == kIfNetUseCount_MustNotBeZero) {
- retval = ENXIO; // ifp is invalid
+ struct ifnet *cur;
+
+ lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
+
+ /* cur is left NULL when the walk finishes without a match */
+ TAILQ_FOREACH(cur, &ifnet_head, if_link) {
+ if (cur == ifp)
break;
- }
- } while (!OSCompareAndSwap((UInt32)old_value, (UInt32)old_value + 1, (UInt32*)&ifp->if_usecnt));
-
- return retval;
+ }
+ return (cur != NULL);
}
-
-/* ifp_unuse is broken into two pieces.
- *
- * ifp_use and ifp_unuse must be called between when the caller calls
- * dlil_write_begin and dlil_write_end. ifp_unuse needs to perform some
- * operations after dlil_write_end has been called. For this reason,
- * anyone calling ifp_unuse must call ifp_use_reached_zero if ifp_unuse
- * returns a non-zero value. The caller must call ifp_use_reached_zero
- * after the caller has called dlil_write_end.
+/*
+ * Caller has to pass a positive refio argument to get an
+ * IO reference count. This will prevent ifnet_detach from
+ * being called when there are outstanding IO reference counts.
*/
-static void
-ifp_use_reached_zero(
- struct ifnet *ifp)
-{
- struct if_family_str *if_family;
- ifnet_detached_func free_func;
-
- dlil_read_begin();
-
- if (ifp->if_usecnt != 0)
- panic("ifp_use_reached_zero: ifp->if_usecnt != 0");
-
- /* Let BPF know we're detaching */
- bpfdetach(ifp);
-
- ifnet_head_lock_exclusive();
- ifnet_lock_exclusive(ifp);
-
- /* Remove ourselves from the list */
- TAILQ_REMOVE(&ifnet_head, ifp, if_link);
- ifnet_addrs[ifp->if_index - 1] = 0;
-
- /* ifp should be removed from the interface list */
- while (ifp->if_multiaddrs.lh_first) {
- struct ifmultiaddr *ifma = ifp->if_multiaddrs.lh_first;
-
- /*
- * When the interface is gone, we will no longer
- * be listening on these multicasts. Various bits
- * of the stack may be referencing these multicasts,
- * release only our reference.
- */
- LIST_REMOVE(ifma, ifma_link);
- ifma->ifma_ifp = NULL;
- ifma_release(ifma);
- }
- ifnet_head_done();
-
- ifp->if_eflags &= ~IFEF_DETACHING; // clear the detaching flag
- ifnet_lock_done(ifp);
+int
+ifnet_is_attached(struct ifnet *ifp, int refio)
+{
+ int ret;
- if_family = find_family_module(ifp->if_family);
- if (if_family && if_family->del_if)
- if_family->del_if(ifp);
-#if 0
- if (--if_family->if_usecnt == 0) {
- if (if_family->shutdown)
- (*if_family->shutdown)();
-
- TAILQ_REMOVE(&if_family_head, if_family, if_fam_next);
- FREE(if_family, M_IFADDR);
+ lck_mtx_lock_spin(&ifp->if_ref_lock);
+ if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
+ IFRF_ATTACHED))) {
+ if (refio > 0)
+ ifp->if_refio++;
}
-#endif
-
- dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, 0, 0);
- free_func = ifp->if_free;
- dlil_read_end();
-
- if (free_func)
- free_func(ifp);
-}
+ lck_mtx_unlock(&ifp->if_ref_lock);
-static int
-ifp_unuse(
- struct ifnet *ifp)
-{
- int oldval;
- oldval = OSDecrementAtomic((UInt32*)&ifp->if_usecnt);
- if (oldval == 0)
- panic("ifp_unuse: ifp(%s%n)->if_usecnt was zero\n", ifp->if_name, ifp->if_unit);
-
- if (oldval > 1)
- return 0;
-
- if ((ifp->if_eflags & IFEF_DETACHING) == 0)
- panic("ifp_unuse: use count reached zero but detching flag is not set!");
-
- return 1; /* caller must call ifp_use_reached_zero */
+ return (ret);
}
void
-ifp_reference(
- struct ifnet *ifp)
+ifnet_decr_iorefcnt(struct ifnet *ifp)
{
- int oldval;
- oldval = OSIncrementAtomic(&ifp->if_refcnt);
+ lck_mtx_lock_spin(&ifp->if_ref_lock);
+ VERIFY(ifp->if_refio > 0);
+ VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
+ ifp->if_refio--;
+
+ /* If there are no more outstanding IO references, wake up the
+ * ifnet_detach thread if the detaching flag is set.
+ */
+ if (ifp->if_refio == 0 &&
+ (ifp->if_refflags & IFRF_DETACHING) != 0) {
+ /* Convert the spinlock to a regular mutex if we have
+ * to wait for any reason while doing a wakeup.
+ */
+ lck_mtx_convert_spin(&ifp->if_ref_lock);
+ wakeup(&(ifp->if_refio));
+ }
+ lck_mtx_unlock(&ifp->if_ref_lock);
}
-void
-ifp_release(
- struct ifnet *ifp)
+static void
+dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
- int oldval;
- oldval = OSDecrementAtomic((UInt32*)&ifp->if_refcnt);
- if (oldval == 0)
- panic("dlil_if_reference - refcount decremented past zero!");
-}
+ struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
+ ctrace_t *tr;
+ u_int32_t idx;
+ u_int16_t *cnt;
-extern lck_mtx_t *domain_proto_mtx;
+ if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
+ panic("%s: dl_if %p has no debug structure", __func__, dl_if);
+ /* NOTREACHED */
+ }
-static int
-dlil_attach_protocol_internal(
- struct if_proto *proto,
- const struct ddesc_head_str *demux,
- const struct ifnet_demux_desc *demux_list,
- u_int32_t demux_count)
-{
- struct ddesc_head_str temp_head;
- struct kev_dl_proto_data ev_pr_data;
- struct ifnet *ifp = proto->ifp;
- int retval = 0;
- u_long hash_value = proto_hash_value(proto->protocol_family);
- int if_using_kpi = (ifp->if_eflags & IFEF_USEKPI) != 0;
- void* free_me = NULL;
-
- /* setup some of the common values */
-
- {
- lck_mtx_lock(domain_proto_mtx);
- struct domain *dp = domains;
- while (dp && (protocol_family_t)dp->dom_family != proto->protocol_family)
- dp = dp->dom_next;
- proto->dl_domain = dp;
- lck_mtx_unlock(domain_proto_mtx);
- }
-
- /*
- * Convert the demux descriptors to a type the interface
- * will understand. Checking e_flags should be safe, this
- * flag won't change.
- */
- if (if_using_kpi && demux) {
- /* Convert the demux linked list to a demux_list */
- struct dlil_demux_desc *demux_entry;
- struct ifnet_demux_desc *temp_list = NULL;
- u_int32_t i = 0;
-
- TAILQ_FOREACH(demux_entry, demux, next) {
- i++;
- }
-
- temp_list = _MALLOC(sizeof(struct ifnet_demux_desc) * i, M_TEMP, M_WAITOK);
- free_me = temp_list;
-
- if (temp_list == NULL)
- return ENOMEM;
-
- i = 0;
- TAILQ_FOREACH(demux_entry, demux, next) {
- /* dlil_demux_desc types 1, 2, and 3 are obsolete and can not be translated */
- if (demux_entry->type == 1 ||
- demux_entry->type == 2 ||
- demux_entry->type == 3) {
- FREE(free_me, M_TEMP);
- return ENOTSUP;
- }
-
- temp_list[i].type = demux_entry->type;
- temp_list[i].data = demux_entry->native_type;
- temp_list[i].datalen = demux_entry->variants.native_type_length;
- i++;
- }
- demux_count = i;
- demux_list = temp_list;
+ if (refhold) {
+ cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
+ tr = dl_if_dbg->dldbg_if_refhold;
+ } else {
+ cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
+ tr = dl_if_dbg->dldbg_if_refrele;
}
- else if (!if_using_kpi && demux_list != NULL) {
- struct dlil_demux_desc *demux_entry;
- u_int32_t i = 0;
-
- demux_entry = _MALLOC(sizeof(struct dlil_demux_desc) * demux_count, M_TEMP, M_WAITOK);
- free_me = demux_entry;
- if (demux_entry == NULL)
- return ENOMEM;
-
- TAILQ_INIT(&temp_head);
-
- for (i = 0; i < demux_count; i++) {
- demux_entry[i].type = demux_list[i].type;
- demux_entry[i].native_type = demux_list[i].data;
- demux_entry[i].variants.native_type_length = demux_list[i].datalen;
- TAILQ_INSERT_TAIL(&temp_head, &demux_entry[i], next);
- }
- demux = &temp_head;
+
+ idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
+ ctrace_record(&tr[idx]);
+}
+
+errno_t
+dlil_if_ref(struct ifnet *ifp)
+{
+ struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+
+ if (dl_if == NULL)
+ return (EINVAL);
+
+ lck_mtx_lock_spin(&dl_if->dl_if_lock);
+ ++dl_if->dl_if_refcnt;
+ if (dl_if->dl_if_refcnt == 0) {
+ panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
+ /* NOTREACHED */
}
-
- /*
- * Take the write lock to protect readers and exclude other writers.
- */
- dlil_write_begin();
-
- /* Check that the interface isn't currently detaching */
- ifnet_lock_shared(ifp);
- if ((ifp->if_eflags & IFEF_DETACHING) != 0) {
+ if (dl_if->dl_if_trace != NULL)
+ (*dl_if->dl_if_trace)(dl_if, TRUE);
+ lck_mtx_unlock(&dl_if->dl_if_lock);
+
+ return (0);
+}
+
+errno_t
+dlil_if_free(struct ifnet *ifp)
+{
+ struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+
+ if (dl_if == NULL)
+ return (EINVAL);
+
+ lck_mtx_lock_spin(&dl_if->dl_if_lock);
+ if (dl_if->dl_if_refcnt == 0) {
+ panic("%s: negative refcnt for ifp=%p", __func__, ifp);
+ /* NOTREACHED */
+ }
+ --dl_if->dl_if_refcnt;
+ if (dl_if->dl_if_trace != NULL)
+ (*dl_if->dl_if_trace)(dl_if, FALSE);
+ lck_mtx_unlock(&dl_if->dl_if_lock);
+
+ return (0);
+}
+
+static errno_t
+dlil_attach_protocol_internal(struct if_proto *proto,
+ const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
+{
+ struct kev_dl_proto_data ev_pr_data;
+ struct ifnet *ifp = proto->ifp;
+ int retval = 0;
+ u_int32_t hash_value = proto_hash_value(proto->protocol_family);
+ struct if_proto *prev_proto;
+ struct if_proto *_proto;
+
+ /* callee holds a proto refcnt upon success */
+ ifnet_lock_exclusive(ifp);
+ _proto = find_attached_proto(ifp, proto->protocol_family);
+ if (_proto != NULL) {
ifnet_lock_done(ifp);
- dlil_write_end();
- if (free_me)
- FREE(free_me, M_TEMP);
- return ENXIO;
+ if_proto_free(_proto);
+ return (EEXIST);
}
- ifnet_lock_done(ifp);
-
- if (find_attached_proto(ifp, proto->protocol_family) != NULL) {
- dlil_write_end();
- if (free_me)
- FREE(free_me, M_TEMP);
- return EEXIST;
- }
-
+
/*
* Call family module add_proto routine so it can refine the
* demux descriptors as it wishes.
*/
- if (if_using_kpi)
- retval = ifp->if_add_proto_u.kpi(ifp, proto->protocol_family, demux_list, demux_count);
- else {
- retval = ifp->if_add_proto_u.original(ifp, proto->protocol_family,
- _cast_non_const(demux));
- }
+ retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
+ demux_count);
if (retval) {
- dlil_write_end();
- if (free_me)
- FREE(free_me, M_TEMP);
- return retval;
+ ifnet_lock_done(ifp);
+ return (retval);
}
-
- /*
- * We can't fail from this point on.
- * Increment the number of uses (protocol attachments + interface attached).
- */
- ifp_use(ifp, kIfNetUseCount_MustNotBeZero);
-
+
/*
* Insert the protocol in the hash
*/
- {
- struct if_proto* prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
- while (prev_proto && SLIST_NEXT(prev_proto, next_hash) != NULL)
- prev_proto = SLIST_NEXT(prev_proto, next_hash);
- if (prev_proto)
- SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
- else
- SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value], proto, next_hash);
- }
+ prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
+ while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
+ prev_proto = SLIST_NEXT(prev_proto, next_hash);
+ if (prev_proto)
+ SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
+ else
+ SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
+ proto, next_hash);
+
+ /* hold a proto refcnt for attach */
+ if_proto_ref(proto);
/*
- * Add to if_proto list for this interface
+ * The reserved field carries the number of protocols still attached
+ * (subject to change)
*/
- if_proto_ref(proto);
- if (proto->proto_kpi == kProtoKPI_DLIL && proto->kpi.dlil.dl_offer)
- ifp->offercnt++;
- dlil_write_end();
-
- /* the reserved field carries the number of protocol still attached (subject to change) */
ev_pr_data.proto_family = proto->protocol_family;
ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
- dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
- (struct net_event_data *)&ev_pr_data,
- sizeof(struct kev_dl_proto_data));
-
- DLIL_PRINTF("Attached protocol %d to %s%d - %d\n", proto->protocol_family,
- ifp->if_name, ifp->if_unit, retval);
- if (free_me)
- FREE(free_me, M_TEMP);
- return retval;
+ ifnet_lock_done(ifp);
+
+ dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
+ (struct net_event_data *)&ev_pr_data,
+ sizeof (struct kev_dl_proto_data));
+ return (retval);
}
-__private_extern__ int
-dlil_attach_protocol_kpi(ifnet_t ifp, protocol_family_t protocol,
- const struct ifnet_attach_proto_param *proto_details)
+errno_t
+ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
+ const struct ifnet_attach_proto_param *proto_details)
{
int retval = 0;
struct if_proto *ifproto = NULL;
-
- ifproto = _MALLOC(sizeof(struct if_proto), M_IFADDR, M_WAITOK);
- if (ifproto == 0) {
- DLIL_PRINTF("ERROR - DLIL failed if_proto allocation\n");
+
+ ifnet_head_lock_shared();
+ if (ifp == NULL || protocol == 0 || proto_details == NULL) {
+ retval = EINVAL;
+ goto end;
+ }
+ /* Check that the interface is in the global list */
+ if (!ifnet_lookup(ifp)) {
+ retval = ENXIO;
+ goto end;
+ }
+
+ ifproto = zalloc(dlif_proto_zone);
+ if (ifproto == NULL) {
retval = ENOMEM;
goto end;
}
- bzero(ifproto, sizeof(*ifproto));
-
+ bzero(ifproto, dlif_proto_size);
+
+ /* ifnet_lookup() takes no refcnt; ifnet_head_lock is still held */
ifproto->ifp = ifp;
ifproto->protocol_family = protocol;
ifproto->proto_kpi = kProtoKPI_v1;
ifproto->kpi.v1.detached = proto_details->detached;
ifproto->kpi.v1.resolve_multi = proto_details->resolve;
ifproto->kpi.v1.send_arp = proto_details->send_arp;
-
- retval = dlil_attach_protocol_internal(ifproto, NULL,
- proto_details->demux_list, proto_details->demux_count);
-
+
+ retval = dlil_attach_protocol_internal(ifproto,
+ proto_details->demux_list, proto_details->demux_count);
+
+ if (dlil_verbose) {
+ printf("%s%d: attached v1 protocol %d\n", ifp->if_name,
+ ifp->if_unit, protocol);
+ }
+
end:
- if (retval && ifproto)
- FREE(ifproto, M_IFADDR);
- return retval;
+ if (retval != 0 && retval != EEXIST && ifp != NULL) {
+ DLIL_PRINTF("%s%d: failed to attach v1 protocol %d (err=%d)\n",
+ ifp->if_name, ifp->if_unit, protocol, retval);
+ }
+ ifnet_head_done();
+ if (retval != 0 && ifproto != NULL)
+ zfree(dlif_proto_zone, ifproto);
+ return (retval);
}
-int
-dlil_attach_protocol(struct dlil_proto_reg_str *proto)
+errno_t
+ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
+ const struct ifnet_attach_proto_param_v2 *proto_details)
{
- struct ifnet *ifp = NULL;
+ int retval = 0;
struct if_proto *ifproto = NULL;
- int retval = 0;
- /*
- * Do everything we can before taking the write lock
- */
-
- if ((proto->protocol_family == 0) || (proto->interface_family == 0))
- return EINVAL;
-
- /*
- * Allocate and init a new if_proto structure
- */
- ifproto = _MALLOC(sizeof(struct if_proto), M_IFADDR, M_WAITOK);
- if (!ifproto) {
- DLIL_PRINTF("ERROR - DLIL failed if_proto allocation\n");
- retval = ENOMEM;
+ ifnet_head_lock_shared();
+ if (ifp == NULL || protocol == 0 || proto_details == NULL) {
+ retval = EINVAL;
goto end;
}
-
-
- /* ifbyfamily returns us an ifp with an incremented if_usecnt */
- ifp = ifbyfamily(proto->interface_family, proto->unit_number);
- if (!ifp) {
- DLIL_PRINTF("dlil_attach_protocol -- no such interface %d unit %d\n",
- proto->interface_family, proto->unit_number);
+ /* Check that the interface is in the global list */
+ if (!ifnet_lookup(ifp)) {
retval = ENXIO;
goto end;
}
- bzero(ifproto, sizeof(struct if_proto));
-
- ifproto->ifp = ifp;
- ifproto->protocol_family = proto->protocol_family;
- ifproto->proto_kpi = kProtoKPI_DLIL;
- ifproto->kpi.dlil.dl_input = proto->input;
- ifproto->kpi.dlil.dl_pre_output = proto->pre_output;
- ifproto->kpi.dlil.dl_event = proto->event;
- ifproto->kpi.dlil.dl_offer = proto->offer;
- ifproto->kpi.dlil.dl_ioctl = proto->ioctl;
- ifproto->kpi.dlil.dl_detached = proto->detached;
-
- retval = dlil_attach_protocol_internal(ifproto, &proto->demux_desc_head, NULL, 0);
-
-end:
- if (retval && ifproto)
- FREE(ifproto, M_IFADDR);
- return retval;
-}
+ ifproto = zalloc(dlif_proto_zone);
+ if (ifproto == NULL) {
+ retval = ENOMEM;
+ goto end;
+ }
+ bzero(ifproto, sizeof(*ifproto));
-extern void if_rtproto_del(struct ifnet *ifp, int protocol);
+ /* ifnet_lookup() takes no refcnt; ifnet_head_lock is still held */
+ ifproto->ifp = ifp;
+ ifproto->protocol_family = protocol;
+ ifproto->proto_kpi = kProtoKPI_v2;
+ ifproto->kpi.v2.input = proto_details->input;
+ ifproto->kpi.v2.pre_output = proto_details->pre_output;
+ ifproto->kpi.v2.event = proto_details->event;
+ ifproto->kpi.v2.ioctl = proto_details->ioctl;
+ ifproto->kpi.v2.detached = proto_details->detached;
+ ifproto->kpi.v2.resolve_multi = proto_details->resolve;
+ ifproto->kpi.v2.send_arp = proto_details->send_arp;
-static int
-dlil_detach_protocol_internal(
- struct if_proto *proto)
-{
- struct ifnet *ifp = proto->ifp;
- u_long proto_family = proto->protocol_family;
- struct kev_dl_proto_data ev_pr_data;
-
- if (proto->proto_kpi == kProtoKPI_DLIL) {
- if (proto->kpi.dlil.dl_detached)
- proto->kpi.dlil.dl_detached(proto->protocol_family, ifp);
- }
- else {
- if (proto->kpi.v1.detached)
- proto->kpi.v1.detached(ifp, proto->protocol_family);
+ retval = dlil_attach_protocol_internal(ifproto,
+ proto_details->demux_list, proto_details->demux_count);
+
+ if (dlil_verbose) {
+ printf("%s%d: attached v2 protocol %d\n", ifp->if_name,
+ ifp->if_unit, protocol);
}
- if_proto_free(proto);
-
- /*
- * Cleanup routes that may still be in the routing table for that interface/protocol pair.
- */
-
- if_rtproto_del(ifp, proto_family);
-
- /* the reserved field carries the number of protocol still attached (subject to change) */
- ev_pr_data.proto_family = proto_family;
- ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
- dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
- (struct net_event_data *)&ev_pr_data,
- sizeof(struct kev_dl_proto_data));
- return 0;
+
+end:
+ if (retval != 0 && retval != EEXIST && ifp != NULL) {
+ DLIL_PRINTF("%s%d: failed to attach v2 protocol %d (err=%d)\n",
+ ifp->if_name, ifp->if_unit, protocol, retval);
+ }
+ ifnet_head_done();
+ if (retval != 0 && ifproto != NULL)
+ zfree(dlif_proto_zone, ifproto);
+ return (retval);
}
-int
-dlil_detach_protocol(struct ifnet *ifp, u_long proto_family)
+errno_t
+ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
struct if_proto *proto = NULL;
int retval = 0;
- int use_reached_zero = 0;
-
-
- if ((retval = dlil_write_begin()) != 0) {
- if (retval == EDEADLK) {
- retval = 0;
- dlil_read_begin();
- proto = find_attached_proto(ifp, proto_family);
- if (proto == 0) {
- retval = ENXIO;
- }
- else {
- proto->detaching = 1;
- dlil_detach_waiting = 1;
- wakeup(&dlil_detach_waiting);
- }
- dlil_read_end();
- }
+
+ if (ifp == NULL || proto_family == 0) {
+ retval = EINVAL;
goto end;
}
-
+
+ ifnet_lock_exclusive(ifp);
+ /* callee holds a proto refcnt upon success */
proto = find_attached_proto(ifp, proto_family);
-
if (proto == NULL) {
retval = ENXIO;
- dlil_write_end();
+ ifnet_lock_done(ifp);
goto end;
}
-
- /*
- * Call family module del_proto
- */
-
+
+ /* call family module del_proto */
if (ifp->if_del_proto)
ifp->if_del_proto(ifp, proto->protocol_family);
- if (proto->proto_kpi == kProtoKPI_DLIL && proto->kpi.dlil.dl_offer)
- ifp->offercnt--;
+ SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
+ proto, if_proto, next_hash);
- SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)], proto, if_proto, next_hash);
-
- /*
- * We can do the rest of the work outside of the write lock.
- */
- use_reached_zero = ifp_unuse(ifp);
- dlil_write_end();
-
- dlil_detach_protocol_internal(proto);
+ if (proto->proto_kpi == kProtoKPI_v1) {
+ proto->kpi.v1.input = ifproto_media_input_v1;
+ proto->kpi.v1.pre_output= ifproto_media_preout;
+ proto->kpi.v1.event = ifproto_media_event;
+ proto->kpi.v1.ioctl = ifproto_media_ioctl;
+ proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
+ proto->kpi.v1.send_arp = ifproto_media_send_arp;
+ } else {
+ proto->kpi.v2.input = ifproto_media_input_v2;
+ proto->kpi.v2.pre_output = ifproto_media_preout;
+ proto->kpi.v2.event = ifproto_media_event;
+ proto->kpi.v2.ioctl = ifproto_media_ioctl;
+ proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
+ proto->kpi.v2.send_arp = ifproto_media_send_arp;
+ }
+ proto->detached = 1;
+ ifnet_lock_done(ifp);
+
+ if (dlil_verbose) {
+ printf("%s%d: detached %s protocol %d\n", ifp->if_name,
+ ifp->if_unit, (proto->proto_kpi == kProtoKPI_v1) ?
+ "v1" : "v2", proto_family);
+ }
+
+ /* release proto refcnt held during protocol attach */
+ if_proto_free(proto);
/*
- * Only handle the case where the interface will go away after
- * we've sent the message. This way post message can send the
- * message to the interface safely.
+ * Release proto refcnt held during lookup; the rest of
+ * protocol detach steps will happen when the last proto
+ * reference is released.
*/
-
- if (use_reached_zero)
- ifp_use_reached_zero(ifp);
-
+ if_proto_free(proto);
+
end:
- return retval;
+ return (retval);
}
-/*
- * dlil_delayed_detach_thread is responsible for detaching
- * protocols, protocol filters, and interface filters after
- * an attempt was made to detach one of those items while
- * it was not safe to do so (i.e. called dlil_read_begin).
- *
- * This function will take the dlil write lock and walk
- * through each of the interfaces looking for items with
- * the detaching flag set. When an item is found, it is
- * detached from the interface and placed on a local list.
- * After all of the items have been collected, we drop the
- * write lock and performed the post detach. This is done
- * so we only have to take the write lock once.
- *
- * When detaching a protocol filter, if we find that we
- * have detached the very last protocol and we need to call
- * ifp_use_reached_zero, we have to break out of our work
- * to drop the write lock so we can call ifp_use_reached_zero.
- */
-
-static void
-dlil_delayed_detach_thread(__unused void* foo, __unused wait_result_t wait)
+
+static errno_t
+ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
+ struct mbuf *packet, char *header)
{
- thread_t self = current_thread();
- int asserted = 0;
-
- ml_thread_policy(self, MACHINE_GROUP,
- (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
+#pragma unused(ifp, protocol, packet, header)
+ return (ENXIO);
+}
+
+static errno_t
+ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
+ struct mbuf *packet)
+{
+#pragma unused(ifp, protocol, packet)
+ return (ENXIO);
+
+}
+
+static errno_t
+ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
+ mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
+ char *link_layer_dest)
+{
+#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
+ return (ENXIO);
-
- while (1) {
- if (dlil_detach_waiting != 0 && dlil_write_begin() == 0) {
- struct ifnet *ifp;
- struct proto_hash_entry detached_protos;
- struct ifnet_filter_head detached_filters;
- struct if_proto *proto;
- struct if_proto *next_proto;
- struct ifnet_filter *filt;
- struct ifnet_filter *next_filt;
- int reached_zero;
-
- reached_zero = 0;
-
- /* Clear the detach waiting flag */
- dlil_detach_waiting = 0;
- TAILQ_INIT(&detached_filters);
- SLIST_INIT(&detached_protos);
-
- ifnet_head_lock_shared();
- TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
- int i;
-
- // Look for protocols and protocol filters
- for (i = 0; i < PROTO_HASH_SLOTS && !reached_zero; i++) {
- struct if_proto **prev_nextptr = &SLIST_FIRST(&ifp->if_proto_hash[i]);
- for (proto = *prev_nextptr; proto; proto = *prev_nextptr) {
-
- // Detach this protocol
- if (proto->detaching) {
- if (ifp->if_del_proto)
- ifp->if_del_proto(ifp, proto->protocol_family);
- if (proto->proto_kpi == kProtoKPI_DLIL && proto->kpi.dlil.dl_offer)
- ifp->offercnt--;
- *prev_nextptr = SLIST_NEXT(proto, next_hash);
- SLIST_INSERT_HEAD(&detached_protos, proto, next_hash);
- reached_zero = ifp_unuse(ifp);
- if (reached_zero) {
- break;
- }
- }
- else {
- // Update prev_nextptr to point to our next ptr
- prev_nextptr = &SLIST_NEXT(proto, next_hash);
- }
- }
- }
-
- // look for interface filters that need to be detached
- for (filt = TAILQ_FIRST(&ifp->if_flt_head); filt; filt = next_filt) {
- next_filt = TAILQ_NEXT(filt, filt_next);
- if (filt->filt_detaching != 0) {
- // take this interface filter off the interface filter list
- TAILQ_REMOVE(&ifp->if_flt_head, filt, filt_next);
-
- // put this interface filter on the detached filters list
- TAILQ_INSERT_TAIL(&detached_filters, filt, filt_next);
- }
- }
-
- if (ifp->if_delayed_detach) {
- ifp->if_delayed_detach = 0;
- reached_zero = ifp_unuse(ifp);
- }
-
- if (reached_zero)
- break;
- }
- ifnet_head_done();
- dlil_write_end();
-
- for (filt = TAILQ_FIRST(&detached_filters); filt; filt = next_filt) {
- next_filt = TAILQ_NEXT(filt, filt_next);
- /*
- * dlil_detach_filter_internal won't remove an item from
- * the list if it is already detached (second parameter).
- * The item will be freed though.
- */
- dlil_detach_filter_internal(filt, 1);
- }
-
- for (proto = SLIST_FIRST(&detached_protos); proto; proto = next_proto) {
- next_proto = SLIST_NEXT(proto, next_hash);
- dlil_detach_protocol_internal(proto);
- }
-
- if (reached_zero) {
- ifp_use_reached_zero(ifp);
- dlil_detach_waiting = 1; // we may have missed something
- }
- }
-
- if (!asserted && dlil_detach_waiting == 0) {
- asserted = 1;
- assert_wait(&dlil_detach_waiting, THREAD_UNINT);
- }
-
- if (dlil_detach_waiting == 0) {
- asserted = 0;
- thread_block(dlil_delayed_detach_thread);
- }
- }
}
static void
-dlil_call_delayed_detach_thread(void) {
- dlil_delayed_detach_thread(NULL, THREAD_RESTART);
+ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
+ const struct kev_msg *event)
+{
+#pragma unused(ifp, protocol, event)
+}
+
+static errno_t
+ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
+ unsigned long command, void *argument)
+{
+#pragma unused(ifp, protocol, command, argument)
+ return (ENXIO);
+}
+
+static errno_t
+ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
+ struct sockaddr_dl *out_ll, size_t ll_len)
+{
+#pragma unused(ifp, proto_addr, out_ll, ll_len)
+ return (ENXIO);
+}
+
+static errno_t
+ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
+ const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
+ const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
+{
+#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
+ return (ENXIO);
}
extern int if_next_index(void);
-__private_extern__ int
-dlil_if_attach_with_address(
- struct ifnet *ifp,
- const struct sockaddr_dl *ll_addr)
+errno_t
+ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
- u_long interface_family = ifp->if_family;
- struct if_family_str *if_family = NULL;
- int stat;
struct ifnet *tmp_if;
- struct proto_hash_entry *new_proto_list = NULL;
- int locked = 0;
-
-
- ifnet_head_lock_shared();
+ struct ifaddr *ifa;
+ struct if_data_internal if_data_saved;
+ struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+
+ if (ifp == NULL)
+ return (EINVAL);
+ /*
+ * Serialize ifnet attach using dlil_ifnet_lock, in order to
+ * prevent the interface from being configured while it is
+ * embryonic, as ifnet_head_lock is dropped and reacquired
+ * below prior to marking the ifnet with IFRF_ATTACHED.
+ */
+ dlil_if_lock();
+ ifnet_head_lock_exclusive();
/* Verify we aren't already on the list */
TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
if (tmp_if == ifp) {
ifnet_head_done();
- return EEXIST;
+ dlil_if_unlock();
+ return (EEXIST);
}
}
-
- ifnet_head_done();
-
- if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_lock == 0)
-#if IFNET_RW_LOCK
- ifp->if_lock = lck_rw_alloc_init(ifnet_lock_group, ifnet_lock_attr);
-#else
- ifp->if_lock = lck_mtx_alloc_init(ifnet_lock_group, ifnet_lock_attr);
-#endif
- if (ifp->if_lock == 0) {
- return ENOMEM;
+ lck_mtx_lock_spin(&ifp->if_ref_lock);
+ if (ifp->if_refflags & IFRF_ATTACHED) {
+ panic("%s: flags mismatch (attached set) ifp=%p",
+ __func__, ifp);
+ /* NOTREACHED */
}
+ lck_mtx_unlock(&ifp->if_ref_lock);
+
+ ifnet_lock_exclusive(ifp);
+
+ /* Sanity check */
+ VERIFY(ifp->if_detaching_link.tqe_next == NULL);
+ VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
- // Only use family if this is not a KPI interface
- if ((ifp->if_eflags & IFEF_USEKPI) == 0) {
- if_family = find_family_module(interface_family);
+ if (ll_addr != NULL) {
+ if (ifp->if_addrlen == 0) {
+ ifp->if_addrlen = ll_addr->sdl_alen;
+ } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
+ ifnet_lock_done(ifp);
+ ifnet_head_done();
+ dlil_if_unlock();
+ return (EINVAL);
+ }
}
/*
- * Allow interfaces withouth protocol families to attach
+ * Allow interfaces without protocol families to attach
* only if they have the necessary fields filled out.
*/
-
- if ((if_family == 0) &&
- (ifp->if_add_proto == 0 || ifp->if_del_proto == 0)) {
- DLIL_PRINTF("Attempt to attach interface without family module - %d\n",
- interface_family);
- return ENODEV;
- }
-
- if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_proto_hash == NULL) {
- MALLOC(new_proto_list, struct proto_hash_entry*, sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS,
- M_NKE, M_WAITOK);
-
- if (new_proto_list == 0) {
- return ENOBUFS;
+ if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
+ DLIL_PRINTF("%s: Attempt to attach interface without "
+ "family module - %d\n", __func__, ifp->if_family);
+ ifnet_lock_done(ifp);
+ ifnet_head_done();
+ dlil_if_unlock();
+ return (ENODEV);
+ }
+
+ /* Allocate protocol hash table */
+ VERIFY(ifp->if_proto_hash == NULL);
+ ifp->if_proto_hash = zalloc(dlif_phash_zone);
+ if (ifp->if_proto_hash == NULL) {
+ ifnet_lock_done(ifp);
+ ifnet_head_done();
+ dlil_if_unlock();
+ return (ENOBUFS);
+ }
+ bzero(ifp->if_proto_hash, dlif_phash_size);
+
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+ VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
+ TAILQ_INIT(&ifp->if_flt_head);
+ VERIFY(ifp->if_flt_busy == 0);
+ VERIFY(ifp->if_flt_waiters == 0);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
+ VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
+ TAILQ_INIT(&ifp->if_prefixhead);
+
+ if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
+ VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
+ LIST_INIT(&ifp->if_multiaddrs);
+ }
+
+ VERIFY(ifp->if_allhostsinm == NULL);
+ VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
+ TAILQ_INIT(&ifp->if_addrhead);
+
+ if (ifp->if_snd.ifq_maxlen == 0)
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+
+ if (ifp->if_index == 0) {
+ int idx = if_next_index();
+
+ if (idx == -1) {
+ ifp->if_index = 0;
+ ifnet_lock_done(ifp);
+ ifnet_head_done();
+ dlil_if_unlock();
+ return (ENOBUFS);
}
+ ifp->if_index = idx;
+ }
+ /* There should not be anything occupying this slot */
+ VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
+
+ /* allocate (if needed) and initialize a link address */
+ VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
+ ifa = dlil_alloc_lladdr(ifp, ll_addr);
+ if (ifa == NULL) {
+ ifnet_lock_done(ifp);
+ ifnet_head_done();
+ dlil_if_unlock();
+ return (ENOBUFS);
}
- dlil_write_begin();
- locked = 1;
+ VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
+ ifnet_addrs[ifp->if_index - 1] = ifa;
+
+ /* make this address the first on the list */
+ IFA_LOCK(ifa);
+ /* hold a reference for ifnet_addrs[] */
+ IFA_ADDREF_LOCKED(ifa);
+ /* if_attach_link_ifa() holds a reference for ifa_link */
+ if_attach_link_ifa(ifp, ifa);
+ IFA_UNLOCK(ifa);
+
+#if CONFIG_MACF_NET
+ mac_ifnet_label_associate(ifp);
+#endif
+
+ TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
+ ifindex2ifnet[ifp->if_index] = ifp;
+
+ /* Hold a reference to the underlying dlil_ifnet */
+ ifnet_reference(ifp);
/*
- * Call the family module to fill in the appropriate fields in the
- * ifnet structure.
+ * A specific dlil input thread is created per Ethernet/cellular
+ * interface. pseudo interfaces or other types of interfaces use
+ * the main ("loopback") thread.
+ *
+ * If the sysctl "net.link.generic.system.multi_threaded_input" is set
+ * to zero, all packets will be handled by the main loopback thread,
+ * reverting to 10.4.x behaviour.
*/
-
- if (if_family) {
- stat = if_family->add_if(ifp);
- if (stat) {
- DLIL_PRINTF("dlil_if_attach -- add_if failed with %d\n", stat);
- dlil_write_end();
- return stat;
- }
- ifp->if_add_proto_u.original = if_family->add_proto;
- ifp->if_del_proto = if_family->del_proto;
- if_family->refcnt++;
- }
-
- ifp->offercnt = 0;
- TAILQ_INIT(&ifp->if_flt_head);
-
-
- if (new_proto_list) {
- bzero(new_proto_list, (PROTO_HASH_SLOTS * sizeof(struct proto_hash_entry)));
- ifp->if_proto_hash = new_proto_list;
- new_proto_list = 0;
- }
-
- /* old_if_attach */
- {
- struct ifaddr *ifa = 0;
-
- if (ifp->if_snd.ifq_maxlen == 0)
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
- TAILQ_INIT(&ifp->if_prefixhead);
- LIST_INIT(&ifp->if_multiaddrs);
- ifnet_touch_lastchange(ifp);
-
- /* usecount to track attachment to the ifnet list */
- ifp_use(ifp, kIfNetUseCount_MayBeZero);
-
- /* Lock the list of interfaces */
- ifnet_head_lock_exclusive();
- ifnet_lock_exclusive(ifp);
-
- if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_index == 0) {
- char workbuf[64];
- int namelen, masklen, socksize, ifasize;
-
- ifp->if_index = if_next_index();
-
- namelen = snprintf(workbuf, sizeof(workbuf), "%s%d", ifp->if_name, ifp->if_unit);
-#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
- masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
- socksize = masklen + ifp->if_addrlen;
-#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
- if ((u_long)socksize < sizeof(struct sockaddr_dl))
- socksize = sizeof(struct sockaddr_dl);
- socksize = ROUNDUP(socksize);
- ifasize = sizeof(struct ifaddr) + 2 * socksize;
- ifa = (struct ifaddr*)_MALLOC(ifasize, M_IFADDR, M_WAITOK);
- if (ifa) {
- struct sockaddr_dl *sdl = (struct sockaddr_dl *)(ifa + 1);
- ifnet_addrs[ifp->if_index - 1] = ifa;
- bzero(ifa, ifasize);
- sdl->sdl_len = socksize;
- sdl->sdl_family = AF_LINK;
- bcopy(workbuf, sdl->sdl_data, namelen);
- sdl->sdl_nlen = namelen;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = ifp->if_type;
- if (ll_addr) {
- sdl->sdl_alen = ll_addr->sdl_alen;
- if (ll_addr->sdl_alen != ifp->if_addrlen)
- panic("dlil_if_attach - ll_addr->sdl_alen != ifp->if_addrlen");
- bcopy(CONST_LLADDR(ll_addr), LLADDR(sdl), sdl->sdl_alen);
- }
- ifa->ifa_ifp = ifp;
- ifa->ifa_rtrequest = link_rtrequest;
- ifa->ifa_addr = (struct sockaddr*)sdl;
- sdl = (struct sockaddr_dl*)(socksize + (caddr_t)sdl);
- ifa->ifa_netmask = (struct sockaddr*)sdl;
- sdl->sdl_len = masklen;
- while (namelen != 0)
- sdl->sdl_data[--namelen] = 0xff;
- }
+ if (dlil_multithreaded_input &&
+ (ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR)) {
+ int err;
+
+ ifp->if_input_thread = zalloc(dlif_inp_zone);
+ if (ifp->if_input_thread == NULL) {
+ panic("%s: ifp=%p couldn't alloc threading",
+ __func__, ifp);
+ /* NOTREACHED */
}
- else {
- /* preserve the first ifaddr */
- ifnet_addrs[ifp->if_index - 1] = TAILQ_FIRST(&ifp->if_addrhead);
+ bzero(ifp->if_input_thread, dlif_inp_size);
+ err = dlil_create_input_thread(ifp, ifp->if_input_thread);
+ if (err != 0) {
+ panic("%s: ifp=%p couldn't get a thread. "
+ "err=%d", __func__, ifp, err);
+ /* NOTREACHED */
}
-
+#ifdef DLIL_DEBUG
+ printf("%s: dlil thread for ifp=%p if_index=%d\n",
+ __func__, ifp, ifp->if_index);
+#endif
+ }
- TAILQ_INIT(&ifp->if_addrhead);
- ifa = ifnet_addrs[ifp->if_index - 1];
-
- if (ifa) {
- /*
- * We don't use if_attach_ifa because we want
- * this address to be first on the list.
- */
- ifaref(ifa);
- ifa->ifa_debug |= IFA_ATTACHED;
- TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
+ /* Clear stats (save and restore the other fields that we care about) */
+ if_data_saved = ifp->if_data;
+ bzero(&ifp->if_data, sizeof (ifp->if_data));
+ ifp->if_data.ifi_type = if_data_saved.ifi_type;
+ ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
+ ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
+ ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
+ ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
+ ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
+ ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
+ ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
+ ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
+ ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
+ ifnet_touch_lastchange(ifp);
+
+ /* Record attach PC stacktrace */
+ ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
+
+ ifp->if_updatemcasts = 0;
+ if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
+ struct ifmultiaddr *ifma;
+ LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ IFMA_LOCK(ifma);
+ if (ifma->ifma_addr->sa_family == AF_LINK ||
+ ifma->ifma_addr->sa_family == AF_UNSPEC)
+ ifp->if_updatemcasts++;
+ IFMA_UNLOCK(ifma);
}
-
- TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
- ifindex2ifnet[ifp->if_index] = ifp;
-
- ifnet_head_done();
+
+ printf("%s%d: attached with %d suspended link-layer multicast "
+ "membership(s)\n", ifp->if_name, ifp->if_unit,
+ ifp->if_updatemcasts);
}
- dlil_write_end();
-
- if (if_family && if_family->init_if) {
- stat = if_family->init_if(ifp);
- if (stat) {
- DLIL_PRINTF("dlil_if_attach -- init_if failed with %d\n", stat);
- }
+
+ ifnet_lock_done(ifp);
+ ifnet_head_done();
+
+ lck_mtx_lock(&ifp->if_cached_route_lock);
+ /* Enable forwarding cached route */
+ ifp->if_fwd_cacheok = 1;
+ /* Clean up any existing cached routes */
+ if (ifp->if_fwd_route.ro_rt != NULL)
+ rtfree(ifp->if_fwd_route.ro_rt);
+ bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
+ if (ifp->if_src_route.ro_rt != NULL)
+ rtfree(ifp->if_src_route.ro_rt);
+ bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
+ if (ifp->if_src_route6.ro_rt != NULL)
+ rtfree(ifp->if_src_route6.ro_rt);
+ bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
+ lck_mtx_unlock(&ifp->if_cached_route_lock);
+
+ ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
+
+ /*
+ * Allocate and attach IGMPv3/MLDv2 interface specific variables
+ * and trees; do this before the ifnet is marked as attached.
+ * The ifnet keeps the reference to the info structures even after
+ * the ifnet is detached, since the network-layer records still
+ * refer to the info structures even after that. This also
+ * makes it possible for them to still function after the ifnet
+ * is recycled or reattached.
+ */
+#if INET
+ if (IGMP_IFINFO(ifp) == NULL) {
+ IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
+ VERIFY(IGMP_IFINFO(ifp) != NULL);
+ } else {
+ VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
+ igmp_domifreattach(IGMP_IFINFO(ifp));
+ }
+#endif /* INET */
+#if INET6
+ if (MLD_IFINFO(ifp) == NULL) {
+ MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
+ VERIFY(MLD_IFINFO(ifp) != NULL);
+ } else {
+ VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
+ mld_domifreattach(MLD_IFINFO(ifp));
+ }
+#endif /* INET6 */
+
+ /*
+ * Finally, mark this ifnet as attached.
+ */
+ lck_mtx_lock(rnh_lock);
+ ifnet_lock_exclusive(ifp);
+ lck_mtx_lock_spin(&ifp->if_ref_lock);
+ ifp->if_refflags = IFRF_ATTACHED;
+ lck_mtx_unlock(&ifp->if_ref_lock);
+ if (net_rtref) {
+ /* boot-args override; enable idle notification */
+ (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
+ IFRF_IDLE_NOTIFY);
+ } else {
+ /* apply previous request(s) to set the idle flags, if any */
+ (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
+ ifp->if_idle_new_flags_mask);
+
}
-
- dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, 0, 0);
ifnet_lock_done(ifp);
+ lck_mtx_unlock(rnh_lock);
+ dlil_if_unlock();
+
+#if PF
+ /*
+ * Attach packet filter to this interface, if enabled.
+ */
+ pf_ifnet_hook(ifp, 1);
+#endif /* PF */
- return 0;
+ dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
+
+ if (dlil_verbose) {
+ printf("%s%d: attached%s\n", ifp->if_name, ifp->if_unit,
+ (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
+ }
+
+ return (0);
}
-int
-dlil_if_attach(struct ifnet *ifp)
-{
- dlil_if_attach_with_address(ifp, NULL);
+/*
+ * Prepare the storage for the first/permanent link address, which must
+ * have the same lifetime as the ifnet itself. Although the link
+ * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
+ * its location in memory must never change as it may still be referred
+ * to by some parts of the system afterwards (unfortunate implementation
+ * artifacts inherited from BSD.)
+ *
+ * Caller must hold ifnet lock as writer.
+ */
+static struct ifaddr *
+dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
+{
+ struct ifaddr *ifa, *oifa;
+ struct sockaddr_dl *asdl, *msdl;
+ char workbuf[IFNAMSIZ*2];
+ int namelen, masklen, socksize;
+ struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+
+ ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
+ VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
+
+ namelen = snprintf(workbuf, sizeof (workbuf), "%s%d",
+ ifp->if_name, ifp->if_unit);
+ masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
+ socksize = masklen + ifp->if_addrlen;
+#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
+ if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
+ socksize = sizeof(struct sockaddr_dl);
+ socksize = ROUNDUP(socksize);
+#undef ROUNDUP
+
+ ifa = ifp->if_lladdr;
+ if (socksize > DLIL_SDLMAXLEN ||
+ (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
+ /*
+ * Rare, but in the event that the link address requires
+ * more storage space than DLIL_SDLMAXLEN, allocate the
+ * largest possible storages for address and mask, such
+ * that we can reuse the same space when if_addrlen grows.
+ * This same space will be used when if_addrlen shrinks.
+ */
+ if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
+ int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
+ ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
+ if (ifa == NULL)
+ return (NULL);
+ ifa_lock_init(ifa);
+ /* Don't set IFD_ALLOC, as this is permanent */
+ ifa->ifa_debug = IFD_LINK;
+ }
+ IFA_LOCK(ifa);
+ /* address and mask sockaddr_dl locations */
+ asdl = (struct sockaddr_dl *)(ifa + 1);
+ bzero(asdl, SOCK_MAXADDRLEN);
+ msdl = (struct sockaddr_dl *)((char *)asdl + SOCK_MAXADDRLEN);
+ bzero(msdl, SOCK_MAXADDRLEN);
+ } else {
+ VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
+ /*
+ * Use the storage areas for address and mask within the
+ * dlil_ifnet structure. This is the most common case.
+ */
+ if (ifa == NULL) {
+ ifa = &dl_if->dl_if_lladdr.ifa;
+ ifa_lock_init(ifa);
+ /* Don't set IFD_ALLOC, as this is permanent */
+ ifa->ifa_debug = IFD_LINK;
+ }
+ IFA_LOCK(ifa);
+ /* address and mask sockaddr_dl locations */
+ asdl = (struct sockaddr_dl *)&dl_if->dl_if_lladdr.asdl;
+ bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
+ msdl = (struct sockaddr_dl *)&dl_if->dl_if_lladdr.msdl;
+ bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
+ }
+
+ /* hold a permanent reference for the ifnet itself */
+ IFA_ADDREF_LOCKED(ifa);
+ oifa = ifp->if_lladdr;
+ ifp->if_lladdr = ifa;
+
+ VERIFY(ifa->ifa_debug == IFD_LINK);
+ ifa->ifa_ifp = ifp;
+ ifa->ifa_rtrequest = link_rtrequest;
+ ifa->ifa_addr = (struct sockaddr *)asdl;
+ asdl->sdl_len = socksize;
+ asdl->sdl_family = AF_LINK;
+ bcopy(workbuf, asdl->sdl_data, namelen);
+ asdl->sdl_nlen = namelen;
+ asdl->sdl_index = ifp->if_index;
+ asdl->sdl_type = ifp->if_type;
+ if (ll_addr != NULL) {
+ asdl->sdl_alen = ll_addr->sdl_alen;
+ bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
+ } else {
+ asdl->sdl_alen = 0;
+ }
+ ifa->ifa_netmask = (struct sockaddr*)msdl;
+ msdl->sdl_len = masklen;
+ while (namelen != 0)
+ msdl->sdl_data[--namelen] = 0xff;
+ IFA_UNLOCK(ifa);
+
+ if (oifa != NULL)
+ IFA_REMREF(oifa);
+
+ return (ifa);
}
+static void
+if_purgeaddrs(struct ifnet *ifp)
+{
+#if INET
+ in_purgeaddrs(ifp);
+#endif /* INET */
+#if INET6
+ in6_purgeaddrs(ifp);
+#endif /* INET6 */
+#if NETAT
+ at_purgeaddrs(ifp);
+#endif
+}
-int
-dlil_if_detach(struct ifnet *ifp)
+errno_t
+ifnet_detach(ifnet_t ifp)
{
- struct ifnet_filter *filter;
- struct ifnet_filter *filter_next;
- int zeroed = 0;
- int retval = 0;
- struct ifnet_filter_head fhead;
-
-
+ if (ifp == NULL)
+ return (EINVAL);
+
+ ifnet_head_lock_exclusive();
+ lck_mtx_lock(rnh_lock);
ifnet_lock_exclusive(ifp);
-
- if ((ifp->if_eflags & IFEF_DETACHING) != 0) {
+
+ /*
+ * Check to see if this interface has previously triggered
+ * aggressive protocol draining; if so, decrement the global
+ * refcnt and clear PR_AGGDRAIN on the route domain if
+ * there are no more of such an interface around.
+ */
+ (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
+
+ lck_mtx_lock_spin(&ifp->if_ref_lock);
+ if (!(ifp->if_refflags & IFRF_ATTACHED)) {
+ lck_mtx_unlock(&ifp->if_ref_lock);
+ ifnet_lock_done(ifp);
+ ifnet_head_done();
+ lck_mtx_unlock(rnh_lock);
+ return (EINVAL);
+ } else if (ifp->if_refflags & IFRF_DETACHING) {
/* Interface has already been detached */
+ lck_mtx_unlock(&ifp->if_ref_lock);
ifnet_lock_done(ifp);
- return ENXIO;
+ ifnet_head_done();
+ lck_mtx_unlock(rnh_lock);
+ return (ENXIO);
}
-
+ /* Indicate this interface is being detached */
+ ifp->if_refflags &= ~IFRF_ATTACHED;
+ ifp->if_refflags |= IFRF_DETACHING;
+ lck_mtx_unlock(&ifp->if_ref_lock);
+
+ if (dlil_verbose)
+ printf("%s%d: detaching\n", ifp->if_name, ifp->if_unit);
+
/*
- * Indicate this interface is being detached.
- *
- * This should prevent protocols from attaching
- * from this point on. Interface will remain on
- * the list until all of the protocols are detached.
+ * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
+ * no longer be visible during lookups from this point.
*/
- ifp->if_eflags |= IFEF_DETACHING;
+ VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
+ TAILQ_REMOVE(&ifnet_head, ifp, if_link);
+ ifp->if_link.tqe_next = NULL;
+ ifp->if_link.tqe_prev = NULL;
+ ifindex2ifnet[ifp->if_index] = NULL;
+
+ /* Record detach PC stacktrace */
+ ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
+
ifnet_lock_done(ifp);
-
- dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, 0, 0);
-
- if ((retval = dlil_write_begin()) != 0) {
- if (retval == EDEADLK) {
- retval = DLIL_WAIT_FOR_FREE;
-
- /* We need to perform a delayed detach */
- ifp->if_delayed_detach = 1;
- dlil_detach_waiting = 1;
- wakeup(&dlil_detach_waiting);
+ ifnet_head_done();
+ lck_mtx_unlock(rnh_lock);
+
+ /* Let BPF know we're detaching */
+ bpfdetach(ifp);
+
+ /* Mark the interface as DOWN */
+ if_down(ifp);
+
+ /* Disable forwarding cached route */
+ lck_mtx_lock(&ifp->if_cached_route_lock);
+ ifp->if_fwd_cacheok = 0;
+ lck_mtx_unlock(&ifp->if_cached_route_lock);
+
+ /*
+ * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
+ * references to the info structures and leave them attached to
+ * this ifnet.
+ */
+#if INET
+ igmp_domifdetach(ifp);
+#endif /* INET */
+#if INET6
+ mld_domifdetach(ifp);
+#endif /* INET6 */
+
+ dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
+
+ /* Let worker thread take care of the rest, to avoid reentrancy */
+ dlil_if_lock();
+ ifnet_detaching_enqueue(ifp);
+ dlil_if_unlock();
+
+ return (0);
+}
+
+static void
+ifnet_detaching_enqueue(struct ifnet *ifp)
+{
+ dlil_if_lock_assert();
+
+ ++ifnet_detaching_cnt;
+ VERIFY(ifnet_detaching_cnt != 0);
+ TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
+ wakeup((caddr_t)&ifnet_delayed_run);
+}
+
+static struct ifnet *
+ifnet_detaching_dequeue(void)
+{
+ struct ifnet *ifp;
+
+ dlil_if_lock_assert();
+
+ ifp = TAILQ_FIRST(&ifnet_detaching_head);
+ VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
+ if (ifp != NULL) {
+ VERIFY(ifnet_detaching_cnt != 0);
+ --ifnet_detaching_cnt;
+ TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
+ ifp->if_detaching_link.tqe_next = NULL;
+ ifp->if_detaching_link.tqe_prev = NULL;
+ }
+ return (ifp);
+}
+
+static void
+ifnet_delayed_thread_func(void)
+{
+ struct ifnet *ifp;
+
+ for (;;) {
+ dlil_if_lock();
+ while (ifnet_detaching_cnt == 0) {
+ (void) msleep(&ifnet_delayed_run, &dlil_ifnet_lock,
+ (PZERO - 1), "ifnet_delayed_thread", NULL);
}
- return retval;
+
+ VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
+
+ /* Take care of detaching ifnet */
+ ifp = ifnet_detaching_dequeue();
+ dlil_if_unlock();
+ if (ifp != NULL)
+ ifnet_detach_final(ifp);
+ }
+}
+
+static void
+ifnet_detach_final(struct ifnet *ifp)
+{
+ struct ifnet_filter *filter, *filter_next;
+ struct ifnet_filter_head fhead;
+ struct dlil_threading_info *inputthread;
+ struct ifaddr *ifa;
+ ifnet_detached_func if_free;
+ int i;
+
+ lck_mtx_lock(&ifp->if_ref_lock);
+ if (!(ifp->if_refflags & IFRF_DETACHING)) {
+ panic("%s: flags mismatch (detaching not set) ifp=%p",
+ __func__, ifp);
+ /* NOTREACHED */
}
-
- /* Steal the list of interface filters */
+
+ /* Wait until the existing IO references get released
+ * before we proceed with ifnet_detach
+ */
+ while (ifp->if_refio > 0) {
+ printf("%s: Waiting for IO references on %s%d interface "
+ "to be released\n", __func__, ifp->if_name, ifp->if_unit);
+ (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
+ (PZERO - 1), "ifnet_ioref_wait", NULL);
+ }
+ lck_mtx_unlock(&ifp->if_ref_lock);
+
+ /* Detach interface filters */
+ lck_mtx_lock(&ifp->if_flt_lock);
+ if_flt_monitor_enter(ifp);
+
+ lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
fhead = ifp->if_flt_head;
TAILQ_INIT(&ifp->if_flt_head);
-
- /* unuse the interface */
- zeroed = ifp_unuse(ifp);
-
- dlil_write_end();
-
+
for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
filter_next = TAILQ_NEXT(filter, filt_next);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
dlil_detach_filter_internal(filter, 1);
+ lck_mtx_lock(&ifp->if_flt_lock);
}
-
- if (zeroed == 0) {
- retval = DLIL_WAIT_FOR_FREE;
- }
- else
- {
- ifp_use_reached_zero(ifp);
+ if_flt_monitor_leave(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
+ /* Tell upper layers to drop their network addresses */
+ if_purgeaddrs(ifp);
+
+ ifnet_lock_exclusive(ifp);
+
+ /* Unplumb all protocols */
+ for (i = 0; i < PROTO_HASH_SLOTS; i++) {
+ struct if_proto *proto;
+
+ proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
+ while (proto != NULL) {
+ protocol_family_t family = proto->protocol_family;
+ ifnet_lock_done(ifp);
+ proto_unplumb(family, ifp);
+ ifnet_lock_exclusive(ifp);
+ proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
+ }
+ /* There should not be any protocols left */
+ VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
}
-
- return retval;
-}
+ zfree(dlif_phash_zone, ifp->if_proto_hash);
+ ifp->if_proto_hash = NULL;
+ /* Detach (permanent) link address from if_addrhead */
+ ifa = TAILQ_FIRST(&ifp->if_addrhead);
+ VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
+ IFA_LOCK(ifa);
+ if_detach_link_ifa(ifp, ifa);
+ IFA_UNLOCK(ifa);
-int
-dlil_reg_if_modules(u_long interface_family,
- struct dlil_ifmod_reg_str *ifmod)
-{
- struct if_family_str *if_family;
-
-
- if (find_family_module(interface_family)) {
- DLIL_PRINTF("Attempt to register dlil family module more than once - %d\n",
- interface_family);
- return EEXIST;
- }
-
- if ((!ifmod->add_if) || (!ifmod->del_if) ||
- (!ifmod->add_proto) || (!ifmod->del_proto)) {
- DLIL_PRINTF("dlil_reg_if_modules passed at least one null pointer\n");
- return EINVAL;
- }
-
- /*
- * The following is a gross hack to keep from breaking
- * Vicomsoft's internet gateway on Jaguar. Vicomsoft
- * does not zero the reserved fields in dlil_ifmod_reg_str.
- * As a result, we have to zero any function that used to
- * be reserved fields at the time Vicomsoft built their
- * kext. Radar #2974305
- */
- if (ifmod->reserved[0] != 0 || ifmod->reserved[1] != 0 || ifmod->reserved[2]) {
- if (interface_family == 123) { /* Vicom */
- ifmod->init_if = 0;
- } else {
- return EINVAL;
+ /* Remove (permanent) link address from ifnet_addrs[] */
+ IFA_REMREF(ifa);
+ ifnet_addrs[ifp->if_index - 1] = NULL;
+
+ /* This interface should not be on {ifnet_head,detaching} */
+ VERIFY(ifp->if_link.tqe_next == NULL);
+ VERIFY(ifp->if_link.tqe_prev == NULL);
+ VERIFY(ifp->if_detaching_link.tqe_next == NULL);
+ VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
+
+ /* Prefix list should be empty by now */
+ VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
+
+ /* The slot should have been emptied */
+ VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
+
+ /* There should not be any addresses left */
+ VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
+
+ /*
+ * If thread affinity was set for the workloop thread, we will need
+ * to tear down the affinity and release the extra reference count
+ * taken at attach time.
+ */
+ if ((inputthread = ifp->if_input_thread) != NULL) {
+ if (inputthread->net_affinity) {
+ struct thread *tp;
+
+ if (inputthread == dlil_lo_thread_ptr) {
+ panic("%s: Thread affinity should not be "
+ "enabled on the loopback dlil input "
+ "thread", __func__);
+ /* NOTREACHED */
+ }
+
+ lck_mtx_lock_spin(&inputthread->input_lck);
+ tp = inputthread->workloop_thread;
+ inputthread->workloop_thread = NULL;
+ inputthread->tag = 0;
+ inputthread->net_affinity = FALSE;
+ lck_mtx_unlock(&inputthread->input_lck);
+
+ /* Tear down workloop thread affinity */
+ if (tp != NULL) {
+ (void) dlil_affinity_set(tp,
+ THREAD_AFFINITY_TAG_NULL);
+ thread_deallocate(tp);
+ }
+
+ /* Tear down dlil input thread affinity */
+ tp = inputthread->input_thread;
+ (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
+ thread_deallocate(tp);
}
- }
-
- if_family = (struct if_family_str *) _MALLOC(sizeof(struct if_family_str), M_IFADDR, M_WAITOK);
- if (!if_family) {
- DLIL_PRINTF("dlil_reg_if_modules failed allocation\n");
- return ENOMEM;
- }
-
- bzero(if_family, sizeof(struct if_family_str));
-
- if_family->if_family = interface_family & 0xffff;
- if_family->shutdown = ifmod->shutdown;
- if_family->add_if = ifmod->add_if;
- if_family->del_if = ifmod->del_if;
- if_family->init_if = ifmod->init_if;
- if_family->add_proto = ifmod->add_proto;
- if_family->del_proto = ifmod->del_proto;
- if_family->ifmod_ioctl = ifmod->ifmod_ioctl;
- if_family->refcnt = 1;
- if_family->flags = 0;
-
- TAILQ_INSERT_TAIL(&if_family_head, if_family, if_fam_next);
- return 0;
-}
-
-int dlil_dereg_if_modules(u_long interface_family)
-{
- struct if_family_str *if_family;
- int ret = 0;
-
-
- if_family = find_family_module(interface_family);
- if (if_family == 0) {
- return ENXIO;
- }
-
- if (--if_family->refcnt == 0) {
- if (if_family->shutdown)
- (*if_family->shutdown)();
-
- TAILQ_REMOVE(&if_family_head, if_family, if_fam_next);
- FREE(if_family, M_IFADDR);
- }
- else {
- if_family->flags |= DLIL_SHUTDOWN;
- ret = DLIL_WAIT_FOR_FREE;
- }
-
- return ret;
-}
-
-
-int
-dlil_reg_proto_module(
- u_long protocol_family,
- u_long interface_family,
- int (*attach)(struct ifnet *ifp, u_long protocol_family),
- int (*detach)(struct ifnet *ifp, u_long protocol_family))
-{
- struct proto_family_str *proto_family;
-
- if (attach == NULL) return EINVAL;
-
- lck_mtx_lock(proto_family_mutex);
-
- TAILQ_FOREACH(proto_family, &proto_family_head, proto_fam_next) {
- if (proto_family->proto_family == protocol_family &&
- proto_family->if_family == interface_family) {
- lck_mtx_unlock(proto_family_mutex);
- return EEXIST;
+ /* cleanup ifp dlil input thread, if any */
+ ifp->if_input_thread = NULL;
+
+ if (inputthread != dlil_lo_thread_ptr) {
+#ifdef DLIL_DEBUG
+ printf("%s: wakeup thread threadinfo: %p "
+ "input_thread=%p threads: cur=%d max=%d\n",
+ __func__, inputthread, inputthread->input_thread,
+ dlil_multithreaded_input, cur_dlil_input_threads);
+#endif
+ lck_mtx_lock_spin(&inputthread->input_lck);
+
+ inputthread->input_waiting |= DLIL_INPUT_TERMINATE;
+ if (!(inputthread->input_waiting & DLIL_INPUT_RUNNING))
+ wakeup((caddr_t)&inputthread->input_waiting);
+
+ lck_mtx_unlock(&inputthread->input_lck);
}
}
- proto_family = (struct proto_family_str *) _MALLOC(sizeof(struct proto_family_str), M_IFADDR, M_WAITOK);
- if (!proto_family) {
- lck_mtx_unlock(proto_family_mutex);
- return ENOMEM;
+ /* The driver might unload, so point these to ourselves */
+ if_free = ifp->if_free;
+ ifp->if_output = ifp_if_output;
+ ifp->if_ioctl = ifp_if_ioctl;
+ ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
+ ifp->if_free = ifp_if_free;
+ ifp->if_demux = ifp_if_demux;
+ ifp->if_event = ifp_if_event;
+ ifp->if_framer = ifp_if_framer;
+ ifp->if_add_proto = ifp_if_add_proto;
+ ifp->if_del_proto = ifp_if_del_proto;
+ ifp->if_check_multi = ifp_if_check_multi;
+
+ ifnet_lock_done(ifp);
+
+#if PF
+ /*
+ * Detach this interface from packet filter, if enabled.
+ */
+ pf_ifnet_hook(ifp, 0);
+#endif /* PF */
+
+ /* Filter list should be empty */
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+ VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
+ VERIFY(ifp->if_flt_busy == 0);
+ VERIFY(ifp->if_flt_waiters == 0);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
+ /* Last chance to cleanup any cached route */
+ lck_mtx_lock(&ifp->if_cached_route_lock);
+ VERIFY(!ifp->if_fwd_cacheok);
+ if (ifp->if_fwd_route.ro_rt != NULL)
+ rtfree(ifp->if_fwd_route.ro_rt);
+ bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
+ if (ifp->if_src_route.ro_rt != NULL)
+ rtfree(ifp->if_src_route.ro_rt);
+ bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
+ if (ifp->if_src_route6.ro_rt != NULL)
+ rtfree(ifp->if_src_route6.ro_rt);
+ bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
+ lck_mtx_unlock(&ifp->if_cached_route_lock);
+
+ ifnet_llreach_ifdetach(ifp);
+
+ dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
+
+ if (if_free != NULL)
+ if_free(ifp);
+
+ /*
+ * Finally, mark this ifnet as detached.
+ */
+ lck_mtx_lock_spin(&ifp->if_ref_lock);
+ if (!(ifp->if_refflags & IFRF_DETACHING)) {
+ panic("%s: flags mismatch (detaching not set) ifp=%p",
+ __func__, ifp);
+ /* NOTREACHED */
}
+ ifp->if_refflags &= ~IFRF_DETACHING;
+ lck_mtx_unlock(&ifp->if_ref_lock);
- bzero(proto_family, sizeof(struct proto_family_str));
- proto_family->proto_family = protocol_family;
- proto_family->if_family = interface_family & 0xffff;
- proto_family->attach_proto = attach;
- proto_family->detach_proto = detach;
+ if (dlil_verbose)
+ printf("%s%d: detached\n", ifp->if_name, ifp->if_unit);
- TAILQ_INSERT_TAIL(&proto_family_head, proto_family, proto_fam_next);
- lck_mtx_unlock(proto_family_mutex);
- return 0;
+ /* Release reference held during ifnet attach */
+ ifnet_release(ifp);
}
-int dlil_dereg_proto_module(u_long protocol_family, u_long interface_family)
+static errno_t
+ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
- struct proto_family_str *proto_family;
- int ret = 0;
+#pragma unused(ifp)
+ m_freem(m);
+ return (0);
+}
- lck_mtx_lock(proto_family_mutex);
+static errno_t
+ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
+{
+#pragma unused(ifp, fh, pf)
+ m_freem(m);
+ return (EJUSTRETURN);
+}
- proto_family = find_proto_module(protocol_family, interface_family);
- if (proto_family == 0) {
- lck_mtx_unlock(proto_family_mutex);
- return ENXIO;
- }
+static errno_t
+ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
+ const struct ifnet_demux_desc *da, u_int32_t dc)
+{
+#pragma unused(ifp, pf, da, dc)
+ return (EINVAL);
+}
+
+static errno_t
+ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
+{
+#pragma unused(ifp, pf)
+ return (EINVAL);
+}
+
+static errno_t
+ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
+{
+#pragma unused(ifp, sa)
+ return (EOPNOTSUPP);
+}
- TAILQ_REMOVE(&proto_family_head, proto_family, proto_fam_next);
- FREE(proto_family, M_IFADDR);
-
- lck_mtx_unlock(proto_family_mutex);
- return ret;
+static errno_t
+ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
+ const struct sockaddr *sa, const char *ll, const char *t)
+{
+#pragma unused(ifp, m, sa, ll, t)
+ m_freem(*m);
+ *m = NULL;
+ return (EJUSTRETURN);
}
-int dlil_plumb_protocol(u_long protocol_family, struct ifnet *ifp)
+static errno_t
+ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
- struct proto_family_str *proto_family;
+#pragma unused(ifp, cmd, arg)
+ return (EOPNOTSUPP);
+}
+
+static errno_t
+ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
+{
+#pragma unused(ifp, tm, f)
+ /* XXX not sure what to do here */
+ return (0);
+}
+
+static void
+ifp_if_free(struct ifnet *ifp)
+{
+#pragma unused(ifp)
+}
+
+static void
+ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
+{
+#pragma unused(ifp, e)
+}
+
+/*
+ * dlil_if_acquire: hand out a dlil_ifnet for the given family/uniqueid.
+ *
+ * Walks dlil_ifnet_head under dlil_if_lock() looking for an entry of the
+ * same family whose uniqueid has the same length and bytes.  A matching
+ * entry that is not DLIF_INUSE is marked DLIF_INUSE|DLIF_REUSE and returned
+ * through *ifp.  A matching in-use entry yields EBUSY when a uniqueid was
+ * supplied; with uniqueid_len == 0 the search simply continues.  If nothing
+ * can be reused, a zeroed object is taken from dlif_zone, its locks are
+ * initialized, and it is appended to dlil_ifnet_head.
+ *
+ * Returns 0 on success, EBUSY as described above, or ENOMEM when either
+ * allocation fails.  *ifp is written only on success.
+ */
+__private_extern__
+int dlil_if_acquire(u_int32_t family, const void *uniqueid,
+ size_t uniqueid_len, struct ifnet **ifp)
+{
+ struct ifnet *ifp1 = NULL;
+ struct dlil_ifnet *dlifp1 = NULL;
+ void *buf, *base, **pbuf;
int ret = 0;
- lck_mtx_lock(proto_family_mutex);
- proto_family = find_proto_module(protocol_family, ifp->if_family);
- if (proto_family == 0) {
- lck_mtx_unlock(proto_family_mutex);
- return ENXIO;
+ dlil_if_lock();
+ TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
+ ifp1 = (struct ifnet *)dlifp1;
+
+ if (ifp1->if_family != family)
+ continue;
+
+ lck_mtx_lock(&dlifp1->dl_if_lock);
+ /* same uniqueid and same len or no unique id specified */
+ if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
+ !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
+ /* check for matching interface in use */
+ if (dlifp1->dl_if_flags & DLIF_INUSE) {
+ if (uniqueid_len) {
+ ret = EBUSY;
+ lck_mtx_unlock(&dlifp1->dl_if_lock);
+ goto end;
+ }
+ } else {
+ dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
+ lck_mtx_unlock(&dlifp1->dl_if_lock);
+ *ifp = ifp1;
+ goto end;
+ }
+ }
+ lck_mtx_unlock(&dlifp1->dl_if_lock);
}
- ret = proto_family->attach_proto(ifp, protocol_family);
+ /* no interface found, allocate a new one */
+ buf = zalloc(dlif_zone);
+ if (buf == NULL) {
+ ret = ENOMEM;
+ goto end;
+ }
+ bzero(buf, dlif_bufsize);
+
+ /* Get the 64-bit aligned base address for this object */
+ base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
+ sizeof (u_int64_t));
+ VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
+
+ /*
+ * Wind back a pointer size from the aligned base and
+ * save the original address so we can free it later.
+ */
+ pbuf = (void **)((intptr_t)base - sizeof (void *));
+ *pbuf = buf;
+ dlifp1 = base;
+
+ if (uniqueid_len) {
+ MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
+ M_NKE, M_WAITOK);
+ if (dlifp1->dl_if_uniqueid == NULL) {
+ /*
+ * Give back the address zalloc() returned (buf);
+ * dlifp1 is only the aligned address inside it.
+ */
+ zfree(dlif_zone, buf);
+ ret = ENOMEM;
+ goto end;
+ }
+ bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
+ dlifp1->dl_if_uniqueid_len = uniqueid_len;
+ }
- lck_mtx_unlock(proto_family_mutex);
- return ret;
+ ifp1 = (struct ifnet *)dlifp1;
+ dlifp1->dl_if_flags = DLIF_INUSE;
+ if (ifnet_debug) {
+ dlifp1->dl_if_flags |= DLIF_DEBUG;
+ dlifp1->dl_if_trace = dlil_if_trace;
+ }
+ ifp1->if_name = dlifp1->dl_if_namestorage;
+#if CONFIG_MACF_NET
+ mac_ifnet_label_init(ifp1);
+#endif
+
+ lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
+ lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
+ lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
+ lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
+ lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_lock_group,
+ ifnet_lock_attr);
+ lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
+ ifnet_lock_attr);
+ lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
+
+ TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
+
+ *ifp = ifp1;
+
+end:
+ dlil_if_unlock();
+
+ /* Only pointer values are inspected here; nothing is dereferenced */
+ VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
+ IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
+
+ return (ret);
}
+/*
+ * dlil_if_release: mark an interface as no longer in use.
+ *
+ * With the ifnet lock held exclusively and dl_if_lock held, clears
+ * DLIF_INUSE and makes if_name refer to the copy of the name kept in
+ * dl_if_namestorage.  When CONFIG_MACF_NET is set, the MAC label is
+ * recycled before the ifnet lock is dropped.
+ */
+__private_extern__ void
+dlil_if_release(ifnet_t ifp)
+{
+ struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
+
+ ifnet_lock_exclusive(ifp);
+ lck_mtx_lock(&dlifp->dl_if_lock);
+ dlifp->dl_if_flags &= ~DLIF_INUSE;
+ /*
+ * Keep the name in our own storage.  Skip the copy when if_name
+ * already points there (an overlapping strncpy() is undefined), and
+ * terminate explicitly because strncpy() does not do so when the
+ * source is IFNAMSIZ bytes or longer.
+ */
+ if (ifp->if_name != dlifp->dl_if_namestorage) {
+ strncpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
+ dlifp->dl_if_namestorage[IFNAMSIZ - 1] = '\0';
+ }
+ ifp->if_name = dlifp->dl_if_namestorage;
+ lck_mtx_unlock(&dlifp->dl_if_lock);
+#if CONFIG_MACF_NET
+ /*
+ * We can either recycle the MAC label here or in dlil_if_acquire().
+ * It seems logical to do it here but this means that anything that
+ * still has a handle on ifp will now see it as unlabeled.
+ * Since the interface is "dead" that may be OK. Revisit later.
+ */
+ mac_ifnet_label_recycle(ifp);
+#endif
+ ifnet_lock_done(ifp);
+}
-int dlil_unplumb_protocol(u_long protocol_family, struct ifnet *ifp)
+/*
+ * dlil_if_lock: acquire dlil_ifnet_lock, the mutex that dlil_if_acquire()
+ * holds while it walks and modifies dlil_ifnet_head.
+ */
+__private_extern__ void
+dlil_if_lock(void)
{
- struct proto_family_str *proto_family;
- int ret = 0;
+ lck_mtx_lock(&dlil_ifnet_lock);
+}
- lck_mtx_lock(proto_family_mutex);
+/*
+ * dlil_if_unlock: release dlil_ifnet_lock taken by dlil_if_lock().
+ */
+__private_extern__ void
+dlil_if_unlock(void)
+{
+ lck_mtx_unlock(&dlil_ifnet_lock);
+}
- proto_family = find_proto_module(protocol_family, ifp->if_family);
- if (proto_family && proto_family->detach_proto)
- ret = proto_family->detach_proto(ifp, protocol_family);
- else
- ret = dlil_detach_protocol(ifp, protocol_family);
-
- lck_mtx_unlock(proto_family_mutex);
- return ret;
+/*
+ * dlil_if_lock_assert: assert (LCK_MTX_ASSERT_OWNED) that the caller
+ * currently holds dlil_ifnet_lock.
+ */
+__private_extern__ void
+dlil_if_lock_assert(void)
+{
+ lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
-static errno_t
-dlil_recycle_ioctl(
- __unused ifnet_t ifnet_ptr,
- __unused u_int32_t ioctl_code,
- __unused void *ioctl_arg)
+/*
+ * dlil_proto_unplumb_all: unplumb the well-known protocols from ifp.
+ * PF_INET is always unplumbed; PF_INET6 and PF_APPLETALK only when INET6
+ * and NETAT are compiled in.  The result of each proto_unplumb() call is
+ * deliberately discarded.
+ */
+__private_extern__ void
+dlil_proto_unplumb_all(struct ifnet *ifp)
{
- return EOPNOTSUPP;
+ /*
+ * if_proto_hash[0-3] are for PF_INET, PF_INET6, PF_APPLETALK
+ * and PF_VLAN, where each bucket contains exactly one entry;
+ * PF_VLAN does not need an explicit unplumb.
+ *
+ * if_proto_hash[4] is for other protocols; we expect anything
+ * in this bucket to respond to the DETACHING event (which would
+ * have happened by now) and do the unplumb then.
+ */
+ (void) proto_unplumb(PF_INET, ifp);
+#if INET6
+ (void) proto_unplumb(PF_INET6, ifp);
+#endif /* INET6 */
+#if NETAT
+ (void) proto_unplumb(PF_APPLETALK, ifp);
+#endif /* NETAT */
}
-static int
-dlil_recycle_output(
- __unused struct ifnet *ifnet_ptr,
- struct mbuf *m)
+/*
+ * ifp_src_route_copyout: copy the interface's cached source route
+ * (if_src_route) into dst via route_copyout(), holding
+ * if_cached_route_lock for the duration of the copy.
+ */
+static void
+ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
+{
+ /* taken with the spin variant, then converted before the copy */
+ lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+ lck_mtx_convert_spin(&ifp->if_cached_route_lock);
+
+ route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
+
+ lck_mtx_unlock(&ifp->if_cached_route_lock);
+}
+
+/*
+ * ifp_src_route_copyin: offer src to the interface as its cached source
+ * route.  Under if_cached_route_lock, the route is stored in if_src_route
+ * when if_fwd_cacheok is set; otherwise the route held by src is released
+ * and src->ro_rt is cleared.
+ */
+static void
+ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
- m_freem(m);
- return 0;
+ lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+ lck_mtx_convert_spin(&ifp->if_cached_route_lock);
+
+ if (!ifp->if_fwd_cacheok) {
+ /* caching is off: drop the route instead of storing it */
+ rtfree(src->ro_rt);
+ src->ro_rt = NULL;
+ } else {
+ route_copyin(src, &ifp->if_src_route, sizeof (*src));
+ }
+
+ lck_mtx_unlock(&ifp->if_cached_route_lock);
}
+#if INET6
+/*
+ * ifp_src_route6_copyout: IPv6 counterpart of ifp_src_route_copyout().
+ * Copies if_src_route6 into dst via route_copyout(), holding
+ * if_cached_route_lock for the duration of the copy.  Both route_in6
+ * pointers are cast to struct route for the call; the size passed is
+ * that of the route_in6.
+ */
static void
-dlil_recycle_free(
- __unused ifnet_t ifnet_ptr)
+ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
+ lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+ lck_mtx_convert_spin(&ifp->if_cached_route_lock);
+
+ route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
+ sizeof (*dst));
+
+ lck_mtx_unlock(&ifp->if_cached_route_lock);
}
-static errno_t
-dlil_recycle_set_bpf_tap(
- __unused ifnet_t ifp,
- __unused bpf_tap_mode mode,
- __unused bpf_packet_func callback)
-{
- /* XXX not sure what to do here */
- return 0;
-}
-
-int dlil_if_acquire(
- u_long family,
- const void *uniqueid,
- size_t uniqueid_len,
- struct ifnet **ifp)
-{
- struct ifnet *ifp1 = NULL;
- struct dlil_ifnet *dlifp1 = NULL;
- int ret = 0;
-
- lck_mtx_lock(dlil_ifnet_mutex);
- TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
-
- ifp1 = (struct ifnet *)dlifp1;
-
- if (ifp1->if_family == family) {
-
- /* same uniqueid and same len or no unique id specified */
- if ((uniqueid_len == dlifp1->if_uniqueid_len)
- && !bcmp(uniqueid, dlifp1->if_uniqueid, uniqueid_len)) {
-
- /* check for matching interface in use */
- if (ifp1->if_eflags & IFEF_INUSE) {
- if (uniqueid_len) {
- ret = EBUSY;
- goto end;
- }
- }
- else {
- if (!ifp1->if_lock)
- panic("ifp's lock is gone\n");
- ifnet_lock_exclusive(ifp1);
- ifp1->if_eflags |= (IFEF_INUSE | IFEF_REUSE);
- ifnet_lock_done(ifp1);
- *ifp = ifp1;
- goto end;
- }
- }
- }
- }
-
- /* no interface found, allocate a new one */
- MALLOC(dlifp1, struct dlil_ifnet *, sizeof(*dlifp1), M_NKE, M_WAITOK);
- if (dlifp1 == 0) {
- ret = ENOMEM;
- goto end;
- }
-
- bzero(dlifp1, sizeof(*dlifp1));
-
- if (uniqueid_len) {
- MALLOC(dlifp1->if_uniqueid, void *, uniqueid_len, M_NKE, M_WAITOK);
- if (dlifp1->if_uniqueid == 0) {
- FREE(dlifp1, M_NKE);
- ret = ENOMEM;
- goto end;
- }
- bcopy(uniqueid, dlifp1->if_uniqueid, uniqueid_len);
- dlifp1->if_uniqueid_len = uniqueid_len;
- }
-
- ifp1 = (struct ifnet *)dlifp1;
- ifp1->if_eflags |= IFEF_INUSE;
- ifp1->if_name = dlifp1->if_namestorage;
-
- TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
-
- *ifp = ifp1;
+/*
+ * ifp_src_route6_copyin: offer src to the interface as its cached IPv6
+ * source route.  Under if_cached_route_lock, the route is stored in
+ * if_src_route6 when if_fwd_cacheok is set; otherwise the route held by
+ * src is released and src->ro_rt is cleared.
+ */
+static void
+ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
+{
+ lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+ lck_mtx_convert_spin(&ifp->if_cached_route_lock);
-end:
- lck_mtx_unlock(dlil_ifnet_mutex);
+ if (!ifp->if_fwd_cacheok) {
+ /* caching is off: drop the route instead of storing it */
+ rtfree(src->ro_rt);
+ src->ro_rt = NULL;
+ } else {
+ route_copyin((struct route *)src,
+ (struct route *)&ifp->if_src_route6, sizeof (*src));
+ }
+
+ lck_mtx_unlock(&ifp->if_cached_route_lock);
+}
+#endif /* INET6 */
+
+/*
+ * ifnet_cached_rtlookup_inet: look up a route for src_ip scoped to ifp,
+ * going through the interface's cached source route.
+ *
+ * The cached route is copied out first.  It is returned as-is only when
+ * it is non-NULL, has RTF_UP set, its destination address equals src_ip
+ * and its generation_id matches route_generation.  Otherwise any route
+ * that was copied out is released, the destination is (re)initialized for
+ * src_ip, and rtalloc1_scoped() is asked for a route on ifp->if_index.  A
+ * freshly found route is handed to ifp_src_route_copyin() after taking an
+ * extra reference, so the pointer returned here stays referenced.
+ *
+ * Returns the rtentry, or NULL when no route was found.
+ * NOTE(review): the caller presumably owns one reference on the returned
+ * route and must release it -- confirm against callers.
+ */
+struct rtentry *
+ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
+{
+ struct route src_rt;
+ struct sockaddr_in *dst = (struct sockaddr_in *)(&src_rt.ro_dst);
+
+ ifp_src_route_copyout(ifp, &src_rt);
- return ret;
+ if (src_rt.ro_rt == NULL || !(src_rt.ro_rt->rt_flags & RTF_UP) ||
+ src_ip.s_addr != dst->sin_addr.s_addr ||
+ src_rt.ro_rt->generation_id != route_generation) {
+ if (src_rt.ro_rt != NULL) {
+ /* cached route is unusable; drop our copy of it */
+ rtfree(src_rt.ro_rt);
+ src_rt.ro_rt = NULL;
+ } else if (dst->sin_family != AF_INET) {
+ /* no route and destination not yet set up as AF_INET */
+ bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
+ dst->sin_len = sizeof (src_rt.ro_dst);
+ dst->sin_family = AF_INET;
+ }
+ dst->sin_addr = src_ip;
+
+ if (src_rt.ro_rt == NULL) {
+ src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
+ 0, 0, ifp->if_index);
+
+ if (src_rt.ro_rt != NULL) {
+ /* retain a ref, copyin consumes one */
+ struct rtentry *rte = src_rt.ro_rt;
+ RT_ADDREF(rte);
+ ifp_src_route_copyin(ifp, &src_rt);
+ /* restore the pointer for the return value */
+ src_rt.ro_rt = rte;
+ }
+ }
+ }
+
+ return (src_rt.ro_rt);
}
-void dlil_if_release(struct ifnet *ifp)
+#if INET6
+/*
+ * ifnet_cached_rtlookup_inet6: IPv6 counterpart of
+ * ifnet_cached_rtlookup_inet().
+ *
+ * The cached IPv6 route is copied out first.  It is returned as-is only
+ * when it is non-NULL, has RTF_UP set, its destination equals *src_ip6
+ * and its generation_id matches route_generation.  Otherwise any route
+ * that was copied out is released, the destination (including the scope
+ * id from in6_addr2scopeid()) is set up for *src_ip6, and
+ * rtalloc1_scoped() is asked for a route on ifp->if_index.  A freshly
+ * found route is handed to ifp_src_route6_copyin() after taking an extra
+ * reference, so the pointer returned here stays referenced.
+ *
+ * Returns the rtentry, or NULL when no route was found.
+ * NOTE(review): the caller presumably owns one reference on the returned
+ * route and must release it -- confirm against callers.
+ */
+struct rtentry*
+ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
- struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
+ struct route_in6 src_rt;
-
- /* Interface does not have a lock until it is attached - radar 3713951 */
- if (ifp->if_lock)
- ifnet_lock_exclusive(ifp);
- ifp->if_eflags &= ~IFEF_INUSE;
- ifp->if_ioctl = dlil_recycle_ioctl;
- ifp->if_output = dlil_recycle_output;
- ifp->if_free = dlil_recycle_free;
- ifp->if_set_bpf_tap = dlil_recycle_set_bpf_tap;
+ ifp_src_route6_copyout(ifp, &src_rt);
- strncpy(dlifp->if_namestorage, ifp->if_name, IFNAMSIZ);
- ifp->if_name = dlifp->if_namestorage;
- if (ifp->if_lock)
- ifnet_lock_done(ifp);
-
+ if (src_rt.ro_rt == NULL || !(src_rt.ro_rt->rt_flags & RTF_UP) ||
+ !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr) ||
+ src_rt.ro_rt->generation_id != route_generation) {
+ if (src_rt.ro_rt != NULL) {
+ /* cached route is unusable; drop our copy of it */
+ rtfree(src_rt.ro_rt);
+ src_rt.ro_rt = NULL;
+ } else if (src_rt.ro_dst.sin6_family != AF_INET6) {
+ /* no route and destination not yet set up as AF_INET6 */
+ bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
+ src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
+ src_rt.ro_dst.sin6_family = AF_INET6;
+ }
+ src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
+ src_rt.ro_dst.sin6_addr = *src_ip6;
+
+ if (src_rt.ro_rt == NULL) {
+ src_rt.ro_rt = rtalloc1_scoped(
+ (struct sockaddr *)&src_rt.ro_dst, 0, 0,
+ ifp->if_index);
+
+ if (src_rt.ro_rt != NULL) {
+ /* retain a ref, copyin consumes one */
+ struct rtentry *rte = src_rt.ro_rt;
+ RT_ADDREF(rte);
+ ifp_src_route6_copyin(ifp, &src_rt);
+ /* restore the pointer for the return value */
+ src_rt.ro_rt = rte;
+ }
+ }
+ }
+
+ return (src_rt.ro_rt);
}
+#endif /* INET6 */