]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet/in_var.h
xnu-1699.24.23.tar.gz
[apple/xnu.git] / bsd / netinet / in_var.h
index fe1bc48995133fce5c90e4a3fdea399530abcfd7..0b5d373dee8e43992e1a3b548e8322589d81ed82 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -70,7 +70,7 @@
 #include <sys/kern_event.h>
 #endif
 
-#ifdef PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #include <net/route.h>
 
 /*
@@ -84,18 +84,19 @@ struct in_ifaddr {
 #define        ia_ifp                  ia_ifa.ifa_ifp
 #define        ia_flags                ia_ifa.ifa_flags
                                                /* ia_{,sub}net{,mask} in host order */
-       u_long                  ia_net;         /* network number of interface */
-       u_long                  ia_netmask;     /* mask of net part */
-       u_long                  ia_subnet;      /* subnet number, including net */
-       u_long                  ia_subnetmask;  /* mask of subnet part */
+       u_int32_t               ia_net;         /* network number of interface */
+       u_int32_t               ia_netmask;     /* mask of net part */
+       u_int32_t               ia_subnet;      /* subnet number, including net */
+       u_int32_t               ia_subnetmask;  /* mask of subnet part */
        struct in_addr          ia_netbroadcast; /* to recognize net broadcasts */
        TAILQ_ENTRY(in_ifaddr)  ia_link;        /* tailq macro glue */
        struct sockaddr_in      ia_addr;        /* reserve space for interface name */
        struct sockaddr_in      ia_dstaddr;     /* reserve space for broadcast addr */
 #define        ia_broadaddr            ia_dstaddr
        struct sockaddr_in      ia_sockmask;    /* reserve space for general netmask */
+       TAILQ_ENTRY(in_ifaddr)  ia_hash;        /* hash bucket entry */
 };
-#endif /* PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 struct in_aliasreq {
        char                    ifra_name[IFNAMSIZ];    /* if name, e.g. "en0" */
@@ -154,7 +155,10 @@ struct kev_in_portinuse {
 #define KEV_INET_PORTINUSE    8        /* use ken_in_portinuse */
 #endif
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
+#include <net/if_var.h>
+#include <kern/locks.h>
+#include <sys/tree.h>
 /*
  * Given a pointer to an in_ifaddr (ifaddr),
  * return a pointer to the addr as a sockaddr_in.
@@ -165,11 +169,18 @@ struct kev_in_portinuse {
 #define IN_LNAOF(in, ifa) \
        ((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa)->ia_subnetmask))
 
-extern TAILQ_HEAD(in_ifaddrhead, in_ifaddr) in_ifaddrhead;
+/*
+ * Hash table for IPv4 addresses.
+ */
+__private_extern__ TAILQ_HEAD(in_ifaddrhead, in_ifaddr) in_ifaddrhead;
+__private_extern__ TAILQ_HEAD(in_ifaddrhashhead, in_ifaddr) *in_ifaddrhashtbl;
+__private_extern__ lck_rw_t *in_ifaddr_rwlock;
+
+#define        INADDR_HASH(x)  (&in_ifaddrhashtbl[inaddr_hashval(x)])
+
 extern struct  ifqueue ipintrq;                /* ip packet input queue */
 extern struct  in_addr zeroin_addr;
 extern u_char  inetctlerrmap[];
-extern lck_mtx_t *rt_mtx;
 
 extern int apple_hwcksum_tx;
 extern int apple_hwcksum_rx;
@@ -178,36 +189,42 @@ extern int apple_hwcksum_rx;
  * Macro for finding the interface (ifnet structure) corresponding to one
  * of our IP addresses.
  */
-#define INADDR_TO_IFP(addr, ifp) \
-       /* struct in_addr addr; */ \
-       /* struct ifnet *ifp; */ \
-{ \
-       struct in_ifaddr *ia; \
-\
-       lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); \
-       lck_mtx_lock(rt_mtx); \
-       TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) \
-               if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \
-                       break; \
-       (ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \
-       lck_mtx_unlock(rt_mtx); \
+#define INADDR_TO_IFP(addr, ifp)                                       \
+       /* struct in_addr addr; */                                      \
+       /* struct ifnet *ifp; */                                        \
+{                                                                      \
+       struct in_ifaddr *ia;                                           \
+                                                                       \
+       lck_rw_lock_shared(in_ifaddr_rwlock);                           \
+       TAILQ_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash) {        \
+               IFA_LOCK_SPIN(&ia->ia_ifa);                             \
+               if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) {     \
+                       IFA_UNLOCK(&ia->ia_ifa);                        \
+                       break;                                          \
+               }                                                       \
+               IFA_UNLOCK(&ia->ia_ifa);                                \
+       }                                                               \
+       (ifp) = (ia == NULL) ? NULL : ia->ia_ifp;                       \
+       lck_rw_done(in_ifaddr_rwlock);                                  \
 }
 
 /*
  * Macro for finding the internet address structure (in_ifaddr) corresponding
- * to a given interface (ifnet structure).
+ * to a given interface (ifnet structure).  Caller is responsible for freeing
+ * the reference.
  */
-#define IFP_TO_IA(ifp, ia) \
-       /* struct ifnet *ifp; */ \
-       /* struct in_ifaddr *ia; */ \
-{ \
-       lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); \
-       lck_mtx_lock(rt_mtx); \
-       for ((ia) = TAILQ_FIRST(&in_ifaddrhead); \
-           (ia) != NULL && (ia)->ia_ifp != (ifp); \
-           (ia) = TAILQ_NEXT((ia), ia_link)) \
-               continue; \
-       lck_mtx_unlock(rt_mtx); \
+#define IFP_TO_IA(ifp, ia)                                             \
+       /* struct ifnet *ifp; */                                        \
+       /* struct in_ifaddr *ia; */                                     \
+{                                                                      \
+       lck_rw_lock_shared(in_ifaddr_rwlock);                           \
+       for ((ia) = TAILQ_FIRST(&in_ifaddrhead);                        \
+           (ia) != NULL && (ia)->ia_ifp != (ifp);                      \
+           (ia) = TAILQ_NEXT((ia), ia_link))                           \
+               continue;                                               \
+       if ((ia) != NULL)                                               \
+               IFA_ADDREF(&(ia)->ia_ifa);                              \
+       lck_rw_done(in_ifaddr_rwlock);                                  \
 }
 
 /*
@@ -215,31 +232,153 @@ extern int apple_hwcksum_rx;
  * to change that - as it might break a number of things
  */
 
+/*
+ * Legacy IPv4 IGMP per-link structure.
+ */
 struct router_info {
        struct ifnet *rti_ifp;
        int    rti_type; /* type of router which is querier on this interface */
        int    rti_time; /* # of slow timeouts since last old query */
-       struct router_info *rti_next;
+       SLIST_ENTRY(router_info) rti_list;
 };
 
 /*
- * Internet multicast address structure.  There is one of these for each IP
- * multicast group to which this host belongs on a given network interface.
- * For every entry on the interface's if_multiaddrs list which represents
- * an IP multicast group, there is one of these structures.  They are also
- * kept on a system-wide list to make it easier to keep our legacy IGMP code
- * compatible with the rest of the world (see IN_FIRST_MULTI et al, below).
+ * IPv4 multicast IGMP-layer source entry.
+ */
+struct ip_msource {
+       RB_ENTRY(ip_msource)    ims_link;       /* RB tree links */
+       in_addr_t               ims_haddr;      /* host byte order */
+       struct ims_st {
+               uint16_t        ex;             /* # of exclusive members */
+               uint16_t        in;             /* # of inclusive members */
+       }                       ims_st[2];      /* state at t0, t1 */
+       uint8_t                 ims_stp;        /* pending query */
+};
+
+/*
+ * IPv4 multicast PCB-layer source entry.
+ */
+struct in_msource {
+       RB_ENTRY(ip_msource)    ims_link;       /* RB tree links */
+       in_addr_t               ims_haddr;      /* host byte order */
+       uint8_t                 imsl_st[2];     /* state before/at commit */
+};
+
+RB_HEAD(ip_msource_tree, ip_msource);  /* define struct ip_msource_tree */
+
+RB_PROTOTYPE_SC_PREV(__private_extern__, ip_msource_tree, ip_msource,
+    ims_link, ip_msource_cmp);
+
+/*
+ * IPv4 multicast PCB-layer group filter descriptor.
+ */
+struct in_mfilter {
+       struct ip_msource_tree  imf_sources; /* source list for (S,G) */
+       u_long                  imf_nsrc;    /* # of source entries */
+       uint8_t                 imf_st[2];   /* state before/at commit */
+};
+
+struct igmp_ifinfo;
+
+/*
+ * IPv4 group descriptor.
+ *
+ * For every entry on an ifnet's if_multiaddrs list which represents
+ * an IP multicast group, there is one of these structures.
+ *
+ * If any source filters are present, then a node will exist in the RB-tree
+ * to permit fast lookup by source whenever an operation takes place.
+ * This permits pre-order traversal when we issue reports.
+ * Source filter trees are kept separately from the socket layer to
+ * greatly simplify locking.
+ *
+ * When IGMPv3 is active, inm_timer is the response to group query timer.
+ * The state-change timer inm_sctimer is separate; whenever state changes
+ * for the group the state change record is generated and transmitted,
+ * and kept if retransmissions are necessary.
+ *
+ * FUTURE: inm_link is now only used when groups are being purged
+ * on a detaching ifnet. It could be demoted to a SLIST_ENTRY, but
+ * because it is at the very start of the struct, we can't do this
+ * w/o breaking the ABI for ifmcstat.
  */
 struct in_multi {
+       decl_lck_mtx_data(, inm_lock);
+       u_int32_t inm_refcount;         /* reference count */
+       u_int32_t inm_reqcnt;           /* request count for this address */
+       u_int32_t inm_debug;            /* see ifa_debug flags */
        LIST_ENTRY(in_multi) inm_link;  /* queue macro glue */
        struct  in_addr inm_addr;       /* IP multicast address, convenience */
        struct  ifnet *inm_ifp;         /* back pointer to ifnet */
        struct  ifmultiaddr *inm_ifma;  /* back pointer to ifmultiaddr */
-       u_int   inm_timer;              /* IGMP membership report timer */
+       u_int   inm_timer;              /* IGMPv1/v2 group / v3 query timer  */
        u_int   inm_state;              /*  state of the membership */
-       struct  router_info *inm_rti;   /* router info*/
+       void *inm_rti;                  /* unused, legacy field */
+
+       /* New fields for IGMPv3 follow. */
+       struct igmp_ifinfo      *inm_igi;       /* IGMP info */
+       SLIST_ENTRY(in_multi)    inm_nrele;     /* to-be-released by IGMP */
+       u_int32_t                inm_nrelecnt;  /* deferred release count */
+       struct ip_msource_tree   inm_srcs;      /* tree of sources */
+       u_long                   inm_nsrc;      /* # of tree entries */
+
+       struct ifqueue           inm_scq;       /* queue of pending
+                                                * state-change packets */
+       struct timeval           inm_lastgsrtv; /* Time of last G-S-R query */
+       uint16_t                 inm_sctimer;   /* state-change timer */
+       uint16_t                 inm_scrv;      /* state-change rexmit count */
+
+       /*
+        * SSM state counters which track state at T0 (the time the last
+        * state-change report's RV timer went to zero) and T1
+        * (time of pending report, i.e. now).
+        * Used for computing IGMPv3 state-change reports. Several refcounts
+        * are maintained here to optimize for common use-cases.
+        */
+       struct inm_st {
+               uint16_t        iss_fmode;      /* IGMP filter mode */
+               uint16_t        iss_asm;        /* # of ASM listeners */
+               uint16_t        iss_ex;         /* # of exclusive members */
+               uint16_t        iss_in;         /* # of inclusive members */
+               uint16_t        iss_rec;        /* # of recorded sources */
+       }                       inm_st[2];      /* state at t0, t1 */
+
+       void (*inm_trace)               /* callback fn for tracing refs */
+           (struct in_multi *, int);
 };
 
+#define        INM_LOCK_ASSERT_HELD(_inm)                                      \
+       lck_mtx_assert(&(_inm)->inm_lock, LCK_MTX_ASSERT_OWNED)
+
+#define        INM_LOCK_ASSERT_NOTHELD(_inm)                                   \
+       lck_mtx_assert(&(_inm)->inm_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define        INM_LOCK(_inm)                                                  \
+       lck_mtx_lock(&(_inm)->inm_lock)
+
+#define        INM_LOCK_SPIN(_inm)                                             \
+       lck_mtx_lock_spin(&(_inm)->inm_lock)
+
+#define        INM_CONVERT_LOCK(_inm) do {                                     \
+       INM_LOCK_ASSERT_HELD(_inm);                                     \
+       lck_mtx_convert_spin(&(_inm)->inm_lock);                        \
+} while (0)
+
+#define        INM_UNLOCK(_inm)                                                \
+       lck_mtx_unlock(&(_inm)->inm_lock)
+
+#define        INM_ADDREF(_inm)                                                \
+       inm_addref(_inm, 0)
+
+#define        INM_ADDREF_LOCKED(_inm)                                         \
+       inm_addref(_inm, 1)
+
+#define        INM_REMREF(_inm)                                                \
+       inm_remref(_inm, 0)
+
+#define        INM_REMREF_LOCKED(_inm)                                         \
+       inm_remref(_inm, 1)
+
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_inet_ip);
 SYSCTL_DECL(_net_inet_raw);
@@ -258,22 +397,36 @@ struct in_multistep {
 /*
  * Macro for looking up the in_multi record for a given IP multicast address
  * on a given interface.  If no matching record is found, "inm" is set null.
+ *
+ * We do this differently compared other BSD implementations; instead of
+ * walking the if_multiaddrs list at the interface and returning the
+ * ifma_protospec value of a matching entry, we search the global list
+ * of in_multi records and find it that way.  Otherwise either the two
+ * structures (in_multi, ifmultiaddr) need to be ref counted both ways,
+ * which will make things too complicated, or they need to reside in the
+ * same protected domain, which they aren't.
+ *
+ * Must be called with in_multihead_lock held.
  */
-#define IN_LOOKUP_MULTI(addr, ifp, inm) \
-       /* struct in_addr addr; */ \
-       /* struct ifnet *ifp; */ \
-       /* struct in_multi *inm; */ \
-do { \
-       struct ifmultiaddr *ifma; \
-\
-       LIST_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { \
-               if (ifma->ifma_addr->sa_family == AF_INET \
-                   && ((struct sockaddr_in *)ifma->ifma_addr)->sin_addr.s_addr == \
-                   (addr).s_addr) \
-                       break; \
-       } \
-       (inm) = ifma ? ifma->ifma_protospec : NULL; \
-} while(0)
+#define IN_LOOKUP_MULTI(addr, ifp, inm)                                        \
+       /* struct in_addr *addr; */                                     \
+       /* struct ifnet *ifp; */                                        \
+       /* struct in_multi *inm; */                                     \
+do {                                                                   \
+       struct in_multistep _step;                                      \
+       IN_FIRST_MULTI(_step, inm);                                     \
+       while ((inm) != NULL) {                                         \
+               INM_LOCK_SPIN(inm);                                     \
+               if ((inm)->inm_ifp == (ifp) &&                          \
+                   (inm)->inm_addr.s_addr == (addr)->s_addr) {         \
+                       INM_ADDREF_LOCKED(inm);                         \
+                       INM_UNLOCK(inm);                                \
+                       break;                                          \
+               }                                                       \
+               INM_UNLOCK(inm);                                        \
+               IN_NEXT_MULTI(_step, inm);                              \
+       }                                                               \
+} while (0)
 
 /*
  * Macro to step through all of the in_multi records, one at a time.
@@ -281,38 +434,78 @@ do { \
  * provide.  IN_FIRST_MULTI(), below, must be called to initialize "step"
  * and get the first record.  Both macros return a NULL "inm" when there
  * are no remaining records.
+ *
+ * Must be called with in_multihead_lock held.
  */
-#define IN_NEXT_MULTI(step, inm) \
-       /* struct in_multistep  step; */ \
-       /* struct in_multi *inm; */ \
-do { \
-       if (((inm) = (step).i_inm) != NULL) \
-               (step).i_inm = LIST_NEXT((step).i_inm, inm_link); \
-} while(0)
-
-#define IN_FIRST_MULTI(step, inm) \
-       /* struct in_multistep step; */ \
-       /* struct in_multi *inm; */ \
-do { \
-       (step).i_inm = LIST_FIRST(&in_multihead); \
-       IN_NEXT_MULTI((step), (inm)); \
-} while(0)
+#define IN_NEXT_MULTI(step, inm)                                       \
+       /* struct in_multistep  step; */                                \
+       /* struct in_multi *inm; */                                     \
+do {                                                                   \
+       in_multihead_lock_assert(LCK_RW_ASSERT_HELD);                   \
+       if (((inm) = (step).i_inm) != NULL)                             \
+               (step).i_inm = LIST_NEXT((step).i_inm, inm_link);       \
+} while (0)
+
+#define IN_FIRST_MULTI(step, inm)                                      \
+       /* struct in_multistep step; */                                 \
+       /* struct in_multi *inm; */                                     \
+do {                                                                   \
+       in_multihead_lock_assert(LCK_RW_ASSERT_HELD);                   \
+       (step).i_inm = LIST_FIRST(&in_multihead);                       \
+       IN_NEXT_MULTI((step), (inm));                                   \
+} while (0)
 
 struct route;
-struct in_multi *in_addmulti(struct in_addr *, struct ifnet *);
-void   in_delmulti(struct in_multi **);
-int    in_control(struct socket *, u_long, caddr_t, struct ifnet *,
-                       struct proc *);
-void   in_rtqdrain(void);
+struct ip_moptions;
+
+/*
+ * Return values for imo_multi_filter().
+ */
+#define MCAST_PASS             0       /* Pass */
+#define MCAST_NOTGMEMBER       1       /* This host not a member of group */
+#define MCAST_NOTSMEMBER       2       /* This host excluded source */
+#define MCAST_MUTED            3       /* [deprecated] */
+
+extern void in_ifaddr_init(void);
+extern int imo_multi_filter(const struct ip_moptions *, const struct ifnet *,
+    const struct sockaddr *, const struct sockaddr *);
+extern int imo_clone(struct ip_moptions *, struct ip_moptions *);
+extern void inm_commit(struct in_multi *);
+extern void inm_clear_recorded(struct in_multi *);
+extern void inm_print(const struct in_multi *);
+extern int inm_record_source(struct in_multi *inm, const in_addr_t);
+extern void inm_release(struct in_multi *);
+extern void in_multi_init(void);
+extern struct in_multi *in_addmulti(struct in_addr *, struct ifnet *);
+extern void in_delmulti(struct in_multi *);
+extern int in_leavegroup(struct in_multi *, /*const*/ struct in_mfilter *);
+extern int in_multi_detach(struct in_multi *);
+extern void inm_addref(struct in_multi *, int);
+extern void inm_remref(struct in_multi *, int);
+extern void inm_purge(struct in_multi *);
+extern uint8_t ims_get_mode(const struct in_multi *,
+    const struct ip_msource *, uint8_t);
+extern int in_control(struct socket *, u_long, caddr_t, struct ifnet *,
+    struct proc *);
+extern void in_rtqdrain(void);
 extern struct radix_node *in_validate(struct radix_node *);
-void   ip_input(struct mbuf *);
-int    in_ifadown(struct ifaddr *ifa, int);
-void   in_ifscrub(struct ifnet *, struct in_ifaddr *, int);
-int    ipflow_fastforward(struct mbuf *);
-void   ipflow_create(const struct route *, struct mbuf *);
-void   ipflow_slowtimo(void);
-
-#endif /* KERNEL_PRIVATE */
+extern void ip_input(struct mbuf *);
+extern int in_ifadown(struct ifaddr *ifa, int);
+extern void in_ifscrub(struct ifnet *, struct in_ifaddr *, int);
+extern u_int32_t inaddr_hashval(u_int32_t);
+extern void in_purgeaddrs(struct ifnet *);
+extern void    imf_leave(struct in_mfilter *);
+extern void    imf_purge(struct in_mfilter *);
+
+struct inpcb;
+
+__private_extern__ int inp_join_group(struct inpcb *, struct sockopt *);
+__private_extern__ int inp_leave_group(struct inpcb *, struct sockopt *);
+__private_extern__ void in_multihead_lock_exclusive(void);
+__private_extern__ void in_multihead_lock_shared(void);
+__private_extern__ void in_multihead_lock_assert(int);
+__private_extern__ void in_multihead_lock_done(void);
+#endif /* XNU_KERNEL_PRIVATE */
 
 /* INET6 stuff */
 #include <netinet6/in6_var.h>