+#endif /* IPSEC */
+
+#define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 0)
+#define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 2)
+#define DBG_FNC_IP_INPUT NETDBG_CODE(DBG_NETIP, (2 << 8))
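+/*
+ * These codes feed the kdebug tracing facility; ip_input() brackets its
+ * work with KERNEL_DEBUG(DBG_FNC_IP_INPUT | DBG_FUNC_START, ...) and a
+ * matching DBG_FUNC_END event.
+ */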
+
+#if IPSEC
+extern int ipsec_bypass;
+extern lck_mtx_t *sadb_mutex;
+
+lck_grp_t *sadb_stat_mutex_grp;
+lck_grp_attr_t *sadb_stat_mutex_grp_attr;
+lck_attr_t *sadb_stat_mutex_attr;
+decl_lck_mtx_data(, sadb_stat_mutex_data);
+lck_mtx_t *sadb_stat_mutex = &sadb_stat_mutex_data;
+#endif /* IPSEC */
+
+MBUFQ_HEAD(fq_head);
+
+static int frag_timeout_run; /* frag timer is scheduled to run */
+static void frag_timeout(void *);
+static void frag_sched_timeout(void);
+
+static struct ipq *ipq_alloc(int);
+static void ipq_free(struct ipq *);
+static void ipq_updateparams(void);
+static void ip_input_second_pass(struct mbuf *, struct ifnet *,
+ u_int32_t, int, int, struct ip_fw_in_args *, int);
+
+decl_lck_mtx_data(static, ipqlock);
+static lck_attr_t *ipqlock_attr;
+static lck_grp_t *ipqlock_grp;
+static lck_grp_attr_t *ipqlock_grp_attr;
+
+/* Packet reassembly stuff */
+#define IPREASS_NHASH_LOG2 6
+#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
+#define IPREASS_HMASK (IPREASS_NHASH - 1)
+#define IPREASS_HASH(x, y) \
+ (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
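+/*
+ * Worked example (illustrative only): for x = 0x1234 and y = 0x56,
+ * (x & 0xF) = 0x04 and (((x >> 8) & 0xF) << 4) = 0x20, so the folded
+ * nibbles form 0x24; 0x24 ^ 0x56 = 0x72, and 0x72 & IPREASS_HMASK
+ * (0x3F) selects bucket 0x32.  In use, x is the fragment's raw source
+ * address and y its IP ID.
+ */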
+
+/* IP fragment reassembly queues (protected by ipqlock) */
+static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH]; /* ip reassembly queues */
+static int maxnipq; /* max packets in reass queues */
+static u_int32_t maxfragsperpacket; /* max frags/packet in reass queues */
+static u_int32_t nipq; /* # of packets in reass queues */
+static u_int32_t ipq_limit; /* ipq allocation limit */
+static u_int32_t ipq_count; /* current # of allocated ipq's */
+
+static int sysctl_ipforwarding SYSCTL_HANDLER_ARGS;
+static int sysctl_maxnipq SYSCTL_HANDLER_ARGS;
+static int sysctl_maxfragsperpacket SYSCTL_HANDLER_ARGS;
+
+#if (DEBUG || DEVELOPMENT)
+static int sysctl_reset_ip_input_stats SYSCTL_HANDLER_ARGS;
+static int sysctl_ip_input_measure_bins SYSCTL_HANDLER_ARGS;
+static int sysctl_ip_input_getperf SYSCTL_HANDLER_ARGS;
+#endif /* (DEBUG || DEVELOPMENT) */
+
+int ipforwarding = 0;
+SYSCTL_PROC(_net_inet_ip, IPCTL_FORWARDING, forwarding,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ipforwarding, 0,
+ sysctl_ipforwarding, "I", "Enable IP forwarding between interfaces");
+
+static int ipsendredirects = 1; /* XXX */
+SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &ipsendredirects, 0,
+ "Enable sending IP redirects");
+
+int ip_defttl = IPDEFTTL;
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &ip_defttl, 0, "Default TTL for outgoing IP packets");
+
+static int ip_dosourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &ip_dosourceroute, 0,
+ "Enable forwarding source routed IP packets");
+
+static int ip_acceptsourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &ip_acceptsourceroute, 0,
+ "Enable accepting source routed IP packets");
+
+static int ip_sendsourcequench = 0;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &ip_sendsourcequench, 0,
+ "Enable the transmission of source quench packets");
+
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &maxnipq, 0, sysctl_maxnipq,
+ "I", "Maximum number of IPv4 fragment reassembly queue entries");
+
+SYSCTL_UINT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &nipq, 0, "Current number of IPv4 fragment reassembly queue entries");
+
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragsperpacket,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &maxfragsperpacket, 0,
+ sysctl_maxfragsperpacket, "I",
+ "Maximum number of IPv4 fragments allowed per packet");
+
+static uint32_t ip_adj_clear_hwcksum = 0;
+SYSCTL_UINT(_net_inet_ip, OID_AUTO, adj_clear_hwcksum,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &ip_adj_clear_hwcksum, 0,
+ "Invalidate hwcksum info when adjusting length");
+
+/*
+ * XXX - Setting ip_checkinterface mostly implements the receive side of
+ * the Strong ES model described in RFC 1122, but since the routing table
+ * and transmit implementation do not implement the Strong ES model,
+ * setting this to 1 results in an odd hybrid.
+ *
+ * XXX - ip_checkinterface currently must be disabled if you use ipnat
+ * to translate the destination address to another local interface.
+ *
+ * XXX - ip_checkinterface must be disabled if you add IP aliases
+ * to the loopback interface instead of the interface where the
+ * packets for those addresses are received.
+ */
+static int ip_checkinterface = 0;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &ip_checkinterface, 0, "Verify packet arrives on correct interface");
+
+static int ip_chaining = 1;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, rx_chaining, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &ip_chaining, 1, "Enable receive-side IP address based chaining");
+
+static int ip_chainsz = 6;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, rx_chainsz, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &ip_chainsz, 1, "Maximum length of receive-side IP chains");
+
+#if (DEBUG || DEVELOPMENT)
+static int ip_input_measure = 0;
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, input_perf,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+ &ip_input_measure, 0, sysctl_reset_ip_input_stats, "I", "Enable IP input performance measurement");
+
+static uint64_t ip_input_measure_bins = 0;
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, input_perf_bins,
+ CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip_input_measure_bins, 0,
+ sysctl_ip_input_measure_bins, "Q",
+ "Bins for chaining performance data histogram");
+
+static net_perf_t net_perf;
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, input_perf_data,
+ CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
+ 0, 0, sysctl_ip_input_getperf, "S,net_perf",
+ "IP input performance data (struct net_perf, net/net_perf.h)");
+#endif /* (DEBUG || DEVELOPMENT) */
+
+#if DIAGNOSTIC
+static int ipprintfs = 0;
+#endif
+
+struct protosw *ip_protox[IPPROTO_MAX];
+
+static lck_grp_attr_t *in_ifaddr_rwlock_grp_attr;
+static lck_grp_t *in_ifaddr_rwlock_grp;
+static lck_attr_t *in_ifaddr_rwlock_attr;
+decl_lck_rw_data(, in_ifaddr_rwlock_data);
+lck_rw_t *in_ifaddr_rwlock = &in_ifaddr_rwlock_data;
+
+/* Protected by in_ifaddr_rwlock */
+struct in_ifaddrhead in_ifaddrhead; /* first inet address */
+struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */
+
+#define INADDR_NHASH 61
+static u_int32_t inaddr_nhash; /* hash table size */
+static u_int32_t inaddr_hashp; /* next largest prime */
+
+static int ip_getstat SYSCTL_HANDLER_ARGS;
+struct ipstat ipstat;
+SYSCTL_PROC(_net_inet_ip, IPCTL_STATS, stats,
+ CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
+ 0, 0, ip_getstat, "S,ipstat",
+ "IP statistics (struct ipstat, netinet/ip_var.h)");
+
+#if IPCTL_DEFMTU
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &ip_mtu, 0, "Default MTU");
+#endif /* IPCTL_DEFMTU */
+
+#if IPSTEALTH
+static int ipstealth = 0;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &ipstealth, 0, "IP stealth mode (no TTL decrement on forwarding)");
+#endif /* IPSTEALTH */
+
+/* Firewall hooks */
+#if IPFIREWALL
+ip_fw_chk_t *ip_fw_chk_ptr;
+int fw_enable = 1;
+int fw_bypass = 1;
+int fw_one_pass = 0;
+#endif /* IPFIREWALL */
+
+#if DUMMYNET
+ip_dn_io_t *ip_dn_io_ptr;
+#endif /* DUMMYNET */
+
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal,
+ CTLFLAG_RW | CTLFLAG_LOCKED, 0, "link local");
+
+struct ip_linklocal_stat ip_linklocal_stat;
+SYSCTL_STRUCT(_net_inet_ip_linklocal, OID_AUTO, stat,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &ip_linklocal_stat, ip_linklocal_stat,
+ "Number of link local packets with TTL less than 255");
+
+SYSCTL_NODE(_net_inet_ip_linklocal, OID_AUTO, in,
+ CTLFLAG_RW | CTLFLAG_LOCKED, 0, "link local input");
+
+int ip_linklocal_in_allowbadttl = 1;
+SYSCTL_INT(_net_inet_ip_linklocal_in, OID_AUTO, allowbadttl,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &ip_linklocal_in_allowbadttl, 0,
+ "Allow incoming link local packets with TTL less than 255");
+
+
+/*
+ * We need to save the IP options in case a protocol wants to respond
+ * to an incoming packet over the same route if the packet got here
+ * using IP source routing. This allows connection establishment and
+ * maintenance when the remote end is on a network that is not known
+ * to us.
+ */
+static int ip_nhops = 0;
+static struct ip_srcrt {
+ struct in_addr dst; /* final destination */
+ char nop; /* one NOP to align */
+ char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
+ struct in_addr route[MAX_IPOPTLEN / sizeof (struct in_addr)];
+} ip_srcrt;
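+/*
+ * Layout sketch (illustrative): dst (4 bytes) + nop (1) + srcopt (3)
+ * put route[] at an 8-byte offset, keeping the address array 4-byte
+ * aligned.  After save_rte() copies a received LSRR/SSRR option,
+ * srcopt[] holds the option code, length and offset bytes, route[]
+ * holds the recorded intermediate hops, and the final destination is
+ * kept in dst.
+ */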
+
+static void in_ifaddrhashtbl_init(void);
+static void save_rte(u_char *, struct in_addr);
+static int ip_dooptions(struct mbuf *, int, struct sockaddr_in *);
+static void ip_forward(struct mbuf *, int, struct sockaddr_in *);
+static void frag_freef(struct ipqhead *, struct ipq *);
+#if IPDIVERT
+#ifdef IPDIVERT_44
+static struct mbuf *ip_reass(struct mbuf *, u_int32_t *, u_int16_t *);
+#else /* !IPDIVERT_44 */
+static struct mbuf *ip_reass(struct mbuf *, u_int16_t *, u_int16_t *);
+#endif /* !IPDIVERT_44 */
+#else /* !IPDIVERT */
+static struct mbuf *ip_reass(struct mbuf *);
+#endif /* !IPDIVERT */
+static void ip_fwd_route_copyout(struct ifnet *, struct route *);
+static void ip_fwd_route_copyin(struct ifnet *, struct route *);
+static inline u_short ip_cksum(struct mbuf *, int);
+
+int ip_use_randomid = 1;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &ip_use_randomid, 0, "Randomize IP packet IDs");
+
+/*
+ * On platforms which require strict alignment (currently for anything but
+ * i386 or x86_64), check if the IP header pointer is 32-bit aligned; if not,
+ * copy the contents of the mbuf chain into a new chain, and free the original
+ * one. Create some head room in the first mbuf of the new chain, in case
+ * it's needed later on.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define IP_HDR_ALIGNMENT_FIXUP(_m, _ifp, _action) do { } while (0)
+#else /* !__i386__ && !__x86_64__ */
+#define IP_HDR_ALIGNMENT_FIXUP(_m, _ifp, _action) do { \
+ if (!IP_HDR_ALIGNED_P(mtod(_m, caddr_t))) { \
+ struct mbuf *_n; \
+ struct ifnet *__ifp = (_ifp); \
+ atomic_add_64(&(__ifp)->if_alignerrs, 1); \
+ if (((_m)->m_flags & M_PKTHDR) && \
+ (_m)->m_pkthdr.pkt_hdr != NULL) \
+ (_m)->m_pkthdr.pkt_hdr = NULL; \
+ _n = m_defrag_offset(_m, max_linkhdr, M_NOWAIT); \
+ if (_n == NULL) { \
+ atomic_add_32(&ipstat.ips_toosmall, 1); \
+ m_freem(_m); \
+ (_m) = NULL; \
+ _action; \
+ } else { \
+ VERIFY(_n != (_m)); \
+ (_m) = _n; \
+ } \
+ } \
+} while (0)
+#endif /* !__i386__ && !__x86_64__ */
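+/*
+ * Typical usage (see ip_proto_dispatch_in() below), where _action is a
+ * statement to execute when the fixup fails and the mbuf has already
+ * been freed:
+ *
+ * IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return);
+ *
+ * On success, _m may point at a freshly defragmented chain.
+ */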
+
+/*
+ * GRE input handler function, settable via ip_gre_register_input() for PPTP.
+ */
+static gre_input_func_t gre_input_func;
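+/*
+ * Registration sketch (illustrative): a PPTP provider calls
+ * ip_gre_register_input() with its handler, after which incoming GRE
+ * packets are dispatched through gre_input_func.
+ */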
+
+static void
+ip_init_delayed(void)
+{
+ struct ifreq ifr;
+ int error;
+ struct sockaddr_in *sin;
+
+ bzero(&ifr, sizeof(ifr));
+ strlcpy(ifr.ifr_name, "lo0", sizeof(ifr.ifr_name));
+ sin = (struct sockaddr_in *)(void *)&ifr.ifr_addr;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ error = in_control(NULL, SIOCSIFADDR, (caddr_t)&ifr, lo_ifp, kernproc);
+ if (error)
+ printf("%s: failed to initialise lo0's address, error=%d\n",
+ __func__, error);
+}
+
+/*
+ * IP initialization: fill in IP protocol switch table.
+ * All protocols not implemented in kernel go to raw IP protocol handler.
+ */
+void
+ip_init(struct protosw *pp, struct domain *dp)
+{
+ static int ip_initialized = 0;
+ struct protosw *pr;
+ struct timeval tv;
+ int i;
+
+ domain_proto_mtx_lock_assert_held();
+ VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED);
+
+ /* ipq_alloc() uses mbufs for IP fragment queue structures */
+ _CASSERT(sizeof (struct ipq) <= _MLEN);
+
+ /*
+ * Some ioctls (e.g. SIOCAIFADDR) use ifaliasreq struct, which is
+ * interchangeable with in_aliasreq; they must have the same size.
+ */
+ _CASSERT(sizeof (struct ifaliasreq) == sizeof (struct in_aliasreq));
+
+ if (ip_initialized)
+ return;
+ ip_initialized = 1;
+
+ in_ifaddr_init();
+
+ in_ifaddr_rwlock_grp_attr = lck_grp_attr_alloc_init();
+ in_ifaddr_rwlock_grp = lck_grp_alloc_init("in_ifaddr_rwlock",
+ in_ifaddr_rwlock_grp_attr);
+ in_ifaddr_rwlock_attr = lck_attr_alloc_init();
+ lck_rw_init(in_ifaddr_rwlock, in_ifaddr_rwlock_grp,
+ in_ifaddr_rwlock_attr);
+
+ TAILQ_INIT(&in_ifaddrhead);
+ in_ifaddrhashtbl_init();
+
+ ip_moptions_init();
+
+ pr = pffindproto_locked(PF_INET, IPPROTO_RAW, SOCK_RAW);
+ if (pr == NULL) {
+ panic("%s: Unable to find [PF_INET,IPPROTO_RAW,SOCK_RAW]\n",
+ __func__);
+ /* NOTREACHED */
+ }
+
+ /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
+ for (i = 0; i < IPPROTO_MAX; i++)
+ ip_protox[i] = pr;
+ /*
+ * Cycle through IP protocols and put them into the appropriate place
+ * in ip_protox[], skipping protocols IPPROTO_{IP,RAW}.
+ */
+ VERIFY(dp == inetdomain && dp->dom_family == PF_INET);
+ TAILQ_FOREACH(pr, &dp->dom_protosw, pr_entry) {
+ VERIFY(pr->pr_domain == dp);
+ if (pr->pr_protocol != 0 && pr->pr_protocol != IPPROTO_RAW) {
+ /* Be careful to only index valid IP protocols. */
+ if (pr->pr_protocol < IPPROTO_MAX)
+ ip_protox[pr->pr_protocol] = pr;
+ }
+ }
+
+ /* IP fragment reassembly queue lock */
+ ipqlock_grp_attr = lck_grp_attr_alloc_init();
+ ipqlock_grp = lck_grp_alloc_init("ipqlock", ipqlock_grp_attr);
+ ipqlock_attr = lck_attr_alloc_init();
+ lck_mtx_init(&ipqlock, ipqlock_grp, ipqlock_attr);
+
+ lck_mtx_lock(&ipqlock);
+ /* Initialize IP reassembly queue. */
+ for (i = 0; i < IPREASS_NHASH; i++)
+ TAILQ_INIT(&ipq[i]);
+
+ maxnipq = nmbclusters / 32;
+ maxfragsperpacket = 128; /* enough for 64k in 512 byte fragments */
+ ipq_updateparams();
+ lck_mtx_unlock(&ipqlock);
+
+ getmicrotime(&tv);
+ ip_id = RandomULong() ^ tv.tv_usec;
+ ip_initid();
+
+ ipf_init();
+
+#if IPSEC
+ sadb_stat_mutex_grp_attr = lck_grp_attr_alloc_init();
+ sadb_stat_mutex_grp = lck_grp_alloc_init("sadb_stat",
+ sadb_stat_mutex_grp_attr);
+ sadb_stat_mutex_attr = lck_attr_alloc_init();
+ lck_mtx_init(sadb_stat_mutex, sadb_stat_mutex_grp,
+ sadb_stat_mutex_attr);
+
+#endif
+ arp_init();
+ net_init_add(ip_init_delayed);
+}
+
+/*
+ * Initialize IPv4 source address hash table.
+ */
+static void
+in_ifaddrhashtbl_init(void)
+{
+ int i, k, p;
+
+ if (in_ifaddrhashtbl != NULL)
+ return;
+
+ PE_parse_boot_argn("inaddr_nhash", &inaddr_nhash,
+ sizeof (inaddr_nhash));
+ if (inaddr_nhash == 0)
+ inaddr_nhash = INADDR_NHASH;
+
+ MALLOC(in_ifaddrhashtbl, struct in_ifaddrhashhead *,
+ inaddr_nhash * sizeof (*in_ifaddrhashtbl),
+ M_IFADDR, M_WAITOK | M_ZERO);
+ if (in_ifaddrhashtbl == NULL)
+ panic("in_ifaddrhashtbl_init allocation failed");
+
+ /*
+ * Find the smallest prime strictly greater than inaddr_nhash.
+ * For the default INADDR_NHASH of 61, the search starts at 63
+ * (7 * 9), rejects 65 (5 * 13), and settles on 67.
+ */
+ k = (inaddr_nhash % 2 == 0) ? inaddr_nhash + 1 : inaddr_nhash + 2;
+ for (;;) {
+ p = 1;
+ for (i = 3; i * i <= k; i += 2) {
+ if (k % i == 0)
+ p = 0;
+ }
+ if (p == 1)
+ break;
+ k += 2;
+ }
+ inaddr_hashp = k;
+}
+
+u_int32_t
+inaddr_hashval(u_int32_t key)
+{
+ /*
+ * The hash index is the computed prime times the key modulo
+ * the hash size, as documented in "Introduction to Algorithms"
+ * (Cormen, Leiserson, Rivest).
+ */
+ if (inaddr_nhash > 1)
+ return ((key * inaddr_hashp) % inaddr_nhash);
+ else
+ return (0);
+}
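+/*
+ * Worked example (illustrative only): with inaddr_nhash = 61 and
+ * inaddr_hashp = 67, key 0x0A000001 gives
+ * (0x0A000001 * 67) mod 2^32 = 2650800195, and 2650800195 % 61 = 55.
+ * Note that the u_int32_t multiplication wraps modulo 2^32 before the
+ * final modulo is applied.
+ */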
+
+void
+ip_proto_dispatch_in_wrapper(struct mbuf *m, int hlen, u_int8_t proto)
+{
+ ip_proto_dispatch_in(m, hlen, proto, 0);
+}
+
+__private_extern__ void
+ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto,
+ ipfilter_t inject_ipfref)
+{
+ struct ipfilter *filter;
+ int seen = (inject_ipfref == NULL);
+ int changed_header = 0;
+ struct ip *ip;
+ void (*pr_input)(struct mbuf *, int len);
+
+ if (!TAILQ_EMPTY(&ipv4_filters)) {
+ ipf_ref();
+ TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
+ if (seen == 0) {
+ if ((struct ipfilter *)inject_ipfref == filter)
+ seen = 1;
+ } else if (filter->ipf_filter.ipf_input) {
+ errno_t result;
+
+ if (changed_header == 0) {
+ /*
+ * Perform IP header alignment fixup,
+ * if needed, before passing packet
+ * into filter(s).
+ */
+ IP_HDR_ALIGNMENT_FIXUP(m,
+ m->m_pkthdr.rcvif, ipf_unref());
+
+ /* ipf_unref() already called */
+ if (m == NULL)
+ return;
+
+ changed_header = 1;
+ ip = mtod(m, struct ip *);
+ ip->ip_len = htons(ip->ip_len + hlen);
+ ip->ip_off = htons(ip->ip_off);
+ ip->ip_sum = 0;
+ ip->ip_sum = ip_cksum_hdr_in(m, hlen);
+ }
+ result = filter->ipf_filter.ipf_input(
+ filter->ipf_filter.cookie, (mbuf_t *)&m,
+ hlen, proto);
+ if (result == EJUSTRETURN) {
+ ipf_unref();
+ return;
+ }
+ if (result != 0) {
+ ipf_unref();
+ m_freem(m);
+ return;
+ }
+ }
+ }
+ ipf_unref();
+ }
+
+ /* Perform IP header alignment fixup (post-filters), if needed */
+ IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return);
+
+ /*
+ * If there isn't a specific lock for the protocol
+ * we're about to call, use the generic lock for AF_INET;
+ * otherwise let the protocol deal with its own locking.
+ */
+ ip = mtod(m, struct ip *);
+
+ if (changed_header) {
+ ip->ip_len = ntohs(ip->ip_len) - hlen;
+ ip->ip_off = ntohs(ip->ip_off);
+ }
+
+ if ((pr_input = ip_protox[ip->ip_p]->pr_input) == NULL) {
+ m_freem(m);
+ } else if (!(ip_protox[ip->ip_p]->pr_flags & PR_PROTOLOCK)) {
+ lck_mtx_lock(inet_domain_mutex);
+ pr_input(m, hlen);
+ lck_mtx_unlock(inet_domain_mutex);
+ } else {
+ pr_input(m, hlen);
+ }
+}
+
+struct pktchain_elm {
+ struct mbuf *pkte_head;
+ struct mbuf *pkte_tail;
+ struct in_addr pkte_saddr;
+ struct in_addr pkte_daddr;
+ uint16_t pkte_npkts;
+ uint16_t pkte_proto;
+ uint32_t pkte_nbytes;
+};
+
+typedef struct pktchain_elm pktchain_elm_t;
+
+/* Store up to PKTTBL_SZ unique flows on the stack */
+#define PKTTBL_SZ 7
+
+static struct mbuf *
+ip_chain_insert(struct mbuf *packet, pktchain_elm_t *tbl)
+{
+ struct ip *ip;
+ int pkttbl_idx = 0;
+
+ ip = mtod(packet, struct ip *);
+
+ /* reusing the hash function from inaddr_hashval */
+ pkttbl_idx = inaddr_hashval(ntohl(ip->ip_src.s_addr)) % PKTTBL_SZ;
+ if (tbl[pkttbl_idx].pkte_head == NULL) {
+ tbl[pkttbl_idx].pkte_head = packet;
+ tbl[pkttbl_idx].pkte_saddr.s_addr = ip->ip_src.s_addr;
+ tbl[pkttbl_idx].pkte_daddr.s_addr = ip->ip_dst.s_addr;
+ tbl[pkttbl_idx].pkte_proto = ip->ip_p;
+ } else if ((ip->ip_dst.s_addr != tbl[pkttbl_idx].pkte_daddr.s_addr) ||
+ (ip->ip_src.s_addr != tbl[pkttbl_idx].pkte_saddr.s_addr) ||
+ (ip->ip_p != tbl[pkttbl_idx].pkte_proto)) {
+ /* slot already holds a different flow; don't chain */
+ return (packet);
+ }
+ if (tbl[pkttbl_idx].pkte_tail != NULL)
+ mbuf_setnextpkt(tbl[pkttbl_idx].pkte_tail, packet);
+
+ tbl[pkttbl_idx].pkte_tail = packet;
+ tbl[pkttbl_idx].pkte_npkts += 1;
+ tbl[pkttbl_idx].pkte_nbytes += packet->m_pkthdr.len;
+ return (NULL);
+}
+
+/* args is a dummy variable here for backward compatibility */
+static void
+ip_input_second_pass_loop_tbl(pktchain_elm_t *tbl, struct ip_fw_in_args *args)
+{
+ int i = 0;
+
+ for (i = 0; i < PKTTBL_SZ; i++) {
+ if (tbl[i].pkte_head != NULL) {
+ struct mbuf *m = tbl[i].pkte_head;
+ ip_input_second_pass(m, m->m_pkthdr.rcvif, 0,
+ tbl[i].pkte_npkts, tbl[i].pkte_nbytes, args, 0);
+
+ if (tbl[i].pkte_npkts > 2)
+ ipstat.ips_rxc_chainsz_gt2++;
+ if (tbl[i].pkte_npkts > 4)
+ ipstat.ips_rxc_chainsz_gt4++;
+#if (DEBUG || DEVELOPMENT)
+ if (ip_input_measure)
+ net_perf_histogram(&net_perf, tbl[i].pkte_npkts);
+#endif /* (DEBUG || DEVELOPMENT) */
+ tbl[i].pkte_head = tbl[i].pkte_tail = NULL;
+ tbl[i].pkte_npkts = 0;
+ tbl[i].pkte_nbytes = 0;
+ /* no need to initialize address and protocol in tbl */
+ }
+ }
+}
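+/*
+ * Chaining flow in outline: the caller hands each validated packet to
+ * ip_chain_insert(), which hashes on the source address; a NULL return
+ * means the packet was queued behind packets with the same
+ * (src, dst, proto) triple, while a non-NULL return means the slot is
+ * held by a different flow and the packet must be processed on its
+ * own.  Once the input list is drained,
+ * ip_input_second_pass_loop_tbl() flushes each non-empty slot through
+ * ip_input_second_pass() as a single chain.
+ */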
+
+static void
+ip_input_cpout_args(struct ip_fw_in_args *args, struct ip_fw_args *args1,
+ boolean_t *done_init)
+{
+ if (*done_init == FALSE) {
+ bzero(args1, sizeof(struct ip_fw_args));
+ *done_init = TRUE;
+ }
+ args1->fwa_next_hop = args->fwai_next_hop;
+ args1->fwa_ipfw_rule = args->fwai_ipfw_rule;
+ args1->fwa_pf_rule = args->fwai_pf_rule;
+ args1->fwa_divert_rule = args->fwai_divert_rule;
+}
+
+static void
+ip_input_cpin_args(struct ip_fw_args *args1, struct ip_fw_in_args *args)
+{
+ args->fwai_next_hop = args1->fwa_next_hop;
+ args->fwai_ipfw_rule = args1->fwa_ipfw_rule;
+ args->fwai_pf_rule = args1->fwa_pf_rule;
+ args->fwai_divert_rule = args1->fwa_divert_rule;
+}
+
+typedef enum {
+ IPINPUT_DOCHAIN = 0, /* packet is eligible for flow chaining */
+ IPINPUT_DONTCHAIN, /* deliver the packet on its own */
+ IPINPUT_FREED, /* packet was freed; nothing left to deliver */
+ IPINPUT_DONE /* packet was fully handled; no second pass needed */
+} ipinput_chain_ret_t;
+
+static void
+ip_input_update_nstat(struct ifnet *ifp, struct in_addr src_ip,
+ u_int32_t packets, u_int32_t bytes)
+{
+ if (nstat_collect) {
+ struct rtentry *rt = ifnet_cached_rtlookup_inet(ifp,
+ src_ip);
+ if (rt != NULL) {
+ nstat_route_rx(rt, packets, bytes, 0);
+ rtfree(rt);
+ }
+ }
+}
+
+static void
+ip_input_dispatch_chain(struct mbuf *m)
+{
+ struct mbuf *tmp_mbuf = m;
+ struct mbuf *nxt_mbuf = NULL;
+ struct ip *ip = NULL;
+ unsigned int hlen;
+
+ ip = mtod(tmp_mbuf, struct ip *);
+ hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+ while (tmp_mbuf != NULL) {
+ nxt_mbuf = mbuf_nextpkt(tmp_mbuf);
+ mbuf_setnextpkt(tmp_mbuf, NULL);
+
+ if ((sw_lro) && (ip->ip_p == IPPROTO_TCP))
+ tmp_mbuf = tcp_lro(tmp_mbuf, hlen);
+ if (tmp_mbuf)
+ ip_proto_dispatch_in(tmp_mbuf, hlen, ip->ip_p, 0);
+ tmp_mbuf = nxt_mbuf;
+ if (tmp_mbuf) {
+ ip = mtod(tmp_mbuf, struct ip *);
+ /* first mbuf of chain already has adjusted ip_len */
+ hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+ ip->ip_len = ntohs(ip->ip_len) - hlen;
+ }
+ }
+}
+
+static void
+ip_input_setdst_chain(struct mbuf *m, uint32_t ifindex, struct in_ifaddr *ia)
+{
+ struct mbuf *tmp_mbuf = m;
+
+ while (tmp_mbuf) {
+ ip_setdstifaddr_info(tmp_mbuf, ifindex, ia);
+ tmp_mbuf = mbuf_nextpkt(tmp_mbuf);
+ }
+}
+
+/*
+ * The first pass does all essential packet validation and places each
+ * packet on a per-flow queue, so that operations whose outcome is the
+ * same for every packet of a flow can be performed once per chain.
+ * div_info is the packet divert/tee info.
+ */
+static ipinput_chain_ret_t
+ip_input_first_pass(struct mbuf *m, u_int32_t *div_info,
+ struct ip_fw_in_args *args, int *ours, struct mbuf **modm)
+{
+ struct ip *ip;
+ struct ifnet *inifp;
+ unsigned int hlen;
+ int retval = IPINPUT_DOCHAIN;
+ int len = 0;
+ struct in_addr src_ip;
+#if IPFIREWALL
+ int i;
+#endif
+#if IPFIREWALL || DUMMYNET
+ struct m_tag *copy;
+ struct m_tag *p;
+ boolean_t delete = FALSE;
+ struct ip_fw_args args1;
+ boolean_t init = FALSE;