/*
- * Copyright (c) 1999-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2014 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/lock.h>
#include <sys/queue.h>
-
-#if PF_PKTHDR
-#include <net/pf_mtag.h>
-#endif /* PF_PKTHDR */
-
+#include <machine/endian.h>
/*
* Mbufs are of a single size, MSIZE (machine/param.h), which
* includes overhead. An mbuf may add a single "mbuf cluster" of size
/* header at beginning of each mbuf: */
struct m_hdr {
- struct mbuf *mh_next; /* next buffer in chain */
- struct mbuf *mh_nextpkt; /* next chain in queue/record */
- int32_t mh_len; /* amount of data in this mbuf */
- caddr_t mh_data; /* location of data */
- short mh_type; /* type of data in this mbuf */
- short mh_flags; /* flags; see below */
+ struct mbuf *mh_next; /* next buffer in chain */
+ struct mbuf *mh_nextpkt; /* next chain in queue/record */
+ caddr_t mh_data; /* location of data */
+ int32_t mh_len; /* amount of data in this mbuf */
+ u_int16_t mh_type; /* type of data in this mbuf */
+ u_int16_t mh_flags; /* flags; see below */
};
/*
u_int32_t m_tag_id; /* Module ID */
};
-#ifdef __LP64__
#define M_TAG_ALIGN(len) \
- P2ROUNDUP(len, sizeof (u_int64_t)) + sizeof (struct m_tag)
-#else
-#define M_TAG_ALIGN(len) \
- P2ROUNDUP(len, sizeof (u_int32_t)) + sizeof (struct m_tag)
-#endif /* !__LP64__ */
+ (P2ROUNDUP(len, sizeof (u_int64_t)) + sizeof (struct m_tag))
#define M_TAG_VALID_PATTERN 0xfeedfacefeedfaceULL
#define M_TAG_FREE_PATTERN 0xdeadbeefdeadbeefULL
+/*
+ * Packet tag header structure (at the top of mbuf). Pointers are
+ * 32-bit in ILP32; m_tag needs 64-bit alignment, hence padded.
+ */
struct m_taghdr {
+#ifndef __LP64__
+ u_int32_t pad; /* For structure alignment */
+#endif /* !__LP64__ */
u_int64_t refcnt; /* Number of tags in this mbuf */
};
-/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
+/*
+ * Driver auxiliary metadata tag (KERNEL_TAG_TYPE_DRVAUX).
+ *
+ * Fixed header made of four 32-bit words. da_length records the
+ * length of the data that follows this header; the layout of that
+ * data is not described here (presumably it is defined per
+ * family/subfamily -- TODO confirm with the users of this tag).
+ */
+struct m_drvaux_tag {
+ u_int32_t da_family; /* IFNET_FAMILY values */
+ u_int32_t da_subfamily; /* IFNET_SUBFAMILY values */
+ u_int32_t da_reserved; /* for future */
+ u_int32_t da_length; /* length of following data */
+};
+
+/* Values for pftag_flags (16-bit wide) */
+#define PF_TAG_GENERATED 0x1 /* pkt generated by PF */
+#define PF_TAG_FRAGCACHE 0x2
+#define PF_TAG_TRANSLATE_LOCALHOST 0x4
+#if PF_ECN
+#define PF_TAG_HDR_INET 0x8 /* hdr points to IPv4 */
+#define PF_TAG_HDR_INET6 0x10 /* hdr points to IPv6 */
+#endif /* PF_ECN */
+/*
+ * PF mbuf tag
+ *
+ * Embedded in struct pkthdr as the pf_mtag member; m_pftag() yields
+ * its address for a given mbuf.
+ *
+ * pftag_qid and pftag_hdr are compiled in only when PF_ALTQ and
+ * PF_ECN respectively are enabled, so the size of this structure
+ * depends on the build configuration.
+ */
+struct pf_mtag {
+ u_int16_t pftag_flags; /* PF_TAG flags */
+ u_int16_t pftag_rtableid; /* alternate routing table id */
+ u_int16_t pftag_tag; /* NOTE(review): meaning not visible here */
+ u_int16_t pftag_routed; /* NOTE(review): meaning not visible here */
+#if PF_ALTQ
+ u_int32_t pftag_qid; /* presumably the ALTQ queue id -- TODO confirm */
+#endif /* PF_ALTQ */
+#if PF_ECN
+ void *pftag_hdr; /* saved hdr pos in mbuf, for ECN */
+#endif /* PF_ECN */
+};
+
+/*
+ * TCP mbuf tag
+ *
+ * __offload overlays the transmit-side segment size (__tx) with the
+ * receive-side LRO fields (__rx); both views occupy the same 32 bits.
+ * __msgattr likewise overlays the send priority with the receive
+ * sequence number.
+ *
+ * The macros below are member paths that start at the proto_mtag
+ * member of struct pkthdr, so they are meant to be applied to a
+ * pkthdr (e.g. m->m_pkthdr.tso_segsz). lro_pktlen and lro_npkts
+ * share their names with the members they expand to; a macro name
+ * is not re-expanded inside its own expansion, so this is well
+ * defined.
+ */
+struct tcp_pktinfo {
+ union {
+ struct {
+ u_int32_t segsz; /* segment size (actual MSS) */
+ } __tx;
+ struct {
+ u_int16_t lro_pktlen; /* max seg size encountered */
+ u_int8_t lro_npkts; /* # of coalesced TCP pkts */
+ u_int8_t lro_timediff; /* time spent in LRO */
+ } __rx;
+ } __offload;
+ union {
+ u_int32_t pri; /* send msg priority */
+ u_int32_t seq; /* recv msg sequence # */
+ } __msgattr;
+#define tso_segsz proto_mtag.__pr_u.tcp.tm_tcp.__offload.__tx.segsz
+#define lro_pktlen proto_mtag.__pr_u.tcp.tm_tcp.__offload.__rx.lro_pktlen
+#define lro_npkts proto_mtag.__pr_u.tcp.tm_tcp.__offload.__rx.lro_npkts
+#define lro_elapsed proto_mtag.__pr_u.tcp.tm_tcp.__offload.__rx.lro_timediff
+#define msg_pri proto_mtag.__pr_u.tcp.tm_tcp.__msgattr.pri
+#define msg_seq proto_mtag.__pr_u.tcp.tm_tcp.__msgattr.seq
+};
+
+/*
+ * MPTCP mbuf tag
+ *
+ * mtpi_dan shares its storage with mtpi_subf through the anonymous
+ * union, so only one of the two views is meaningful at a time.
+ *
+ * The mp_* macros are member paths that start at the proto_mtag
+ * member of struct pkthdr.
+ */
+struct mptcp_pktinfo {
+ u_int64_t mtpi_dsn; /* MPTCP Data Sequence Number */
+ union {
+ u_int64_t mtpi_dan; /* MPTCP Data Ack Number */
+ struct {
+ u_int32_t mtpi_rel_seq; /* Relative Seq Number */
+ u_int32_t mtpi_length; /* Length of mapping */
+ } mtpi_subf;
+ };
+#define mp_dsn proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_dsn
+#define mp_rseq proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_subf.mtpi_rel_seq
+#define mp_rlen proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_subf.mtpi_length
+/* mtpi_dan is a direct member of the anonymous union, not of mtpi_subf */
+#define mp_dack proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_dan
+};
+
+/*
+ * TCP specific mbuf tag. Note that the current implementation uses
+ * MPTCP metadata strictly between MPTCP and the TCP subflow layers,
+ * hence tm_tcp and tm_mptcp are mutually exclusive. This also means
+ * that TCP messages functionality is currently incompatible with MPTCP.
+ *
+ * The union is anonymous, so both members are addressed directly on
+ * the enclosing structure (tcp.tm_tcp, tcp.tm_mptcp); the accessor
+ * macros defined with tcp_pktinfo and mptcp_pktinfo rely on that.
+ */
+struct tcp_mtag {
+ union {
+ struct tcp_pktinfo tm_tcp; /* TCP and below */
+ struct mptcp_pktinfo tm_mptcp; /* MPTCP-TCP only */
+ };
+};
+
+/*
+ * Protocol specific mbuf tag (at most one protocol metadata per mbuf).
+ *
+ * Care must be taken to ensure that they are mutually exclusive, e.g.
+ * IPSec policy ID implies no TCP segment offload (which is fine given
+ * that the former is used on the virtual ipsec interface that does
+ * not advertise the TSO capability.)
+ *
+ * The __pr_u union currently has a single member (tcp). This tag is
+ * embedded in struct pkthdr as the proto_mtag member.
+ */
+struct proto_mtag {
+ union {
+ struct tcp_mtag tcp; /* TCP specific */
+ } __pr_u;
+};
+
+/*
+ * NECP specific mbuf tag.
+ *
+ * Embedded in struct pkthdr as the necp_mtag member. Both fields are
+ * plain 32-bit values; presumably an NECP policy identifier and the
+ * index of the last interface the packet was seen on -- TODO confirm
+ * against the NECP code.
+ */
+struct necp_mtag {
+ uint32_t necp_policy_id;
+ uint32_t necp_last_interface_index;
+};
+
+/*
+ * Record/packet header in first mbuf of chain; valid only if M_PKTHDR set.
+ */
struct pkthdr {
- int len; /* total packet length */
struct ifnet *rcvif; /* rcv interface */
-
/* variables for ip and tcp reassembly */
- void *header; /* pointer to packet header */
+ void *pkt_hdr; /* pointer to packet header */
+ int32_t len; /* total packet length */
/* variables for hardware checksum */
/* Note: csum_flags is used for hardware checksum and VLAN */
- int csum_flags; /* flags regarding checksum */
- int csum_data; /* data field used by csum routines */
- u_int tso_segsz; /* TSO segment size (actual MSS) */
- u_short vlan_tag; /* VLAN tag, host byte order */
- u_short socket_id; /* socket id */
- SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
-#if PF_PKTHDR
+ u_int32_t csum_flags; /* flags regarding checksum */
+ union {
+ struct {
+ u_int16_t val; /* checksum value */
+ u_int16_t start; /* checksum start offset */
+ } _csum_rx;
+#define csum_rx_val _csum_rx.val
+#define csum_rx_start _csum_rx.start
+ struct {
+ u_int16_t start; /* checksum start offset */
+ u_int16_t stuff; /* checksum stuff offset */
+ } _csum_tx;
+#define csum_tx_start _csum_tx.start
+#define csum_tx_stuff _csum_tx.stuff
+ u_int32_t csum_data; /* data field used by csum routines */
+ };
+ u_int16_t vlan_tag; /* VLAN tag, host byte order */
+ /*
+ * Packet classifier info
+ *
+ * PKTF_FLOW_ID set means valid flow ID. A non-zero flow ID value
+ * means the packet has been classified by one of the flow sources.
+ * It is also a prerequisite for flow control advisory, which is
+ * enabled by additionally setting PKTF_FLOW_ADV.
+ *
+ * The protocol value is a best-effort representation of the payload.
+ * It is opportunistically updated and used only for optimization.
+ * It is not a substitute for parsing the protocol header(s); use it
+ * only as a hint.
+ *
+ * If PKTF_IFAINFO is set, pkt_ifainfo contains one or both of the
+ * indices of interfaces which own the source and/or destination
+ * addresses of the packet. For the local/loopback case (PKTF_LOOP),
+ * both should be valid, which allows the receiving end to quickly
+ * determine the actual interfaces used by the addresses;
+ * they may not necessarily be the same or refer to the loopback
+ * interface. Otherwise, in the non-local/loopback case, the indices
+ * are opportunistically set, and because of that only one may be set
+ * (0 means the index has not been determined.) In addition, the
+ * interface address flags are also recorded. This allows us to avoid
+ * storing the corresponding {in,in6}_ifaddr in an mbuf tag. Ideally
+ * this would be a superset of {ia,ia6}_flags, but the namespaces are
+ * overlapping at present, so we'll need a new set of values in future
+ * to achieve this. For now, we will just rely on the address family
+ * related code paths examining this mbuf to interpret the flags.
+ */
+ u_int8_t pkt_proto; /* IPPROTO value */
+ u_int8_t pkt_flowsrc; /* FLOWSRC values */
+ u_int32_t pkt_flowid; /* flow ID */
+ u_int32_t pkt_flags; /* PKTF flags (see below) */
+ u_int32_t pkt_svc; /* MBUF_SVC value */
+ union {
+ struct {
+ u_int16_t src; /* ifindex of src addr i/f */
+ u_int16_t src_flags; /* src PKT_IFAIFF flags */
+ u_int16_t dst; /* ifindex of dst addr i/f */
+ u_int16_t dst_flags; /* dst PKT_IFAIFF flags */
+ } _pkt_iaif;
+#define src_ifindex _pkt_iaif.src
+#define src_iff _pkt_iaif.src_flags
+#define dst_ifindex _pkt_iaif.dst
+#define dst_iff _pkt_iaif.dst_flags
+ u_int64_t pkt_ifainfo; /* data field used by ifainfo */
+ };
+#if MEASURE_BW
+ u_int64_t pkt_bwseq; /* sequence # */
+#endif /* MEASURE_BW */
+ u_int64_t pkt_enqueue_ts; /* enqueue time */
+ /*
+ * Tags (external and built-in)
+ */
+ SLIST_HEAD(packet_tags, m_tag) tags; /* list of external tags */
+ struct proto_mtag proto_mtag; /* built-in protocol-specific tag */
+ struct pf_mtag pf_mtag; /* built-in PF tag */
+ struct necp_mtag necp_mtag; /* built-in NECP tag */
/*
- * Be careful; {en,dis}abling PF_PKTHDR will require xnu recompile;
- * private code outside of xnu must use mbuf_get_mhlen() instead
- * of MHLEN.
+ * Module private scratch space (32-bit aligned), currently 16 bytes
+ * large. Anything stored here is not guaranteed to survive across
+ * modules. This should be the penultimate structure right before
+ * the red zone. Add new fields above this.
*/
- struct pf_mtag pf_mtag;
-#endif /* PF_PKTHDR */
- u_int32_t prio; /* packet priority */
- u_short vt_nrecs; /* # of IGMPv3 records in this chain */
- u_short _pad;
+ struct {
+ union {
+ u_int8_t __mpriv8[16];
+ u_int16_t __mpriv16[8];
+ struct {
+ union {
+ u_int8_t __val8[4];
+ u_int16_t __val16[2];
+ u_int32_t __val32;
+ } __mpriv32_u;
+ } __mpriv32[4];
+ u_int64_t __mpriv64[2];
+ } __mpriv_u;
+ } pkt_mpriv __attribute__((aligned(4)));
+ u_int32_t redzone; /* red zone */
};
+/*
+ * Flow data source type. A data source module is responsible for generating
+ * a unique flow ID and associating it to each data flow as pkt_flowid.
+ * This is required for flow control/advisory, as it allows the output queue
+ * to identify the data source object and inform that it can resume its
+ * transmission (in the event it was flow controlled.)
+ */
+#define FLOWSRC_INPCB 1 /* flow ID generated by INPCB */
+#define FLOWSRC_IFNET 2 /* flow ID generated by interface */
+#define FLOWSRC_PF 3 /* flow ID generated by PF */
+
+/*
+ * Packet flags. Unlike m_flags, all packet flags are copied along when
+ * copying m_pkthdr, i.e. no equivalent of M_COPYFLAGS here. These flags
+ * (and other classifier info) will be cleared during DLIL input.
+ *
+ * Some notes about M_LOOP and PKTF_LOOP:
+ *
+ * - M_LOOP flag is overloaded, and its use is discouraged. Historically,
+ * that flag was used by the KAME implementation for allowing
+ * certain exceptions to be made in the IP6_EXTHDR_CHECK() logic; this
+ * was originally meant to be set as the packet is looped back to the
+ * system, and in some circumstances temporarily set in ip6_output().
+ * Over time, this flag was used by the pre-output routines to indicate
+ * to the DLIL frameout and output routines, that the packet may be
+ * looped back to the system under the right conditions. In addition,
+ * this is an mbuf flag rather than an mbuf packet header flag.
+ *
+ * - PKTF_LOOP is an mbuf packet header flag, which is set if and only
+ * if the packet was looped back to the system. This flag should be
+ * used instead for newer code.
+ */
+#define PKTF_FLOW_ID 0x1 /* pkt has valid flowid value */
+#define PKTF_FLOW_ADV 0x2 /* pkt triggers local flow advisory */
+#define PKTF_FLOW_LOCALSRC 0x4 /* pkt is locally originated */
+#define PKTF_FLOW_RAWSOCK 0x8 /* pkt locally generated by raw sock */
+#define PKTF_PRIO_PRIVILEGED 0x10 /* packet priority is privileged */
+#define PKTF_PROXY_DST 0x20 /* processed but not locally destined */
+#define PKTF_INET_RESOLVE 0x40 /* IPv4 resolver packet */
+#define PKTF_INET6_RESOLVE 0x80 /* IPv6 resolver packet */
+#define PKTF_RESOLVE_RTR 0x100 /* pkt is for resolving router */
+#define PKTF_SW_LRO_PKT 0x200 /* pkt is a large coalesced pkt */
+#define PKTF_SW_LRO_DID_CSUM 0x400 /* IP and TCP checksums done by LRO */
+#define PKTF_MPTCP 0x800 /* TCP with MPTCP metadata */
+#define PKTF_MPSO 0x1000 /* MPTCP socket meta data */
+#define PKTF_LOOP 0x2000 /* loopbacked packet */
+#define PKTF_IFAINFO 0x4000 /* pkt has valid interface addr info */
+#define PKTF_SO_BACKGROUND 0x8000 /* data is from background source */
+#define PKTF_FORWARDED 0x10000 /* pkt was forwarded from another i/f */
+#define PKTF_PRIV_GUARDED 0x20000 /* pkt_mpriv area guard enabled */
+#define PKTF_KEEPALIVE 0x40000 /* pkt is kernel-generated keepalive */
+/* flags related to flow control/advisory and identification */
+#define PKTF_FLOW_MASK \
+ (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)
-/* description of external storage mapped into mbuf, valid if M_EXT set */
+/*
+ * Description of external storage mapped into mbuf, valid only if M_EXT set.
+ */
struct m_ext {
caddr_t ext_buf; /* start of buffer */
- void (*ext_free)(caddr_t, u_int, caddr_t); /* free routine if not the usual */
+ void (*ext_free) /* free routine if not the usual */
+ (caddr_t, u_int, caddr_t);
u_int ext_size; /* size of buffer, for ext_free */
caddr_t ext_arg; /* additional ext_free argument */
struct ext_refsq { /* references held */
/* define m_ext to a type since it gets redefined below */
typedef struct m_ext _m_ext_t;
+/*
+ * The mbuf object
+ */
struct mbuf {
struct m_hdr m_hdr;
union {
#define m_ext M_dat.MH.MH_dat.MH_ext
#define m_pktdat M_dat.MH.MH_dat.MH_databuf
#define m_dat M_dat.M_databuf
+#define m_pktlen(_m) ((_m)->m_pkthdr.len)
+#define m_pftag(_m) (&(_m)->m_pkthdr.pf_mtag)
/* mbuf flags (private) */
#define M_EXT 0x0001 /* has associated external storage */
#define M_PROTO1 0x0008 /* protocol-specific */
#define M_PROTO2 0x0010 /* protocol-specific */
#define M_PROTO3 0x0020 /* protocol-specific */
-#define M_LOOP 0x0040 /* packet is looped back */
+#define M_LOOP 0x0040 /* packet is looped back (also see PKTF_LOOP) */
#define M_PROTO5 0x0080 /* protocol-specific */
/* mbuf pkthdr flags, also in m_flags (private) */
M_LOOP|M_PROTO5|M_BCAST|M_MCAST|M_FRAG | \
M_FIRSTFRAG|M_LASTFRAG|M_PROMISC|M_HASFCS)
-/* flags indicating hw checksum support and sw checksum requirements [freebsd4.1] */
+/* flags indicating hw checksum support and sw checksum requirements */
#define CSUM_IP 0x0001 /* will csum IP */
#define CSUM_TCP 0x0002 /* will csum TCP */
#define CSUM_UDP 0x0004 /* will csum UDP */
#define CSUM_IP_VALID 0x0200 /* ... the csum is valid */
#define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */
#define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */
-#define CSUM_TCP_SUM16 0x1000 /* simple TCP Sum16 computation */
+#define CSUM_PARTIAL 0x1000 /* simple Sum16 computation */
#define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP)
#define CSUM_DELAY_IP (CSUM_IP) /* IPv4 only: no IPv6 IP cksum */
#define CSUM_DELAY_IPV6_DATA (CSUM_TCPIPV6 | CSUM_UDPIPV6)
#define CSUM_DATA_IPV6_VALID CSUM_DATA_VALID /* csum_data field is valid */
+
+#define CSUM_TX_FLAGS \
+ (CSUM_DELAY_IP | CSUM_DELAY_DATA | CSUM_DELAY_IPV6_DATA | \
+ CSUM_DATA_VALID | CSUM_PARTIAL)
+
+#define CSUM_RX_FLAGS \
+ (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_PSEUDO_HDR | \
+ CSUM_DATA_VALID | CSUM_PARTIAL)
+
/*
* Note: see also IF_HWASSIST_CSUM defined in <net/if_var.h>
*/
-/* bottom 16 bits reserved for hardware checksum */
-#define CSUM_CHECKSUM_MASK 0xffff
/* VLAN tag present */
#define CSUM_VLAN_TAG_VALID 0x10000 /* vlan_tag field is valid */
/* TCP Segment Offloading requested on this mbuf */
#define CSUM_TSO_IPV4 0x100000 /* This mbuf needs to be segmented by the NIC */
#define CSUM_TSO_IPV6 0x200000 /* This mbuf needs to be segmented by the NIC */
+
+/*
+ * TSO_IPVx_OK: the interface advertises TSO for that address family
+ * in if_hwassist and the mbuf requests it in m_pkthdr.csum_flags.
+ * TSO_IPVx_NOTOK: the mbuf requests TSO but the interface does not
+ * advertise it.
+ *
+ * No line continuation follows the closing parenthesis of a
+ * definition; a trailing backslash there would splice whatever is on
+ * the next line into the macro body.
+ */
+#define TSO_IPV4_OK(_ifp, _m) \
+ (((_ifp)->if_hwassist & IFNET_TSO_IPV4) && \
+ ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV4))
+
+#define TSO_IPV4_NOTOK(_ifp, _m) \
+ (!((_ifp)->if_hwassist & IFNET_TSO_IPV4) && \
+ ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV4))
+
+#define TSO_IPV6_OK(_ifp, _m) \
+ (((_ifp)->if_hwassist & IFNET_TSO_IPV6) && \
+ ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV6))
+
+#define TSO_IPV6_NOTOK(_ifp, _m) \
+ (!((_ifp)->if_hwassist & IFNET_TSO_IPV6) && \
+ ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV6))
+
#endif /* XNU_KERNEL_PRIVATE */
/* mbuf types */
*/
#define M_COPY_PKTHDR(to, from) m_copy_pkthdr(to, from)
+#define M_COPY_PFTAG(to, from) m_copy_pftag(to, from)
+
+#define M_COPY_CLASSIFIER(to, from) m_copy_classifier(to, from)
+
/*
* Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place
* an object of the specified size at the end of the mbuf, longword aligned.
/*
* M_STRUCT_GET ensures that intermediate protocol header (from "off" to
- * "len") is located in single mbuf, on contiguous memory region.
+ * "off+len") is located in single mbuf, on contiguous memory region.
* The pointer to the region will be returned to pointer variable "val",
* with type "typ".
*
do { \
struct mbuf *t; \
\
- if ((off) == 0) { \
- (val) = (typ)mtod(m, caddr_t); \
+ if ((off) == 0 && ((m)->m_len >= (len))) { \
+ (val) = (typ)(void *)mtod(m, caddr_t); \
} else { \
t = m_pulldown((m), (off), (len), NULL); \
if (t != NULL) { \
if (t->m_len < (len)) \
panic("m_pulldown malfunction"); \
- (val) = (typ)mtod(t, caddr_t); \
+ (val) = (typ)(void *)mtod(t, caddr_t); \
} else { \
(val) = (typ)NULL; \
(m) = NULL; \
m->m_len > ((njcl > 0) ? njclbytes : MBIGCLBYTES) || \
m->m_type == MT_FREE || \
((m->m_flags & M_EXT) != 0 && m->m_ext.ext_buf == NULL)) { \
- panic("Failed mbuf validity check: mbuf %p len %d " \
- "type %d flags 0x%x data %p rcvif %s%d ifflags 0x%x", \
+ panic_plain("Failed mbuf validity check: mbuf %p len %d " \
+ "type %d flags 0x%x data %p rcvif %s ifflags 0x%x", \
m, m->m_len, m->m_type, m->m_flags, \
((m->m_flags & M_EXT) ? m->m_ext.ext_buf : m->m_data), \
- rcvif->if_name, rcvif->if_unit, \
+ if_name(rcvif), \
(rcvif->if_flags & 0xffff)); \
} \
} while (0)
+/*
+ * Simple mbuf queueing system
+ *
+ * This is basically a SIMPLEQ adapted to mbuf use (i.e. using
+ * m_nextpkt instead of field.sqe_next).
+ *
+ * m_next is ignored, so queueing chains of mbufs is possible
+ *
+ * mq_last always holds the address of the link through which a newly
+ * enqueued packet is stored: &mq_first while the queue is empty,
+ * otherwise the m_nextpkt field of the tail packet.
+ *
+ * MBUFQ_DEQUEUE assigns the head (or NULL if the queue is empty) to
+ * its second argument, so that argument must be an lvalue. The same
+ * holds for MBUFQ_REMOVE, which expands MBUFQ_DEQUEUE when m is the
+ * head. MBUFQ_REMOVE does not test for the end of the queue while
+ * searching, so m has to be on the queue.
+ *
+ * MBUFQ_ENQUEUE_MULTI links m at the tail and makes n the new tail;
+ * presumably m..n are already chained through m_nextpkt by the
+ * caller (TODO confirm).
+ *
+ * MBUFQ_DRAIN hands every queued packet to m_freem() and leaves the
+ * queue empty.
+ *
+ * NOTE(review): MBUFQ_LAST expands to the contents of the tail link,
+ * which the macros here leave NULL, rather than to the last packet;
+ * confirm the intended use.
+ */
+#define MBUFQ_HEAD(name) \
+struct name { \
+ struct mbuf *mq_first; /* first packet */ \
+ struct mbuf **mq_last; /* addr of last next packet */ \
+}
+
+#define MBUFQ_INIT(q) do { \
+ MBUFQ_FIRST(q) = NULL; \
+ (q)->mq_last = &MBUFQ_FIRST(q); \
+} while (0)
+
+#define MBUFQ_PREPEND(q, m) do { \
+ if ((MBUFQ_NEXT(m) = MBUFQ_FIRST(q)) == NULL) /* queue was empty */ \
+ (q)->mq_last = &MBUFQ_NEXT(m); \
+ MBUFQ_FIRST(q) = (m); \
+} while (0)
+
+#define MBUFQ_ENQUEUE(q, m) do { \
+ MBUFQ_NEXT(m) = NULL; \
+ *(q)->mq_last = (m); /* store through the current tail link */ \
+ (q)->mq_last = &MBUFQ_NEXT(m); \
+} while (0)
+
+#define MBUFQ_ENQUEUE_MULTI(q, m, n) do { \
+ MBUFQ_NEXT(n) = NULL; \
+ *(q)->mq_last = (m); \
+ (q)->mq_last = &MBUFQ_NEXT(n); \
+} while (0)
+
+#define MBUFQ_DEQUEUE(q, m) do { \
+ if (((m) = MBUFQ_FIRST(q)) != NULL) { \
+ if ((MBUFQ_FIRST(q) = MBUFQ_NEXT(m)) == NULL) /* now empty */ \
+ (q)->mq_last = &MBUFQ_FIRST(q); \
+ else \
+ MBUFQ_NEXT(m) = NULL; /* detach from the rest */ \
+ } \
+} while (0)
+
+#define MBUFQ_REMOVE(q, m) do { \
+ if (MBUFQ_FIRST(q) == (m)) { \
+ MBUFQ_DEQUEUE(q, m); \
+ } else { \
+ struct mbuf *_m = MBUFQ_FIRST(q); \
+ while (MBUFQ_NEXT(_m) != (m)) /* no end-of-queue check */ \
+ _m = MBUFQ_NEXT(_m); \
+ if ((MBUFQ_NEXT(_m) = \
+ MBUFQ_NEXT(MBUFQ_NEXT(_m))) == NULL) /* removed the tail */ \
+ (q)->mq_last = &MBUFQ_NEXT(_m); \
+ } \
+} while (0)
+
+#define MBUFQ_DRAIN(q) do { \
+ struct mbuf *__m0; \
+ while ((__m0 = MBUFQ_FIRST(q)) != NULL) { \
+ MBUFQ_FIRST(q) = MBUFQ_NEXT(__m0); \
+ MBUFQ_NEXT(__m0) = NULL; \
+ m_freem(__m0); \
+ } \
+ (q)->mq_last = &MBUFQ_FIRST(q); \
+} while (0)
+
+#define MBUFQ_FOREACH(m, q) \
+ for ((m) = MBUFQ_FIRST(q); \
+ (m); \
+ (m) = MBUFQ_NEXT(m))
+
+#define MBUFQ_FOREACH_SAFE(m, q, tvar) \
+ for ((m) = MBUFQ_FIRST(q); \
+ (m) && ((tvar) = MBUFQ_NEXT(m), 1); /* save next before the body runs */ \
+ (m) = (tvar))
+
+#define MBUFQ_EMPTY(q) ((q)->mq_first == NULL)
+#define MBUFQ_FIRST(q) ((q)->mq_first)
+#define MBUFQ_NEXT(m) ((m)->m_nextpkt)
+#define MBUFQ_LAST(q) (*(q)->mq_last)
+
+#define max_linkhdr P2ROUNDUP(_max_linkhdr, sizeof (u_int32_t))
+#define max_protohdr P2ROUNDUP(_max_protohdr, sizeof (u_int32_t))
#endif /* XNU_KERNEL_PRIVATE */
/*
u_int64_t mbcl_purge_cnt; /* # of purges so far */
u_int64_t mbcl_fail_cnt; /* # of allocation failures */
u_int32_t mbcl_ctotal; /* total only for this class */
+ u_int32_t mbcl_release_cnt; /* amount of memory returned */
/*
* Cache layer statistics
*/
u_int64_t mbcl_purge_cnt; /* # of purges so far */
u_int64_t mbcl_fail_cnt; /* # of allocation failures */
u_int32_t mbcl_ctotal; /* total only for this class */
+ u_int32_t mbcl_release_cnt; /* amount of memory returned */
/*
* Cache layer statistics
*/
u_int32_t mbcl_mc_waiter_cnt; /* # waiters on the cache */
u_int32_t mbcl_mc_wretry_cnt; /* # of wait retries */
u_int32_t mbcl_mc_nwretry_cnt; /* # of no-wait retry attempts */
- u_int64_t mbcl_reserved[4]; /* for future use */
+ u_int32_t mbcl_peak_reported; /* last usage peak reported */
+ u_int32_t mbcl_reserved[7]; /* for future use */
} mb_class_stat_t;
#define MCS_DISABLED 0 /* cache is permanently disabled */
#define M_DONTWAIT M_NOWAIT
#define M_WAIT M_WAITOK
+/* modes for m_copym and variants */
+#define M_COPYM_NOOP_HDR 0 /* don't copy/move pkthdr contents */
+#define M_COPYM_COPY_HDR 1 /* copy pkthdr from old to new */
+#define M_COPYM_MOVE_HDR 2 /* move pkthdr from old to new */
+#define M_COPYM_MUST_COPY_HDR 3 /* MUST copy pkthdr from old to new */
+#define M_COPYM_MUST_MOVE_HDR 4 /* MUST move pkthdr from old to new */
+
/*
* These macros are mapped to the appropriate KPIs, so that private code
* can be simply recompiled in order to be forward-compatible with future
#define MINCLSIZE mbuf_get_minclsize() /* cluster usage threshold */
extern void m_freem(struct mbuf *);
-extern char *mcl_to_paddr(char *);
+extern u_int64_t mcl_to_paddr(char *);
extern void m_adj(struct mbuf *, int);
extern void m_cat(struct mbuf *, struct mbuf *);
extern void m_copydata(struct mbuf *, int, int, void *);
extern struct mbuf *m_copym(struct mbuf *, int, int, int);
+extern struct mbuf *m_copym_mode(struct mbuf *, int, int, int, uint32_t);
extern struct mbuf *m_get(int, int);
extern struct mbuf *m_gethdr(int, int);
extern struct mbuf *m_getpacket(void);
extern struct mbuf *m_split(struct mbuf *, int, int);
extern void m_mclfree(caddr_t p);
-__private_extern__ union mbigcluster *mbutl; /* start VA of mbuf pool */
-__private_extern__ union mbigcluster *embutl; /* end VA of mbuf pool */
-__private_extern__ unsigned int nmbclusters; /* number of mapped clusters */
-__private_extern__ int njcl; /* # of jumbo clusters */
-__private_extern__ int njclbytes; /* size of a jumbo cluster */
-__private_extern__ int max_linkhdr; /* largest link-level header */
-__private_extern__ int max_protohdr; /* largest protocol header */
-__private_extern__ int max_hdr; /* largest link+protocol header */
-__private_extern__ int max_datalen; /* MHLEN - max_hdr */
+/*
+ * On platforms which require strict alignment (currently for anything but
+ * i386 or x86_64), this macro checks whether the data pointer of an mbuf
+ * is 32-bit aligned (this is the expected minimum alignment for protocol
+ * headers), and panics otherwise.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(_m)
+#else /* !__i386__ && !__x86_64__ */
+#define MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(_m) do { \
+ if (!IS_P2ALIGNED((_m)->m_data, sizeof (u_int32_t))) { \
+ if (((_m)->m_flags & M_PKTHDR) && \
+ (_m)->m_pkthdr.rcvif != NULL) { \
+ panic_plain("\n%s: mbuf %p data ptr %p is not " \
+ "32-bit aligned [%s: alignerrs=%lld]\n", \
+ __func__, (_m), (_m)->m_data, \
+ if_name((_m)->m_pkthdr.rcvif), \
+ (_m)->m_pkthdr.rcvif->if_alignerrs); \
+ } else { \
+ panic_plain("\n%s: mbuf %p data ptr %p is not " \
+ "32-bit aligned\n", \
+ __func__, (_m), (_m)->m_data); \
+ } \
+ } \
+} while (0)
+#endif /* !__i386__ && !__x86_64__ */
+
+/* Maximum number of MBUF_SC values (excluding MBUF_SC_UNSPEC) */
+#define MBUF_SC_MAX_CLASSES 10
+
+/*
+ * These conversion macros rely on the corresponding MBUF_SC and
+ * MBUF_TC values in order to establish the following mapping:
+ *
+ * MBUF_SC_BK_SYS ] ==> MBUF_TC_BK
+ * MBUF_SC_BK ]
+ *
+ * MBUF_SC_BE ] ==> MBUF_TC_BE
+ * MBUF_SC_RD ]
+ * MBUF_SC_OAM ]
+ *
+ * MBUF_SC_AV ] ==> MBUF_TC_VI
+ * MBUF_SC_RV ]
+ * MBUF_SC_VI ]
+ *
+ * MBUF_SC_VO ] ==> MBUF_TC_VO
+ * MBUF_SC_CTL ]
+ *
+ * The values assigned to each service class allow for a fast mapping to
+ * the corresponding MBUF_TC traffic class values, as well as to retrieve the
+ * assigned index; therefore care must be taken when comparing against these
+ * values. Use the corresponding class and index macros to retrieve the
+ * corresponding portion, and never assume that a higher class corresponds
+ * to a higher index.
+ */
+#define MBUF_SCVAL(x) ((x) & 0xffff)
+#define MBUF_SCIDX(x) ((((x) >> 16) & 0xff) >> 3)
+#define MBUF_SC2TC(_sc) (MBUF_SCVAL(_sc) >> 7)
+#define MBUF_TC2SCVAL(_tc) ((_tc) << 7)
+#define IS_MBUF_SC_BACKGROUND(_sc) (((_sc) == MBUF_SC_BK_SYS) || \
+ ((_sc) == MBUF_SC_BK))
+
+#define SCIDX_BK_SYS MBUF_SCIDX(MBUF_SC_BK_SYS)
+#define SCIDX_BK MBUF_SCIDX(MBUF_SC_BK)
+#define SCIDX_BE MBUF_SCIDX(MBUF_SC_BE)
+#define SCIDX_RD MBUF_SCIDX(MBUF_SC_RD)
+#define SCIDX_OAM MBUF_SCIDX(MBUF_SC_OAM)
+#define SCIDX_AV MBUF_SCIDX(MBUF_SC_AV)
+#define SCIDX_RV MBUF_SCIDX(MBUF_SC_RV)
+#define SCIDX_VI MBUF_SCIDX(MBUF_SC_VI)
+#define SCIDX_VO MBUF_SCIDX(MBUF_SC_VO)
+#define SCIDX_CTL MBUF_SCIDX(MBUF_SC_CTL)
+
+#define SCVAL_BK_SYS MBUF_SCVAL(MBUF_SC_BK_SYS)
+#define SCVAL_BK MBUF_SCVAL(MBUF_SC_BK)
+#define SCVAL_BE MBUF_SCVAL(MBUF_SC_BE)
+#define SCVAL_RD MBUF_SCVAL(MBUF_SC_RD)
+#define SCVAL_OAM MBUF_SCVAL(MBUF_SC_OAM)
+#define SCVAL_AV MBUF_SCVAL(MBUF_SC_AV)
+#define SCVAL_RV MBUF_SCVAL(MBUF_SC_RV)
+#define SCVAL_VI MBUF_SCVAL(MBUF_SC_VI)
+#define SCVAL_VO MBUF_SCVAL(MBUF_SC_VO)
+#define SCVAL_CTL MBUF_SCVAL(MBUF_SC_CTL)
+
+/*
+ * Membership tests: each evaluates to non-zero when c equals one of
+ * the ten service class values (MBUF_VALID_SC), their indices
+ * (MBUF_VALID_SCIDX) or their 16-bit value portions (MBUF_VALID_SCVAL).
+ *
+ * The argument is parenthesized at every use so that an expression
+ * such as (a | b) is compared as a whole. It is still evaluated more
+ * than once, so it must not have side effects.
+ */
+#define MBUF_VALID_SC(c) \
+ ((c) == MBUF_SC_BK_SYS || (c) == MBUF_SC_BK || (c) == MBUF_SC_BE || \
+ (c) == MBUF_SC_RD || (c) == MBUF_SC_OAM || (c) == MBUF_SC_AV || \
+ (c) == MBUF_SC_RV || (c) == MBUF_SC_VI || (c) == MBUF_SC_VO || \
+ (c) == MBUF_SC_CTL)
+
+#define MBUF_VALID_SCIDX(c) \
+ ((c) == SCIDX_BK_SYS || (c) == SCIDX_BK || (c) == SCIDX_BE || \
+ (c) == SCIDX_RD || (c) == SCIDX_OAM || (c) == SCIDX_AV || \
+ (c) == SCIDX_RV || (c) == SCIDX_VI || (c) == SCIDX_VO || \
+ (c) == SCIDX_CTL)
+
+#define MBUF_VALID_SCVAL(c) \
+ ((c) == SCVAL_BK_SYS || (c) == SCVAL_BK || (c) == SCVAL_BE || \
+ (c) == SCVAL_RD || (c) == SCVAL_OAM || (c) == SCVAL_AV || \
+ (c) == SCVAL_RV || (c) == SCVAL_VI || (c) == SCVAL_VO || \
+ (c) == SCVAL_CTL)
+
+extern union mbigcluster *mbutl; /* start VA of mbuf pool */
+extern union mbigcluster *embutl; /* end VA of mbuf pool */
+extern unsigned int nmbclusters; /* number of mapped clusters */
+extern int njcl; /* # of jumbo clusters */
+extern int njclbytes; /* size of a jumbo cluster */
+extern int max_hdr; /* largest link+protocol header */
+extern int max_datalen; /* MHLEN - max_hdr */
+
+/* Use max_linkhdr instead of _max_linkhdr */
+extern int _max_linkhdr; /* largest link-level header */
+
+/* Use max_protohdr instead of _max_protohdr */
+extern int _max_protohdr; /* largest protocol header */
__private_extern__ unsigned int mbuf_default_ncl(int, u_int64_t);
__private_extern__ void mbinit(void);
__private_extern__ caddr_t m_16kalloc(int);
__private_extern__ void m_16kfree(caddr_t, u_int, caddr_t);
__private_extern__ struct mbuf *m_m16kget(struct mbuf *, int);
-
+__private_extern__ int m_reinit(struct mbuf *, int);
__private_extern__ struct mbuf *m_free(struct mbuf *);
__private_extern__ struct mbuf *m_getclr(int, int);
__private_extern__ struct mbuf *m_getptr(struct mbuf *, int, int *);
__private_extern__ unsigned int m_length(struct mbuf *);
+__private_extern__ unsigned int m_length2(struct mbuf *, struct mbuf **);
+__private_extern__ unsigned int m_fixhdr(struct mbuf *);
+__private_extern__ struct mbuf *m_defrag(struct mbuf *, int);
+__private_extern__ struct mbuf *m_defrag_offset(struct mbuf *, u_int32_t, int);
__private_extern__ struct mbuf *m_prepend(struct mbuf *, int, int);
__private_extern__ struct mbuf *m_copyup(struct mbuf *, int, int);
__private_extern__ struct mbuf *m_retry(int, int);
__private_extern__ caddr_t m_mclalloc(int);
__private_extern__ int m_mclhasreference(struct mbuf *);
__private_extern__ void m_copy_pkthdr(struct mbuf *, struct mbuf *);
+__private_extern__ void m_copy_pftag(struct mbuf *, struct mbuf *);
+__private_extern__ void m_copy_classifier(struct mbuf *, struct mbuf *);
__private_extern__ struct mbuf *m_dtom(void *);
__private_extern__ int m_mtocl(void *);
__private_extern__ int m_makewritable(struct mbuf **, int, int, int);
__private_extern__ struct mbuf *m_dup(struct mbuf *m, int how);
__private_extern__ struct mbuf *m_copym_with_hdrs(struct mbuf *, int, int, int,
- struct mbuf **, int *);
+ struct mbuf **, int *, uint32_t);
__private_extern__ struct mbuf *m_getpackethdrs(int, int);
__private_extern__ struct mbuf *m_getpacket_how(int);
__private_extern__ struct mbuf *m_getpackets_internal(unsigned int *, int,
__private_extern__ struct mbuf *m_allocpacket_internal(unsigned int *, size_t,
unsigned int *, int, int, size_t);
+__private_extern__ void m_drain(void);
+
/*
* Packets may have annotations attached by affixing a list of "packet
* tags" to the pkthdr structure. Packet tags are dynamically allocated
KERNEL_TAG_TYPE_ENCAP = 8,
KERNEL_TAG_TYPE_INET6 = 9,
KERNEL_TAG_TYPE_IPSEC = 10,
- KERNEL_TAG_TYPE_PF = 11
+ KERNEL_TAG_TYPE_DRVAUX = 11,
};
/* Packet tag routines */
u_int16_t, struct m_tag *);
__private_extern__ struct m_tag *m_tag_copy(struct m_tag *, int);
__private_extern__ int m_tag_copy_chain(struct mbuf *, struct mbuf *, int);
-__private_extern__ void m_tag_init(struct mbuf *);
+__private_extern__ void m_tag_init(struct mbuf *, int);
__private_extern__ struct m_tag *m_tag_first(struct mbuf *);
__private_extern__ struct m_tag *m_tag_next(struct mbuf *, struct m_tag *);
-__private_extern__ void m_prio_init(struct mbuf *);
-
__END_DECLS
#endif /* XNU_KERNEL_PRIVATE */
#ifdef KERNEL
#include <sys/kpi_mbuf.h>
+#ifdef XNU_KERNEL_PRIVATE
+__BEGIN_DECLS
+
+__private_extern__ void m_scratch_init(struct mbuf *);
+__private_extern__ u_int32_t m_scratch_get(struct mbuf *, u_int8_t **);
+
+__private_extern__ void m_classifier_init(struct mbuf *, uint32_t);
+
+__private_extern__ int m_set_service_class(struct mbuf *, mbuf_svc_class_t);
+__private_extern__ mbuf_svc_class_t m_get_service_class(struct mbuf *);
+__private_extern__ mbuf_svc_class_t m_service_class_from_idx(u_int32_t);
+__private_extern__ mbuf_svc_class_t m_service_class_from_val(u_int32_t);
+__private_extern__ int m_set_traffic_class(struct mbuf *, mbuf_traffic_class_t);
+__private_extern__ mbuf_traffic_class_t m_get_traffic_class(struct mbuf *);
+
+#define ADDCARRY(_x) do { /* fold everything above bit 15 back into the low 16 bits */ \
+ while (((_x) >> 16) != 0) /* repeat: the addition itself may carry */ \
+ (_x) = ((_x) >> 16) + ((_x) & 0xffff); /* _x is assigned to: must be an lvalue */ \
+} while (0)
+
+__private_extern__ u_int16_t m_adj_sum16(struct mbuf *, u_int32_t,
+ u_int32_t, u_int32_t);
+__private_extern__ u_int16_t m_sum16(struct mbuf *, u_int32_t, u_int32_t);
+
+__END_DECLS
+#endif /* XNU_KERNEL_PRIVATE */
#endif /* KERNEL */
#endif /* !_SYS_MBUF_H_ */