X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/316670eb35587141e969394ae8537d66b9211e80..ecc0ceb4089d506a0b8d16686a95817b331af9cb:/bsd/sys/mbuf.h diff --git a/bsd/sys/mbuf.h b/bsd/sys/mbuf.h index e3ad20c4a..b5b7ee802 100644 --- a/bsd/sys/mbuf.h +++ b/bsd/sys/mbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2011 Apple Inc. All rights reserved. + * Copyright (c) 1999-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -86,7 +86,7 @@ #include #include - +#include /* * Mbufs are of a single size, MSIZE (machine/param.h), which * includes overhead. An mbuf may add a single "mbuf cluster" of size @@ -103,18 +103,27 @@ #define _MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ #define _MHLEN (_MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ -#define NMBPBGSHIFT (MBIGCLSHIFT - MSIZESHIFT) -#define NMBPBG (1 << NMBPBGSHIFT) /* # of mbufs per big cl */ +#define NMBPGSHIFT (PAGE_SHIFT - MSIZESHIFT) +#define NMBPG (1 << NMBPGSHIFT) /* # of mbufs per page */ -#define NCLPBGSHIFT (MBIGCLSHIFT - MCLSHIFT) -#define NCLPBG (1 << NCLPBGSHIFT) /* # of cl per big cl */ +#define NCLPGSHIFT (PAGE_SHIFT - MCLSHIFT) +#define NCLPG (1 << NCLPGSHIFT) /* # of cl per page */ + +#define NBCLPGSHIFT (PAGE_SHIFT - MBIGCLSHIFT) +#define NBCLPG (1 << NBCLPGSHIFT) /* # of big cl per page */ -#define NMBPCLSHIFT (NMBPBGSHIFT - NCLPBGSHIFT) +#define NMBPCLSHIFT (MCLSHIFT - MSIZESHIFT) #define NMBPCL (1 << NMBPCLSHIFT) /* # of mbufs per cl */ -#define NCLPJCLSHIFT ((M16KCLSHIFT - MBIGCLSHIFT) + NCLPBGSHIFT) +#define NCLPJCLSHIFT (M16KCLSHIFT - MCLSHIFT) #define NCLPJCL (1 << NCLPJCLSHIFT) /* # of cl per jumbo cl */ +#define NCLPBGSHIFT (MBIGCLSHIFT - MCLSHIFT) +#define NCLPBG (1 << NCLPBGSHIFT) /* # of cl per big cl */ + +#define NMBPBGSHIFT (MBIGCLSHIFT - MSIZESHIFT) +#define NMBPBG (1 << NMBPBGSHIFT) /* # of mbufs per big cl */ + /* * Macros for type conversion * mtod(m,t) - convert mbuf pointer to data pointer of correct type @@ 
-125,12 +134,12 @@ /* header at beginning of each mbuf: */ struct m_hdr { - struct mbuf *mh_next; /* next buffer in chain */ - struct mbuf *mh_nextpkt; /* next chain in queue/record */ - int32_t mh_len; /* amount of data in this mbuf */ - caddr_t mh_data; /* location of data */ - short mh_type; /* type of data in this mbuf */ - short mh_flags; /* flags; see below */ + struct mbuf *mh_next; /* next buffer in chain */ + struct mbuf *mh_nextpkt; /* next chain in queue/record */ + caddr_t mh_data; /* location of data */ + int32_t mh_len; /* amount of data in this mbuf */ + u_int16_t mh_type; /* type of data in this mbuf */ + u_int16_t mh_flags; /* flags; see below */ }; /* @@ -164,82 +173,296 @@ struct m_taghdr { u_int64_t refcnt; /* Number of tags in this mbuf */ }; -/* Values for pftag_flags */ -#define PF_TAG_GENERATED 0x000001 /* pkt generated by PF */ -#define PF_TAG_FRAGCACHE 0x000002 -#define PF_TAG_TRANSLATE_LOCALHOST 0x000004 -#define PF_TAG_FLOWHASH 0x000100 /* valid flowhash value */ -#define PF_TAG_HDR_INET 0x000200 /* hdr points to IPv4 */ -#define PF_TAG_HDR_INET6 0x000400 /* hdr points to IPv6 */ -#define PF_TAG_TCP 0x000800 /* payload is TCP */ -#define PF_TAG_FLOWADV 0x010000 /* local flow advisory */ -#define PF_TAG_QUEUE1 0x100000 /* queue-specific */ - -#define IF_PKTSEQ_SHIFT 4 +/* + * Driver auxiliary metadata tag (KERNEL_TAG_TYPE_DRVAUX). 
+ */ +struct m_drvaux_tag { + u_int32_t da_family; /* IFNET_FAMILY values */ + u_int32_t da_subfamily; /* IFNET_SUBFAMILY values */ + u_int32_t da_reserved; /* for future */ + u_int32_t da_length; /* length of following data */ +}; -/* PF mbuf tag */ +/* Values for pftag_flags (16-bit wide) */ +#define PF_TAG_GENERATED 0x1 /* pkt generated by PF */ +#define PF_TAG_FRAGCACHE 0x2 +#define PF_TAG_TRANSLATE_LOCALHOST 0x4 +#if PF_ECN +#define PF_TAG_HDR_INET 0x8 /* hdr points to IPv4 */ +#define PF_TAG_HDR_INET6 0x10 /* hdr points to IPv6 */ +#endif /* PF_ECN */ +/* + * PF mbuf tag + */ struct pf_mtag { + u_int16_t pftag_flags; /* PF_TAG flags */ + u_int16_t pftag_rtableid; /* alternate routing table id */ + u_int16_t pftag_tag; + u_int16_t pftag_routed; +#if PF_ALTQ + u_int32_t pftag_qid; +#endif /* PF_ALTQ */ +#if PF_ECN void *pftag_hdr; /* saved hdr pos in mbuf, for ECN */ - unsigned int pftag_rtableid; /* alternate routing table id */ +#endif /* PF_ECN */ +}; + +/* + * TCP mbuf tag + */ +struct tcp_pktinfo { union { struct { - u_int32_t qid; - union { - u_int8_t val8[4]; - u_int16_t val16[2]; - u_int32_t val32; - } __qpriv_u; /* for queue-specific use */ - } __pf_data; - u_int64_t pktseq; - } __pfifseq_u; /* Used for pf or interface bandwidth measurement */ -#define pftag_qid __pfifseq_u.__pf_data.qid -#define pftag_qpriv8 __pfifseq_u.__pf_data.__qpriv_u.val8 -#define pftag_qpriv16 __pfifseq_u.__pf_data.__qpriv_u.val16 -#define pftag_qpriv32 __pfifseq_u.__pf_data.__qpriv_u.val32 -#define pftag_pktseq __pfifseq_u.pktseq - u_int32_t pftag_flowhash; - u_int16_t pftag_tag; - u_int16_t pftag_routed; - u_int32_t pftag_flags; /* PF_TAG flags */ + u_int32_t segsz; /* segment size (actual MSS) */ + } __tx; + struct { + u_int16_t lro_pktlen; /* max seg size encountered */ + u_int8_t lro_npkts; /* # of coalesced TCP pkts */ + u_int8_t lro_timediff; /* time spent in LRO */ + } __rx; + } __offload; + union { + u_int32_t pri; /* send msg priority */ + u_int32_t seq; /* recv msg 
sequence # */ + } __msgattr; +#define tso_segsz proto_mtag.__pr_u.tcp.tm_tcp.__offload.__tx.segsz +#define lro_pktlen proto_mtag.__pr_u.tcp.tm_tcp.__offload.__rx.lro_pktlen +#define lro_npkts proto_mtag.__pr_u.tcp.tm_tcp.__offload.__rx.lro_npkts +#define lro_elapsed proto_mtag.__pr_u.tcp.tm_tcp.__offload.__rx.lro_timediff +#define msg_pri proto_mtag.__pr_u.tcp.tm_tcp.__msgattr.pri +#define msg_seq proto_mtag.__pr_u.tcp.tm_tcp.__msgattr.seq +}; + +/* + * MPTCP mbuf tag + */ +struct mptcp_pktinfo { + u_int64_t mtpi_dsn; /* MPTCP Data Sequence Number */ + union { + u_int64_t mtpi_dan; /* MPTCP Data Ack Number */ + struct { + u_int32_t mtpi_rel_seq; /* Relative Seq Number */ + u_int32_t mtpi_length; /* Length of mapping */ + } mtpi_subf; + }; +#define mp_dsn proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_dsn +#define mp_rseq proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_subf.mtpi_rel_seq +#define mp_rlen proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_subf.mtpi_length +#define mp_dack proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_subf.mtpi_dan }; -/* TCP specific mbuf tag */ +/* + * TCP specific mbuf tag. Note that the current implementation uses + * MPTCP metadata strictly between MPTCP and the TCP subflow layers, + * hence tm_tcp and tm_mptcp are mutually exclusive. This also means + * that TCP messages functionality is currently incompatible with MPTCP. + */ struct tcp_mtag { - u_int tm_tso_segz; /* TSO segment size (actual MSS) */ - u_int16_t tm_pktlen; /* LRO - max segment size encountered */ - u_int16_t tm_npkts; /* LRO - number of coalesced TCP pkts */ + union { + struct tcp_pktinfo tm_tcp; /* TCP and below */ + struct mptcp_pktinfo tm_mptcp; /* MPTCP-TCP only */ + }; +}; + +/* + * Protocol specific mbuf tag (at most one protocol metadata per mbuf). + * + * Care must be taken to ensure that they are mutually exclusive, e.g. + * IPSec policy ID implies no TCP segment offload (which is fine given + * that the former is used on the virtual ipsec interface that does + * not advertise the TSO capability.) 
+ */ +struct proto_mtag { + union { + struct tcp_mtag tcp; /* TCP specific */ + } __pr_u; +}; + +/* + * NECP specific mbuf tag. + */ +struct necp_mtag { + u_int32_t necp_policy_id; + u_int32_t necp_last_interface_index; + u_int32_t necp_route_rule_id; }; -/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */ +/* + * Record/packet header in first mbuf of chain; valid only if M_PKTHDR set. + */ struct pkthdr { - int len; /* total packet length */ struct ifnet *rcvif; /* rcv interface */ - /* variables for ip and tcp reassembly */ - void *header; /* pointer to packet header */ + void *pkt_hdr; /* pointer to packet header */ + int32_t len; /* total packet length */ /* variables for hardware checksum */ /* Note: csum_flags is used for hardware checksum and VLAN */ - int csum_flags; /* flags regarding checksum */ - int csum_data; /* data field used by csum routines */ - u_short vlan_tag; /* VLAN tag, host byte order */ - u_short socket_id; /* socket id */ - SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ - struct pf_mtag pf_mtag; /* built-in PF tag */ -#define m_flowhash pf_mtag.pftag_flowhash -#define m_fhflags pf_mtag.pftag_flags - u_int32_t svc; /* MBUF_SVC value */ - u_int16_t vt_nrecs; /* # of IGMPv3/MLDv2 records */ - u_int16_t aux_flags; /* auxiliary packet flags */ - struct tcp_mtag tcp_mtag; /* tcp related data */ -#define tso_segsz tcp_mtag.tm_tso_segz -#define lro_pktlen tcp_mtag.tm_pktlen -#define lro_npkts tcp_mtag.tm_npkts + u_int32_t csum_flags; /* flags regarding checksum */ + union { + struct { + u_int16_t val; /* checksum value */ + u_int16_t start; /* checksum start offset */ + } _csum_rx; +#define csum_rx_val _csum_rx.val +#define csum_rx_start _csum_rx.start + struct { + u_int16_t start; /* checksum start offset */ + u_int16_t stuff; /* checksum stuff offset */ + } _csum_tx; +#define csum_tx_start _csum_tx.start +#define csum_tx_stuff _csum_tx.stuff + u_int32_t csum_data; /* data field used by csum routines */ + }; + 
u_int16_t vlan_tag; /* VLAN tag, host byte order */ + /* + * Packet classifier info + * + * PKTF_FLOW_ID set means valid flow ID. A non-zero flow ID value + * means the packet has been classified by one of the flow sources. + * It is also a prerequisite for flow control advisory, which is + * enabled by additionally setting PKTF_FLOW_ADV. + * + * The protocol value is a best-effort representation of the payload. + * It is opportunistically updated and used only for optimization. + * It is not a substitute for parsing the protocol header(s); use it + * only as a hint. + * + * If PKTF_IFAINFO is set, pkt_ifainfo contains one or both of the + * indices of interfaces which own the source and/or destination + * addresses of the packet. For the local/loopback case (PKTF_LOOP), + * both should be valid, and thus allows for the receiving end to + * quickly determine the actual interfaces used by the addresses; + * they may not necessarily be the same or refer to the loopback + * interface. Otherwise, in the non-local/loopback case, the indices + * are opportunistically set, and because of that only one may be set + * (0 means the index has not been determined.) In addition, the + * interface address flags are also recorded. This allows us to avoid + * storing the corresponding {in,in6}_ifaddr in an mbuf tag. Ideally + * this would be a superset of {ia,ia6}_flags, but the namespaces are + * overlapping at present, so we'll need a new set of values in future + * to achieve this. For now, we will just rely on the address family + * related code paths examining this mbuf to interpret the flags. 
+ */ + u_int8_t pkt_proto; /* IPPROTO value */ + u_int8_t pkt_flowsrc; /* FLOWSRC values */ + u_int32_t pkt_flowid; /* flow ID */ + u_int32_t pkt_flags; /* PKTF flags (see below) */ + u_int32_t pkt_svc; /* MBUF_SVC value */ + union { + struct { + u_int16_t src; /* ifindex of src addr i/f */ + u_int16_t src_flags; /* src PKT_IFAIFF flags */ + u_int16_t dst; /* ifindex of dst addr i/f */ + u_int16_t dst_flags; /* dst PKT_IFAIFF flags */ + } _pkt_iaif; +#define src_ifindex _pkt_iaif.src +#define src_iff _pkt_iaif.src_flags +#define dst_ifindex _pkt_iaif.dst +#define dst_iff _pkt_iaif.dst_flags + u_int64_t pkt_ifainfo; /* data field used by ifainfo */ + u_int32_t pkt_unsent_databytes; /* unsent data */ + }; +#if MEASURE_BW + u_int64_t pkt_bwseq; /* sequence # */ +#endif /* MEASURE_BW */ + u_int64_t pkt_enqueue_ts; /* enqueue time */ + + /* + * Tags (external and built-in) + */ + SLIST_HEAD(packet_tags, m_tag) tags; /* list of external tags */ + struct proto_mtag proto_mtag; /* built-in protocol-specific tag */ + struct pf_mtag pf_mtag; /* built-in PF tag */ + struct necp_mtag necp_mtag; /* built-in NECP tag */ + /* + * Module private scratch space (32-bit aligned), currently 16-bytes + * large. Anything stored here is not guaranteed to survive across + * modules. This should be the penultimate structure right before + * the red zone. Add new fields above this. + */ + struct { + union { + u_int8_t __mpriv8[16]; + u_int16_t __mpriv16[8]; + struct { + union { + u_int8_t __val8[4]; + u_int16_t __val16[2]; + u_int32_t __val32; + } __mpriv32_u; + } __mpriv32[4]; + u_int64_t __mpriv64[2]; + } __mpriv_u; + } pkt_mpriv __attribute__((aligned(4))); + u_int32_t redzone; /* red zone */ }; -/* description of external storage mapped into mbuf, valid if M_EXT set */ +/* + * Flow data source type. A data source module is responsible for generating + * a unique flow ID and associating it to each data flow as pkt_flowid. 
+ * This is required for flow control/advisory, as it allows the output queue + * to identify the data source object and inform it that it can resume its + * transmission (in the event it was flow controlled.) + */ +#define FLOWSRC_INPCB 1 /* flow ID generated by INPCB */ +#define FLOWSRC_IFNET 2 /* flow ID generated by interface */ +#define FLOWSRC_PF 3 /* flow ID generated by PF */ + +/* + * Packet flags. Unlike m_flags, all packet flags are copied along when + * copying m_pkthdr, i.e. no equivalent of M_COPYFLAGS here. These flags + * (and other classifier info) will be cleared during DLIL input. + * + * Some notes about M_LOOP and PKTF_LOOP: + * + * - M_LOOP flag is overloaded, and its use is discouraged. Historically, + * that flag was used by the KAME implementation for allowing certain + * exceptions to be made in the IP6_EXTHDR_CHECK() logic; this + * was originally meant to be set as the packet is looped back to the + * system, and in some circumstances temporarily set in ip6_output(). + * Over time, this flag was used by the pre-output routines to indicate + * to the DLIL frameout and output routines, that the packet may be + * looped back to the system under the right conditions. In addition, + * this is an mbuf flag rather than an mbuf packet header flag. + * + * - PKTF_LOOP is an mbuf packet header flag, which is set if and only + * if the packet was looped back to the system. This flag should be + * used instead for newer code. 
+ */ +#define PKTF_FLOW_ID 0x1 /* pkt has valid flowid value */ +#define PKTF_FLOW_ADV 0x2 /* pkt triggers local flow advisory */ +#define PKTF_FLOW_LOCALSRC 0x4 /* pkt is locally originated */ +#define PKTF_FLOW_RAWSOCK 0x8 /* pkt locally generated by raw sock */ +#define PKTF_PRIO_PRIVILEGED 0x10 /* packet priority is privileged */ +#define PKTF_PROXY_DST 0x20 /* processed but not locally destined */ +#define PKTF_INET_RESOLVE 0x40 /* IPv4 resolver packet */ +#define PKTF_INET6_RESOLVE 0x80 /* IPv6 resolver packet */ +#define PKTF_RESOLVE_RTR 0x100 /* pkt is for resolving router */ +#define PKTF_SW_LRO_PKT 0x200 /* pkt is a large coalesced pkt */ +#define PKTF_SW_LRO_DID_CSUM 0x400 /* IP and TCP checksums done by LRO */ +#define PKTF_MPTCP 0x800 /* TCP with MPTCP metadata */ +#define PKTF_MPSO 0x1000 /* MPTCP socket meta data */ +#define PKTF_LOOP 0x2000 /* loopbacked packet */ +#define PKTF_IFAINFO 0x4000 /* pkt has valid interface addr info */ +#define PKTF_SO_BACKGROUND 0x8000 /* data is from background source */ +#define PKTF_FORWARDED 0x10000 /* pkt was forwarded from another i/f */ +#define PKTF_PRIV_GUARDED 0x20000 /* pkt_mpriv area guard enabled */ +#define PKTF_KEEPALIVE 0x40000 /* pkt is kernel-generated keepalive */ +#define PKTF_SO_REALTIME 0x80000 /* data is realtime traffic */ +#define PKTF_VALID_UNSENT_DATA 0x100000 /* unsent data is valid */ +#define PKTF_TCP_REXMT 0x200000 /* packet is TCP retransmission */ + +/* flags related to flow control/advisory and identification */ +#define PKTF_FLOW_MASK \ + (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK) + +/* + * Description of external storage mapped into mbuf, valid only if M_EXT set. 
+ */ struct m_ext { caddr_t ext_buf; /* start of buffer */ - void (*ext_free)(caddr_t, u_int, caddr_t); /* free routine if not the usual */ + void (*ext_free) /* free routine if not the usual */ + (caddr_t, u_int, caddr_t); u_int ext_size; /* size of buffer, for ext_free */ caddr_t ext_arg; /* additional ext_free argument */ struct ext_refsq { /* references held */ @@ -254,6 +477,9 @@ struct m_ext { /* define m_ext to a type since it gets redefined below */ typedef struct m_ext _m_ext_t; +/* + * The mbuf object + */ struct mbuf { struct m_hdr m_hdr; union { @@ -289,7 +515,7 @@ struct mbuf { #define M_PROTO1 0x0008 /* protocol-specific */ #define M_PROTO2 0x0010 /* protocol-specific */ #define M_PROTO3 0x0020 /* protocol-specific */ -#define M_LOOP 0x0040 /* packet is looped back */ +#define M_LOOP 0x0040 /* packet is looped back (also see PKTF_LOOP) */ #define M_PROTO5 0x0080 /* protocol-specific */ /* mbuf pkthdr flags, also in m_flags (private) */ @@ -328,17 +554,24 @@ struct mbuf { #define CSUM_IP_VALID 0x0200 /* ... 
the csum is valid */ #define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */ #define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */ -#define CSUM_TCP_SUM16 0x1000 /* simple TCP Sum16 computation */ +#define CSUM_PARTIAL 0x1000 /* simple Sum16 computation */ #define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP) #define CSUM_DELAY_IP (CSUM_IP) /* IPv4 only: no IPv6 IP cksum */ #define CSUM_DELAY_IPV6_DATA (CSUM_TCPIPV6 | CSUM_UDPIPV6) #define CSUM_DATA_IPV6_VALID CSUM_DATA_VALID /* csum_data field is valid */ + +#define CSUM_TX_FLAGS \ + (CSUM_DELAY_IP | CSUM_DELAY_DATA | CSUM_DELAY_IPV6_DATA | \ + CSUM_DATA_VALID | CSUM_PARTIAL) + +#define CSUM_RX_FLAGS \ + (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_PSEUDO_HDR | \ + CSUM_DATA_VALID | CSUM_PARTIAL) + /* * Note: see also IF_HWASSIST_CSUM defined in */ -/* bottom 16 bits reserved for hardware checksum */ -#define CSUM_CHECKSUM_MASK 0xffff /* VLAN tag present */ #define CSUM_VLAN_TAG_VALID 0x10000 /* vlan_tag field is valid */ @@ -347,17 +580,22 @@ struct mbuf { #define CSUM_TSO_IPV4 0x100000 /* This mbuf needs to be segmented by the NIC */ #define CSUM_TSO_IPV6 0x200000 /* This mbuf needs to be segmented by the NIC */ -/* - * Auxiliary packet flags. Unlike m_flags, all auxiliary flags are copied - * along when copying m_pkthdr, i.e. no equivalent of M_COPYFLAGS here. - * Note that this flag is 16-bit wide. 
- */ -#define MAUXF_PRIO_PRIVILEGED 0x0001 /* packet priority is privileged */ -#define MAUXF_PROXY_DST 0x0002 /* processed but not locally destined */ -#define MAUXF_INET_RESOLVE_RTR 0x0004 /* pkt is for resolving IPv4 router */ -#define MAUXF_INET6_RESOLVE_RTR 0x0008 /* pkt is for resolving IPv6 router */ -#define MAUXF_SW_LRO_PKT 0x0010 /* pkt is a large coalesced pkt */ -#define MAUXF_SW_LRO_DID_CSUM 0x0020 /* IP and TCP checksums done by LRO*/ +#define TSO_IPV4_OK(_ifp, _m) \ + (((_ifp)->if_hwassist & IFNET_TSO_IPV4) && \ + ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) \ + +#define TSO_IPV4_NOTOK(_ifp, _m) \ + (!((_ifp)->if_hwassist & IFNET_TSO_IPV4) && \ + ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) \ + +#define TSO_IPV6_OK(_ifp, _m) \ + (((_ifp)->if_hwassist & IFNET_TSO_IPV6) && \ + ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) \ + +#define TSO_IPV6_NOTOK(_ifp, _m) \ + (!((_ifp)->if_hwassist & IFNET_TSO_IPV6) && \ + ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) \ + #endif /* XNU_KERNEL_PRIVATE */ /* mbuf types */ @@ -459,6 +697,8 @@ union m16kcluster { #define M_COPY_PFTAG(to, from) m_copy_pftag(to, from) +#define M_COPY_CLASSIFIER(to, from) m_copy_classifier(to, from) + /* * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place * an object of the specified size at the end of the mbuf, longword aligned. @@ -497,7 +737,8 @@ do { \ * If how is M_DONTWAIT and allocation fails, the original mbuf chain * is freed and m is set to NULL. 
*/ -#define M_PREPEND(m, plen, how) ((m) = m_prepend_2((m), (plen), (how))) +#define M_PREPEND(m, plen, how, align) \ + ((m) = m_prepend_2((m), (plen), (how), (align))) /* change mbuf to new type */ #define MCHTYPE(m, t) m_mchtype(m, t) @@ -566,10 +807,10 @@ do { \ m->m_type == MT_FREE || \ ((m->m_flags & M_EXT) != 0 && m->m_ext.ext_buf == NULL)) { \ panic_plain("Failed mbuf validity check: mbuf %p len %d " \ - "type %d flags 0x%x data %p rcvif %s%d ifflags 0x%x", \ + "type %d flags 0x%x data %p rcvif %s ifflags 0x%x", \ m, m->m_len, m->m_type, m->m_flags, \ ((m->m_flags & M_EXT) ? m->m_ext.ext_buf : m->m_data), \ - rcvif->if_name, rcvif->if_unit, \ + if_name(rcvif), \ (rcvif->if_flags & 0xffff)); \ } \ } while (0) @@ -725,6 +966,7 @@ struct omb_class_stat { u_int64_t mbcl_purge_cnt; /* # of purges so far */ u_int64_t mbcl_fail_cnt; /* # of allocation failures */ u_int32_t mbcl_ctotal; /* total only for this class */ + u_int32_t mbcl_release_cnt; /* amount of memory returned */ /* * Cache layer statistics */ @@ -753,6 +995,7 @@ typedef struct mb_class_stat { u_int64_t mbcl_purge_cnt; /* # of purges so far */ u_int64_t mbcl_fail_cnt; /* # of allocation failures */ u_int32_t mbcl_ctotal; /* total only for this class */ + u_int32_t mbcl_release_cnt; /* amount of memory returned */ /* * Cache layer statistics */ @@ -761,7 +1004,8 @@ typedef struct mb_class_stat { u_int32_t mbcl_mc_waiter_cnt; /* # waiters on the cache */ u_int32_t mbcl_mc_wretry_cnt; /* # of wait retries */ u_int32_t mbcl_mc_nwretry_cnt; /* # of no-wait retry attempts */ - u_int64_t mbcl_reserved[4]; /* for future use */ + u_int32_t mbcl_peak_reported; /* last usage peak reported */ + u_int32_t mbcl_reserved[7]; /* for future use */ } mb_class_stat_t; #define MCS_DISABLED 0 /* cache is permanently disabled */ @@ -857,6 +1101,13 @@ struct mbuf; #define M_DONTWAIT M_NOWAIT #define M_WAIT M_WAITOK +/* modes for m_copym and variants */ +#define M_COPYM_NOOP_HDR 0 /* don't copy/move pkthdr contents */ 
+#define M_COPYM_COPY_HDR 1 /* copy pkthdr from old to new */ +#define M_COPYM_MOVE_HDR 2 /* move pkthdr from old to new */ +#define M_COPYM_MUST_COPY_HDR 3 /* MUST copy pkthdr from old to new */ +#define M_COPYM_MUST_MOVE_HDR 4 /* MUST move pkthdr from old to new */ + /* * These macros are mapped to the appropriate KPIs, so that private code * can be simply recompiled in order to be forward-compatible with future @@ -868,18 +1119,19 @@ struct mbuf; #define MINCLSIZE mbuf_get_minclsize() /* cluster usage threshold */ extern void m_freem(struct mbuf *); -extern char *mcl_to_paddr(char *); +extern u_int64_t mcl_to_paddr(char *); extern void m_adj(struct mbuf *, int); extern void m_cat(struct mbuf *, struct mbuf *); extern void m_copydata(struct mbuf *, int, int, void *); extern struct mbuf *m_copym(struct mbuf *, int, int, int); +extern struct mbuf *m_copym_mode(struct mbuf *, int, int, int, uint32_t); extern struct mbuf *m_get(int, int); extern struct mbuf *m_gethdr(int, int); extern struct mbuf *m_getpacket(void); extern struct mbuf *m_getpackets(int, int, int); extern struct mbuf *m_mclget(struct mbuf *, int); extern void *m_mtod(struct mbuf *); -extern struct mbuf *m_prepend_2(struct mbuf *, int, int); +extern struct mbuf *m_prepend_2(struct mbuf *, int, int, int); extern struct mbuf *m_pullup(struct mbuf *, int); extern struct mbuf *m_split(struct mbuf *, int, int); extern void m_mclfree(caddr_t p); @@ -898,10 +1150,9 @@ extern void m_mclfree(caddr_t p); if (((_m)->m_flags & M_PKTHDR) && \ (_m)->m_pkthdr.rcvif != NULL) { \ panic_plain("\n%s: mbuf %p data ptr %p is not " \ - "32-bit aligned [%s%d: alignerrs=%lld]\n", \ + "32-bit aligned [%s: alignerrs=%lld]\n", \ __func__, (_m), (_m)->m_data, \ - (_m)->m_pkthdr.rcvif->if_name, \ - (_m)->m_pkthdr.rcvif->if_unit, \ + if_name((_m)->m_pkthdr.rcvif), \ (_m)->m_pkthdr.rcvif->if_alignerrs); \ } else { \ panic_plain("\n%s: mbuf %p data ptr %p is not " \ @@ -946,6 +1197,9 @@ extern void m_mclfree(caddr_t p); #define 
MBUF_TC2SCVAL(_tc) ((_tc) << 7) #define IS_MBUF_SC_BACKGROUND(_sc) (((_sc) == MBUF_SC_BK_SYS) || \ ((_sc) == MBUF_SC_BK)) +#define IS_MBUF_SC_REALTIME(_sc) ((_sc) >= MBUF_SC_AV && (_sc) <= MBUF_SC_VO) +#define IS_MBUF_SC_BESTEFFORT(_sc) ((_sc) == MBUF_SC_BE || \ + (_sc) == MBUF_SC_RD || (_sc) == MBUF_SC_OAM) #define SCIDX_BK_SYS MBUF_SCIDX(MBUF_SC_BK_SYS) #define SCIDX_BK MBUF_SCIDX(MBUF_SC_BK) @@ -987,19 +1241,19 @@ extern void m_mclfree(caddr_t p); c == SCVAL_RV || c == SCVAL_VI || c == SCVAL_VO || \ c == SCVAL_CTL) -__private_extern__ union mbigcluster *mbutl; /* start VA of mbuf pool */ -__private_extern__ union mbigcluster *embutl; /* end VA of mbuf pool */ -__private_extern__ unsigned int nmbclusters; /* number of mapped clusters */ -__private_extern__ int njcl; /* # of jumbo clusters */ -__private_extern__ int njclbytes; /* size of a jumbo cluster */ -__private_extern__ int max_hdr; /* largest link+protocol header */ -__private_extern__ int max_datalen; /* MHLEN - max_hdr */ +extern unsigned char *mbutl; /* start VA of mbuf pool */ +extern unsigned char *embutl; /* end VA of mbuf pool */ +extern unsigned int nmbclusters; /* number of mapped clusters */ +extern int njcl; /* # of jumbo clusters */ +extern int njclbytes; /* size of a jumbo cluster */ +extern int max_hdr; /* largest link+protocol header */ +extern int max_datalen; /* MHLEN - max_hdr */ /* Use max_linkhdr instead of _max_linkhdr */ -__private_extern__ int _max_linkhdr; /* largest link-level header */ +extern int _max_linkhdr; /* largest link-level header */ /* Use max_protohdr instead of _max_protohdr */ -__private_extern__ int _max_protohdr; /* largest protocol header */ +extern int _max_protohdr; /* largest protocol header */ __private_extern__ unsigned int mbuf_default_ncl(int, u_int64_t); __private_extern__ void mbinit(void); @@ -1011,7 +1265,7 @@ __private_extern__ struct mbuf *m_mbigget(struct mbuf *, int); __private_extern__ caddr_t m_16kalloc(int); __private_extern__ void 
m_16kfree(caddr_t, u_int, caddr_t); __private_extern__ struct mbuf *m_m16kget(struct mbuf *, int); - +__private_extern__ int m_reinit(struct mbuf *, int); __private_extern__ struct mbuf *m_free(struct mbuf *); __private_extern__ struct mbuf *m_getclr(int, int); __private_extern__ struct mbuf *m_getptr(struct mbuf *, int, int *); @@ -1036,6 +1290,7 @@ __private_extern__ caddr_t m_mclalloc(int); __private_extern__ int m_mclhasreference(struct mbuf *); __private_extern__ void m_copy_pkthdr(struct mbuf *, struct mbuf *); __private_extern__ void m_copy_pftag(struct mbuf *, struct mbuf *); +__private_extern__ void m_copy_classifier(struct mbuf *, struct mbuf *); __private_extern__ struct mbuf *m_dtom(void *); __private_extern__ int m_mtocl(void *); @@ -1054,7 +1309,7 @@ __private_extern__ struct mbuf *m_copyback_cow(struct mbuf *, int, int, __private_extern__ int m_makewritable(struct mbuf **, int, int, int); __private_extern__ struct mbuf *m_dup(struct mbuf *m, int how); __private_extern__ struct mbuf *m_copym_with_hdrs(struct mbuf *, int, int, int, - struct mbuf **, int *); + struct mbuf **, int *, uint32_t); __private_extern__ struct mbuf *m_getpackethdrs(int, int); __private_extern__ struct mbuf *m_getpacket_how(int); __private_extern__ struct mbuf *m_getpackets_internal(unsigned int *, int, @@ -1062,6 +1317,8 @@ __private_extern__ struct mbuf *m_getpackets_internal(unsigned int *, int, __private_extern__ struct mbuf *m_allocpacket_internal(unsigned int *, size_t, unsigned int *, int, int, size_t); +__private_extern__ void m_drain(void); + /* * Packets may have annotations attached by affixing a list of "packet * tags" to the pkthdr structure. 
Packet tags are dynamically allocated @@ -1102,7 +1359,7 @@ enum { KERNEL_TAG_TYPE_ENCAP = 8, KERNEL_TAG_TYPE_INET6 = 9, KERNEL_TAG_TYPE_IPSEC = 10, - KERNEL_TAG_TYPE_PF = 11 + KERNEL_TAG_TYPE_DRVAUX = 11, }; /* Packet tag routines */ @@ -1118,7 +1375,7 @@ __private_extern__ struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, u_int16_t, struct m_tag *); __private_extern__ struct m_tag *m_tag_copy(struct m_tag *, int); __private_extern__ int m_tag_copy_chain(struct mbuf *, struct mbuf *, int); -__private_extern__ void m_tag_init(struct mbuf *); +__private_extern__ void m_tag_init(struct mbuf *, int); __private_extern__ struct m_tag *m_tag_first(struct mbuf *); __private_extern__ struct m_tag *m_tag_next(struct mbuf *, struct m_tag *); @@ -1129,7 +1386,11 @@ __END_DECLS #ifdef XNU_KERNEL_PRIVATE __BEGIN_DECLS -__private_extern__ void m_service_class_init(struct mbuf *); +__private_extern__ void m_scratch_init(struct mbuf *); +__private_extern__ u_int32_t m_scratch_get(struct mbuf *, u_int8_t **); + +__private_extern__ void m_classifier_init(struct mbuf *, uint32_t); + __private_extern__ int m_set_service_class(struct mbuf *, mbuf_svc_class_t); __private_extern__ mbuf_svc_class_t m_get_service_class(struct mbuf *); __private_extern__ mbuf_svc_class_t m_service_class_from_idx(u_int32_t); @@ -1137,6 +1398,15 @@ __private_extern__ mbuf_svc_class_t m_service_class_from_val(u_int32_t); __private_extern__ int m_set_traffic_class(struct mbuf *, mbuf_traffic_class_t); __private_extern__ mbuf_traffic_class_t m_get_traffic_class(struct mbuf *); +#define ADDCARRY(_x) do { \ + while (((_x) >> 16) != 0) \ + (_x) = ((_x) >> 16) + ((_x) & 0xffff); \ +} while (0) + +__private_extern__ u_int16_t m_adj_sum16(struct mbuf *, u_int32_t, + u_int32_t, u_int32_t); +__private_extern__ u_int16_t m_sum16(struct mbuf *, u_int32_t, u_int32_t); + __END_DECLS #endif /* XNU_KERNEL_PRIVATE */ #endif /* KERNEL */