X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/d1ecb069dfe24481e4a83f44cb5217a2b06746d7..0a7de7458d150b5d4dffc935ba399be265ef0a1a:/bsd/net/pf_norm.c diff --git a/bsd/net/pf_norm.c b/bsd/net/pf_norm.c index 91e0083f4..9c28415de 100644 --- a/bsd/net/pf_norm.c +++ b/bsd/net/pf_norm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2008 Apple Inc. All rights reserved. + * Copyright (c) 2007-2016 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -89,94 +89,116 @@ #include struct pf_frent { - LIST_ENTRY(pf_frent) fr_next; - struct ip *fr_ip; - struct mbuf *fr_m; + LIST_ENTRY(pf_frent) fr_next; + struct mbuf *fr_m; +#define fr_ip fr_u.fru_ipv4 +#define fr_ip6 fr_u.fru_ipv6 + union { + struct ip *fru_ipv4; + struct ip6_hdr *fru_ipv6; + } fr_u; + struct ip6_frag fr_ip6f_opt; + int fr_ip6f_hlen; }; struct pf_frcache { LIST_ENTRY(pf_frcache) fr_next; - uint16_t fr_off; - uint16_t fr_end; + uint16_t fr_off; + uint16_t fr_end; }; -#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ -#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ -#define PFFRAG_DROP 0x0004 /* Drop all fragments */ -#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) +#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ +#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ +#define PFFRAG_DROP 0x0004 /* Drop all fragments */ +#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) struct pf_fragment { RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; - struct in_addr fr_src; - struct in_addr fr_dst; - u_int8_t fr_p; /* protocol of this fragment */ - u_int8_t fr_flags; /* status flags */ - u_int16_t fr_id; /* fragment id for reassemble */ - u_int16_t fr_max; /* fragment data max */ - u_int32_t fr_timeout; -#define fr_queue fr_u.fru_queue -#define fr_cache fr_u.fru_cache + struct pf_addr fr_srcx; + struct pf_addr fr_dstx; + u_int8_t fr_p; /* protocol of this 
fragment */ + u_int8_t fr_flags; /* status flags */ + u_int16_t fr_max; /* fragment data max */ +#define fr_id fr_uid.fru_id4 +#define fr_id6 fr_uid.fru_id6 union { - LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ - LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ + u_int16_t fru_id4; + u_int32_t fru_id6; + } fr_uid; + int fr_af; + u_int32_t fr_timeout; +#define fr_queue fr_u.fru_queue +#define fr_cache fr_u.fru_cache + union { + LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ + LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ } fr_u; + uint32_t fr_csum_flags; /* checksum flags */ + uint32_t fr_csum; /* partial checksum value */ }; -static TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; -static TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; +static TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; +static TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; static __inline int pf_frag_compare(struct pf_fragment *, struct pf_fragment *); -static RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; +static RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); /* Private prototypes */ +static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *, + struct ip6_frag *); static void pf_ip2key(struct pf_fragment *, struct ip *); static void pf_remove_fragment(struct pf_fragment *); static void pf_flush_fragments(void); static void pf_free_fragment(struct pf_fragment *); -static struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); -static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, +static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *, + struct pf_frag_tree *); +static __inline struct pf_fragment * +pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *); +static 
__inline struct pf_fragment * +pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *, + struct pf_frag_tree *); +static struct mbuf *pf_reassemble(struct mbuf *, struct pf_fragment **, struct pf_frent *, int); static struct mbuf *pf_fragcache(struct mbuf **, struct ip *, struct pf_fragment **, int, int, int *); -#ifndef NO_APPLE_MODIFICATIONS +static struct mbuf *pf_reassemble6(struct mbuf **, struct pf_fragment **, + struct pf_frent *, int); +static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr*, + struct ip6_frag *, struct pf_fragment **, int, int, int, int *); static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *, - struct pf_pdesc *, struct mbuf *, struct tcphdr *, int, int *); -#else -static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, - struct tcphdr *, int, sa_family_t); -#endif + struct pf_pdesc *, pbuf_t *, struct tcphdr *, int, int *); -#define DPFPRINTF(x) do { \ - if (pf_status.debug >= PF_DEBUG_MISC) { \ - printf("%s: ", __func__); \ - printf x ; \ - } \ +#define DPFPRINTF(x) do { \ + if (pf_status.debug >= PF_DEBUG_MISC) { \ + printf("%s: ", __func__); \ + printf x ; \ + } \ } while (0) /* Globals */ -struct pool pf_frent_pl, pf_frag_pl; -static struct pool pf_cache_pl, pf_cent_pl; -struct pool pf_state_scrub_pl; +struct pool pf_frent_pl, pf_frag_pl; +static struct pool pf_cache_pl, pf_cent_pl; +struct pool pf_state_scrub_pl; -static int pf_nfrents, pf_ncache; +static int pf_nfrents, pf_ncache; void pf_normalize_init(void) { - pool_init(&pf_frent_pl, sizeof (struct pf_frent), 0, 0, 0, "pffrent", + pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", NULL); - pool_init(&pf_frag_pl, sizeof (struct pf_fragment), 0, 0, 0, "pffrag", + pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", NULL); - pool_init(&pf_cache_pl, sizeof (struct pf_fragment), 0, 0, 0, + pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrcache", NULL); - pool_init(&pf_cent_pl, sizeof 
(struct pf_frcache), 0, 0, 0, "pffrcent", + pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent", NULL); - pool_init(&pf_state_scrub_pl, sizeof (struct pf_state_scrub), 0, 0, 0, + pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, "pfstscr", NULL); pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); @@ -203,27 +225,85 @@ pf_normalize_destroy(void) int pf_normalize_isempty(void) { - return (TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue)); + return TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue); } static __inline int pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) { - int diff; - - if ((diff = a->fr_id - b->fr_id)) - return (diff); - else if ((diff = a->fr_p - b->fr_p)) - return (diff); - else if (a->fr_src.s_addr < b->fr_src.s_addr) - return (-1); - else if (a->fr_src.s_addr > b->fr_src.s_addr) - return (1); - else if (a->fr_dst.s_addr < b->fr_dst.s_addr) - return (-1); - else if (a->fr_dst.s_addr > b->fr_dst.s_addr) - return (1); - return (0); + int diff; + + if ((diff = a->fr_af - b->fr_af)) { + return diff; + } else if ((diff = a->fr_p - b->fr_p)) { + return diff; + } else { + struct pf_addr *sa = &a->fr_srcx; + struct pf_addr *sb = &b->fr_srcx; + struct pf_addr *da = &a->fr_dstx; + struct pf_addr *db = &b->fr_dstx; + + switch (a->fr_af) { +#ifdef INET + case AF_INET: + if ((diff = a->fr_id - b->fr_id)) { + return diff; + } else if (sa->v4addr.s_addr < sb->v4addr.s_addr) { + return -1; + } else if (sa->v4addr.s_addr > sb->v4addr.s_addr) { + return 1; + } else if (da->v4addr.s_addr < db->v4addr.s_addr) { + return -1; + } else if (da->v4addr.s_addr > db->v4addr.s_addr) { + return 1; + } + break; +#endif +#ifdef INET6 + case AF_INET6: + if ((diff = a->fr_id6 - b->fr_id6)) { + return diff; + } else if (sa->addr32[3] < sb->addr32[3]) { + return -1; + } else if (sa->addr32[3] > sb->addr32[3]) { + return 1; + } else if (sa->addr32[2] < sb->addr32[2]) { + return -1; + } else if (sa->addr32[2] > 
sb->addr32[2]) { + return 1; + } else if (sa->addr32[1] < sb->addr32[1]) { + return -1; + } else if (sa->addr32[1] > sb->addr32[1]) { + return 1; + } else if (sa->addr32[0] < sb->addr32[0]) { + return -1; + } else if (sa->addr32[0] > sb->addr32[0]) { + return 1; + } else if (da->addr32[3] < db->addr32[3]) { + return -1; + } else if (da->addr32[3] > db->addr32[3]) { + return 1; + } else if (da->addr32[2] < db->addr32[2]) { + return -1; + } else if (da->addr32[2] > db->addr32[2]) { + return 1; + } else if (da->addr32[1] < db->addr32[1]) { + return -1; + } else if (da->addr32[1] > db->addr32[1]) { + return 1; + } else if (da->addr32[0] < db->addr32[0]) { + return -1; + } else if (da->addr32[0] > db->addr32[0]) { + return 1; + } + break; +#endif + default: + VERIFY(!0 && "only IPv4 and IPv6 supported!"); + break; + } + } + return 0; } void @@ -235,19 +315,49 @@ pf_purge_expired_fragments(void) while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { VERIFY(BUFFER_FRAGMENTS(frag)); - if (frag->fr_timeout > expire) + if (frag->fr_timeout > expire) { break; + } - DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); + switch (frag->fr_af) { + case AF_INET: + DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n", + ntohs(frag->fr_id), + (uint64_t)VM_KERNEL_ADDRPERM(frag))); + break; + case AF_INET6: + DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n", + ntohl(frag->fr_id6), + (uint64_t)VM_KERNEL_ADDRPERM(frag))); + break; + default: + VERIFY(0 && "only IPv4 and IPv6 supported"); + break; + } pf_free_fragment(frag); } while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { VERIFY(!BUFFER_FRAGMENTS(frag)); - if (frag->fr_timeout > expire) + if (frag->fr_timeout > expire) { break; + } - DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); + switch (frag->fr_af) { + case AF_INET: + DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n", + ntohs(frag->fr_id), + (uint64_t)VM_KERNEL_ADDRPERM(frag))); + break; + case AF_INET6: + DPFPRINTF(("expiring IPv6 
%d(0x%llx) from cache.\n", + ntohl(frag->fr_id6), + (uint64_t)VM_KERNEL_ADDRPERM(frag))); + break; + default: + VERIFY(0 && "only IPv4 and IPv6 supported"); + break; + } pf_free_fragment(frag); VERIFY(TAILQ_EMPTY(&pf_cachequeue) || TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag); @@ -261,16 +371,17 @@ pf_purge_expired_fragments(void) static void pf_flush_fragments(void) { - struct pf_fragment *frag; - int goal; + struct pf_fragment *frag; + int goal; goal = pf_nfrents * 9 / 10; DPFPRINTF(("trying to free > %d frents\n", pf_nfrents - goal)); while (goal < pf_nfrents) { frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); - if (frag == NULL) + if (frag == NULL) { break; + } pf_free_fragment(frag); } @@ -280,8 +391,9 @@ pf_flush_fragments(void) pf_ncache - goal)); while (goal < pf_ncache) { frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue); - if (frag == NULL) + if (frag == NULL) { break; + } pf_free_fragment(frag); } } @@ -291,8 +403,8 @@ pf_flush_fragments(void) static void pf_free_fragment(struct pf_fragment *frag) { - struct pf_frent *frent; - struct pf_frcache *frcache; + struct pf_frent *frent; + struct pf_frcache *frcache; /* Free all fragments */ if (BUFFER_FRAGMENTS(frag)) { @@ -321,24 +433,33 @@ pf_free_fragment(struct pf_fragment *frag) pf_remove_fragment(frag); } +static void +pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6, + struct ip6_frag *fh) +{ + key->fr_p = fh->ip6f_nxt; + key->fr_id6 = fh->ip6f_ident; + key->fr_af = AF_INET6; + key->fr_srcx.v6addr = ip6->ip6_src; + key->fr_dstx.v6addr = ip6->ip6_dst; +} + static void pf_ip2key(struct pf_fragment *key, struct ip *ip) { key->fr_p = ip->ip_p; key->fr_id = ip->ip_id; - key->fr_src.s_addr = ip->ip_src.s_addr; - key->fr_dst.s_addr = ip->ip_dst.s_addr; + key->fr_af = AF_INET; + key->fr_srcx.v4addr.s_addr = ip->ip_src.s_addr; + key->fr_dstx.v4addr.s_addr = ip->ip_dst.s_addr; } static struct pf_fragment * -pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) +pf_find_fragment_by_key(struct 
pf_fragment *key, struct pf_frag_tree *tree) { - struct pf_fragment key; - struct pf_fragment *frag; - - pf_ip2key(&key, ip); + struct pf_fragment *frag; - frag = RB_FIND(pf_frag_tree, tree, &key); + frag = RB_FIND(pf_frag_tree, tree, key); if (frag != NULL) { /* XXX Are we sure we want to update the timeout? */ frag->fr_timeout = pf_time_second(); @@ -351,7 +472,24 @@ pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) } } - return (frag); + return frag; +} + +static __inline struct pf_fragment * +pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree) +{ + struct pf_fragment key; + pf_ip2key(&key, ip); + return pf_find_fragment_by_key(&key, tree); +} + +static __inline struct pf_fragment * +pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh, + struct pf_frag_tree *tree) +{ + struct pf_fragment key; + pf_ip6hdr2key(&key, ip6, fh); + return pf_find_fragment_by_key(&key, tree); } /* Removes a fragment from the fragment queue and frees the fragment */ @@ -370,22 +508,84 @@ pf_remove_fragment(struct pf_fragment *frag) } } -#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) +#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) static struct mbuf * -pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, +pf_reassemble(struct mbuf *m0, struct pf_fragment **frag, struct pf_frent *frent, int mff) { - struct mbuf *m = *m0, *m2; - struct pf_frent *frea, *next; - struct pf_frent *frep = NULL; - struct ip *ip = frent->fr_ip; - int hlen = ip->ip_hl << 2; - u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; - u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; - u_int16_t fr_max = ip_len + off; + struct mbuf *m = m0, *m2; + struct pf_frent *frea, *next; + struct pf_frent *frep = NULL; + struct ip *ip = frent->fr_ip; + uint32_t hlen = ip->ip_hl << 2; + u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; + u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; + u_int16_t fr_max = 
ip_len + off; + uint32_t csum, csum_flags; VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag)); + /* + * Leverage partial checksum offload for IP fragments. Narrow down + * the scope to cover only UDP without IP options, as that is the + * most common case. + * + * Perform 1's complement adjustment of octets that got included/ + * excluded in the hardware-calculated checksum value. Ignore cases + * where the value includes the entire IPv4 header span, as the sum + * for those octets would already be 0 by the time we get here; IP + * has already performed its header checksum validation. Also take + * care of any trailing bytes and subtract out their partial sum. + */ + if (ip->ip_p == IPPROTO_UDP && hlen == sizeof(struct ip) && + (m->m_pkthdr.csum_flags & + (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) == + (CSUM_DATA_VALID | CSUM_PARTIAL)) { + uint32_t start = m->m_pkthdr.csum_rx_start; + int32_t trailer = (m_pktlen(m) - ntohs(ip->ip_len)); + uint32_t swbytes = (uint32_t)trailer; + + csum = m->m_pkthdr.csum_rx_val; + + ASSERT(trailer >= 0); + if ((start != 0 && start != hlen) || trailer != 0) { +#if BYTE_ORDER != BIG_ENDIAN + if (start < hlen) { + HTONS(ip->ip_len); + HTONS(ip->ip_off); + } +#endif /* BYTE_ORDER != BIG_ENDIAN */ + /* callee folds in sum */ + csum = m_adj_sum16(m, start, hlen, + (ip->ip_len - hlen), csum); + if (hlen > start) { + swbytes += (hlen - start); + } else { + swbytes += (start - hlen); + } +#if BYTE_ORDER != BIG_ENDIAN + if (start < hlen) { + NTOHS(ip->ip_off); + NTOHS(ip->ip_len); + } +#endif /* BYTE_ORDER != BIG_ENDIAN */ + } + csum_flags = m->m_pkthdr.csum_flags; + + if (swbytes != 0) { + udp_in_cksum_stats(swbytes); + } + if (trailer != 0) { + m_adj(m, -trailer); + } + } else { + csum = 0; + csum_flags = 0; + } + + /* Invalidate checksum */ + m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; + /* Strip off ip header */ m->m_data += hlen; m->m_len -= hlen; @@ -396,17 +596,23 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, if 
(*frag == NULL) { pf_flush_fragments(); *frag = pool_get(&pf_frag_pl, PR_NOWAIT); - if (*frag == NULL) + if (*frag == NULL) { goto drop_fragment; + } } (*frag)->fr_flags = 0; (*frag)->fr_max = 0; - (*frag)->fr_src = frent->fr_ip->ip_src; - (*frag)->fr_dst = frent->fr_ip->ip_dst; + (*frag)->fr_af = AF_INET; + (*frag)->fr_srcx.v4addr = frent->fr_ip->ip_src; + (*frag)->fr_dstx.v4addr = frent->fr_ip->ip_dst; (*frag)->fr_p = frent->fr_ip->ip_p; (*frag)->fr_id = frent->fr_ip->ip_id; (*frag)->fr_timeout = pf_time_second(); + if (csum_flags != 0) { + (*frag)->fr_csum_flags = csum_flags; + (*frag)->fr_csum = csum; + } LIST_INIT(&(*frag)->fr_queue); RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); @@ -417,13 +623,25 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, goto insert; } + /* + * If this fragment contains similar checksum offload info + * as that of the existing ones, accumulate checksum. Otherwise, + * invalidate checksum offload info for the entire datagram. + */ + if (csum_flags != 0 && csum_flags == (*frag)->fr_csum_flags) { + (*frag)->fr_csum += csum; + } else if ((*frag)->fr_csum_flags != 0) { + (*frag)->fr_csum_flags = 0; + } + /* * Find a fragment after the current one: * - off contains the real shifted offset. 
*/ LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) { - if (FR_IP_OFF(frea) > off) + if (FR_IP_OFF(frea) > off) { break; + } frep = frea; } @@ -432,12 +650,13 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, if (frep != NULL && FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4 > off) { - u_int16_t precut; + u_int16_t precut; precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4 - off; - if (precut >= ip_len) + if (precut >= ip_len) { goto drop_fragment; + } m_adj(frent->fr_m, precut); DPFPRINTF(("overlap -%d\n", precut)); /* Enforce 8 byte boundaries */ @@ -449,7 +668,7 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, for (; frea != NULL && ip_len + off > FR_IP_OFF(frea); frea = next) { - u_int16_t aftercut; + u_int16_t aftercut; aftercut = ip_len + off - FR_IP_OFF(frea); DPFPRINTF(("adjust overlap %d\n", aftercut)); @@ -473,20 +692,24 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, insert: /* Update maximum data size */ - if ((*frag)->fr_max < fr_max) + if ((*frag)->fr_max < fr_max) { (*frag)->fr_max = fr_max; + } /* This is the last segment */ - if (!mff) + if (!mff) { (*frag)->fr_flags |= PFFRAG_SEENLAST; + } - if (frep == NULL) + if (frep == NULL) { LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next); - else + } else { LIST_INSERT_AFTER(frep, frent, fr_next); + } /* Check if we are completely reassembled */ - if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) - return (NULL); + if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) { + return NULL; + } /* Check if we have all the data */ off = 0; @@ -499,12 +722,13 @@ insert: DPFPRINTF(("missing fragment at %d, next %d, max %d\n", off, next == NULL ? 
-1 : FR_IP_OFF(next), (*frag)->fr_max)); - return (NULL); + return NULL; } } DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); - if (off < (*frag)->fr_max) - return (NULL); + if (off < (*frag)->fr_max) { + return NULL; + } /* We have all the data */ frent = LIST_FIRST(&(*frag)->fr_queue); @@ -513,7 +737,7 @@ insert: DPFPRINTF(("drop: too big: %d\n", off)); pf_free_fragment(*frag); *frag = NULL; - return (NULL); + return NULL; } next = LIST_NEXT(frent, fr_next); @@ -534,8 +758,26 @@ insert: m_cat(m, m2); } - ip->ip_src = (*frag)->fr_src; - ip->ip_dst = (*frag)->fr_dst; + ip->ip_src = (*frag)->fr_srcx.v4addr; + ip->ip_dst = (*frag)->fr_dstx.v4addr; + + if ((*frag)->fr_csum_flags != 0) { + csum = (*frag)->fr_csum; + + ADDCARRY(csum); + + m->m_pkthdr.csum_rx_val = csum; + m->m_pkthdr.csum_rx_start = sizeof(struct ip); + m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags; + } else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) { + /* loopback checksums are always OK */ + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL; + m->m_pkthdr.csum_flags = + CSUM_DATA_VALID | CSUM_PSEUDO_HDR | + CSUM_IP_CHECKED | CSUM_IP_VALID; + } /* Remove from fragment queue */ pf_remove_fragment(*frag); @@ -550,32 +792,34 @@ insert: /* XXX this should be done elsewhere */ if (m->m_flags & M_PKTHDR) { int plen = 0; - for (m2 = m; m2; m2 = m2->m_next) + for (m2 = m; m2; m2 = m2->m_next) { plen += m2->m_len; + } m->m_pkthdr.len = plen; } - DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); - return (m); + DPFPRINTF(("complete: 0x%llx(%d)\n", + (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len))); + return m; drop_fragment: /* Oops - fail safe - drop packet */ pool_put(&pf_frent_pl, frent); pf_nfrents--; m_freem(m); - return (NULL); + return NULL; } static struct mbuf * pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, int drop, int *nomem) { - struct mbuf *m = *m0; - struct pf_frcache *frp, 
*fra, *cur = NULL; - int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2); - u_int16_t off = ntohs(h->ip_off) << 3; - u_int16_t fr_max = ip_len + off; - int hosed = 0; + struct mbuf *m = *m0; + struct pf_frcache *frp, *fra, *cur = NULL; + int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2); + u_int16_t off = ntohs(h->ip_off) << 3; + u_int16_t fr_max = ip_len + off; + int hosed = 0; VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag)); @@ -585,8 +829,9 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, if (*frag == NULL) { pf_flush_fragments(); *frag = pool_get(&pf_cache_pl, PR_NOWAIT); - if (*frag == NULL) + if (*frag == NULL) { goto no_mem; + } } /* Get an entry for the queue */ @@ -600,8 +845,9 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, (*frag)->fr_flags = PFFRAG_NOBUFFER; (*frag)->fr_max = 0; - (*frag)->fr_src = h->ip_src; - (*frag)->fr_dst = h->ip_dst; + (*frag)->fr_af = AF_INET; + (*frag)->fr_srcx.v4addr = h->ip_src; + (*frag)->fr_dstx.v4addr = h->ip_dst; (*frag)->fr_p = h->ip_p; (*frag)->fr_id = h->ip_id; (*frag)->fr_timeout = pf_time_second(); @@ -626,15 +872,16 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, */ frp = NULL; LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) { - if (fra->fr_off > off) + if (fra->fr_off > off) { break; + } frp = fra; } VERIFY(frp != NULL || fra != NULL); if (frp != NULL) { - int precut; + int precut; precut = frp->fr_end - off; if (precut >= ip_len) { @@ -676,8 +923,9 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, * I'll pull a rabbit out of my laptop. 
*/ *m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT); - if (*m0 == NULL) + if (*m0 == NULL) { goto no_mem; + } VERIFY((*m0)->m_next == NULL); m_adj(m, precut + (h->ip_hl << 2)); m_cat(*m0, m); @@ -685,8 +933,9 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, if (m->m_flags & M_PKTHDR) { int plen = 0; struct mbuf *t; - for (t = m; t; t = t->m_next) + for (t = m; t; t = t->m_next) { plen += t->m_len; + } m->m_pkthdr.len = plen; } @@ -710,8 +959,658 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, fr_max)); cur = pool_get(&pf_cent_pl, PR_NOWAIT); - if (cur == NULL) + if (cur == NULL) { + goto no_mem; + } + pf_ncache++; + + cur->fr_off = off; + cur->fr_end = fr_max; + LIST_INSERT_AFTER(frp, cur, fr_next); + } + } + + if (fra != NULL) { + int aftercut; + int merge = 0; + + aftercut = fr_max - fra->fr_off; + if (aftercut == 0) { + /* Adjacent fragments */ + DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n", + h->ip_id, off, fr_max, fra->fr_off, fra->fr_end)); + fra->fr_off = off; + merge = 1; + } else if (aftercut > 0) { + /* Need to chop off the tail of this fragment */ + DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n", + h->ip_id, aftercut, off, fr_max, fra->fr_off, + fra->fr_end)); + fra->fr_off = off; + fr_max -= aftercut; + + merge = 1; + + if (!drop) { + m_adj(m, -aftercut); + if (m->m_flags & M_PKTHDR) { + int plen = 0; + struct mbuf *t; + for (t = m; t; t = t->m_next) { + plen += t->m_len; + } + m->m_pkthdr.len = plen; + } + h = mtod(m, struct ip *); + VERIFY((int)m->m_len == + ntohs(h->ip_len) - aftercut); + h->ip_len = htons(ntohs(h->ip_len) - aftercut); + } else { + hosed++; + } + } else if (frp == NULL) { + /* There is a gap between fragments */ + DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", + h->ip_id, -aftercut, off, fr_max, fra->fr_off, + fra->fr_end)); + + cur = pool_get(&pf_cent_pl, PR_NOWAIT); + if (cur == NULL) { + goto no_mem; + } + pf_ncache++; + + cur->fr_off = off; + 
cur->fr_end = fr_max; + LIST_INSERT_BEFORE(fra, cur, fr_next); + } + + + /* Need to glue together two separate fragment descriptors */ + if (merge) { + if (cur && fra->fr_off <= cur->fr_end) { + /* Need to merge in a previous 'cur' */ + DPFPRINTF(("fragcache[%d]: adjacent(merge " + "%d-%d) %d-%d (%d-%d)\n", + h->ip_id, cur->fr_off, cur->fr_end, off, + fr_max, fra->fr_off, fra->fr_end)); + fra->fr_off = cur->fr_off; + LIST_REMOVE(cur, fr_next); + pool_put(&pf_cent_pl, cur); + pf_ncache--; + cur = NULL; + } else if (frp && fra->fr_off <= frp->fr_end) { + /* Need to merge in a modified 'frp' */ + VERIFY(cur == NULL); + DPFPRINTF(("fragcache[%d]: adjacent(merge " + "%d-%d) %d-%d (%d-%d)\n", + h->ip_id, frp->fr_off, frp->fr_end, off, + fr_max, fra->fr_off, fra->fr_end)); + fra->fr_off = frp->fr_off; + LIST_REMOVE(frp, fr_next); + pool_put(&pf_cent_pl, frp); + pf_ncache--; + frp = NULL; + } + } + } + + if (hosed) { + /* + * We must keep tracking the overall fragment even when + * we're going to drop it anyway so that we know when to + * free the overall descriptor. Thus we drop the frag late. 
+ */ + goto drop_fragment; + } + + +pass: + /* Update maximum data size */ + if ((*frag)->fr_max < fr_max) { + (*frag)->fr_max = fr_max; + } + + /* This is the last segment */ + if (!mff) { + (*frag)->fr_flags |= PFFRAG_SEENLAST; + } + + /* Check if we are completely reassembled */ + if (((*frag)->fr_flags & PFFRAG_SEENLAST) && + LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 && + LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) { + /* Remove from fragment queue */ + DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, + (*frag)->fr_max)); + pf_free_fragment(*frag); + *frag = NULL; + } + + return m; + +no_mem: + *nomem = 1; + + /* Still need to pay attention to !IP_MF */ + if (!mff && *frag != NULL) { + (*frag)->fr_flags |= PFFRAG_SEENLAST; + } + + m_freem(m); + return NULL; + +drop_fragment: + + /* Still need to pay attention to !IP_MF */ + if (!mff && *frag != NULL) { + (*frag)->fr_flags |= PFFRAG_SEENLAST; + } + + if (drop) { + /* This fragment has been deemed bad. Don't reass */ + if (((*frag)->fr_flags & PFFRAG_DROP) == 0) { + DPFPRINTF(("fragcache[%d]: dropping overall fragment\n", + h->ip_id)); + } + (*frag)->fr_flags |= PFFRAG_DROP; + } + + m_freem(m); + return NULL; +} + +#define FR_IP6_OFF(fr) \ + (ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK)) +#define FR_IP6_PLEN(fr) (ntohs((fr)->fr_ip6->ip6_plen)) +struct mbuf * +pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag, + struct pf_frent *frent, int mff) +{ + struct mbuf *m, *m2; + struct pf_frent *frea, *frep, *next; + struct ip6_hdr *ip6; + struct ip6_frag *ip6f; + int plen, off, fr_max; + uint32_t uoff, csum, csum_flags; + + VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag)); + m = *m0; + frep = NULL; + ip6 = frent->fr_ip6; + ip6f = &frent->fr_ip6f_opt; + off = FR_IP6_OFF(frent); + uoff = frent->fr_ip6f_hlen; + plen = FR_IP6_PLEN(frent); + fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof *ip6); + + DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u " + "fr_max %u m_len %u\n", 
(uint64_t)VM_KERNEL_ADDRPERM(m), plen, off, + frent->fr_ip6f_hlen, fr_max, m->m_len)); + + /* + * Leverage partial checksum offload for simple UDP/IP fragments, + * as that is the most common case. + * + * Perform 1's complement adjustment of octets that got included/ + * excluded in the hardware-calculated checksum value. Also take + * care of any trailing bytes and subtract out their partial sum. + */ + if (ip6f->ip6f_nxt == IPPROTO_UDP && + uoff == (sizeof(*ip6) + sizeof(*ip6f)) && + (m->m_pkthdr.csum_flags & + (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) == + (CSUM_DATA_VALID | CSUM_PARTIAL)) { + uint32_t start = m->m_pkthdr.csum_rx_start; + uint32_t ip_len = (sizeof(*ip6) + ntohs(ip6->ip6_plen)); + int32_t trailer = (m_pktlen(m) - ip_len); + uint32_t swbytes = (uint32_t)trailer; + + csum = m->m_pkthdr.csum_rx_val; + + ASSERT(trailer >= 0); + if (start != uoff || trailer != 0) { + uint16_t s = 0, d = 0; + + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) { + s = ip6->ip6_src.s6_addr16[1]; + ip6->ip6_src.s6_addr16[1] = 0; + } + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) { + d = ip6->ip6_dst.s6_addr16[1]; + ip6->ip6_dst.s6_addr16[1] = 0; + } + + /* callee folds in sum */ + csum = m_adj_sum16(m, start, uoff, + (ip_len - uoff), csum); + if (uoff > start) { + swbytes += (uoff - start); + } else { + swbytes += (start - uoff); + } + + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) { + ip6->ip6_src.s6_addr16[1] = s; + } + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) { + ip6->ip6_dst.s6_addr16[1] = d; + } + } + csum_flags = m->m_pkthdr.csum_flags; + + if (swbytes != 0) { + udp_in6_cksum_stats(swbytes); + } + if (trailer != 0) { + m_adj(m, -trailer); + } + } else { + csum = 0; + csum_flags = 0; + } + + /* Invalidate checksum */ + m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; + + /* strip off headers up to the fragment payload */ + m->m_data += frent->fr_ip6f_hlen; + m->m_len -= frent->fr_ip6f_hlen; + + /* Create a new reassembly queue for this packet */ + if (*frag == NULL) { + *frag = 
pool_get(&pf_frag_pl, PR_NOWAIT); + if (*frag == NULL) { + pf_flush_fragments(); + *frag = pool_get(&pf_frag_pl, PR_NOWAIT); + if (*frag == NULL) { + goto drop_fragment; + } + } + + (*frag)->fr_flags = 0; + (*frag)->fr_max = 0; + (*frag)->fr_af = AF_INET6; + (*frag)->fr_srcx.v6addr = frent->fr_ip6->ip6_src; + (*frag)->fr_dstx.v6addr = frent->fr_ip6->ip6_dst; + (*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt; + (*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident; + (*frag)->fr_timeout = pf_time_second(); + if (csum_flags != 0) { + (*frag)->fr_csum_flags = csum_flags; + (*frag)->fr_csum = csum; + } + LIST_INIT(&(*frag)->fr_queue); + + RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); + TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); + + /* We do not have a previous fragment */ + frep = NULL; + goto insert; + } + + /* + * If this fragment contains similar checksum offload info + * as that of the existing ones, accumulate checksum. Otherwise, + * invalidate checksum offload info for the entire datagram. + */ + if (csum_flags != 0 && csum_flags == (*frag)->fr_csum_flags) { + (*frag)->fr_csum += csum; + } else if ((*frag)->fr_csum_flags != 0) { + (*frag)->fr_csum_flags = 0; + } + + /* + * Find a fragment after the current one: + * - off contains the real shifted offset. 
+ */ + LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) { + if (FR_IP6_OFF(frea) > off) { + break; + } + frep = frea; + } + + VERIFY(frep != NULL || frea != NULL); + + if (frep != NULL && + FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off) { + u_int16_t precut; + + precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - + frep->fr_ip6f_hlen - off; + if (precut >= plen) { + goto drop_fragment; + } + m_adj(frent->fr_m, precut); + DPFPRINTF(("overlap -%d\n", precut)); + /* Enforce 8 byte boundaries */ + frent->fr_ip6f_opt.ip6f_offlg = + htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) + + (precut >> 3)); + off = FR_IP6_OFF(frent); + plen -= precut; + ip6->ip6_plen = htons(plen); + } + + for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) { + u_int16_t aftercut; + + aftercut = plen + off - FR_IP6_OFF(frea); + DPFPRINTF(("adjust overlap %d\n", aftercut)); + if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) { + frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) - + aftercut); + frea->fr_ip6f_opt.ip6f_offlg = + htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) + + (aftercut >> 3)); + m_adj(frea->fr_m, aftercut); + break; + } + + /* This fragment is completely overlapped, lose it */ + next = LIST_NEXT(frea, fr_next); + m_freem(frea->fr_m); + LIST_REMOVE(frea, fr_next); + pool_put(&pf_frent_pl, frea); + pf_nfrents--; + } + +insert: + /* Update maximum data size */ + if ((*frag)->fr_max < fr_max) { + (*frag)->fr_max = fr_max; + } + /* This is the last segment */ + if (!mff) { + (*frag)->fr_flags |= PFFRAG_SEENLAST; + } + + if (frep == NULL) { + LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next); + } else { + LIST_INSERT_AFTER(frep, frent, fr_next); + } + + /* Check if we are completely reassembled */ + if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) { + return NULL; + } + + /* Check if we have all the data */ + off = 0; + for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) { + next = LIST_NEXT(frep, fr_next); + off += FR_IP6_PLEN(frep) - 
(frent->fr_ip6f_hlen - sizeof *ip6); + DPFPRINTF(("frep at %d, next %d, max %d\n", + off, next == NULL ? -1 : FR_IP6_OFF(next), + (*frag)->fr_max)); + if (off < (*frag)->fr_max && + (next == NULL || FR_IP6_OFF(next) != off)) { + DPFPRINTF(("missing fragment at %d, next %d, max %d\n", + off, next == NULL ? -1 : FR_IP6_OFF(next), + (*frag)->fr_max)); + return NULL; + } + } + DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); + if (off < (*frag)->fr_max) { + return NULL; + } + + /* We have all the data */ + frent = LIST_FIRST(&(*frag)->fr_queue); + VERIFY(frent != NULL); + if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) { + DPFPRINTF(("drop: too big: %d\n", off)); + pf_free_fragment(*frag); + *frag = NULL; + return NULL; + } + + ip6 = frent->fr_ip6; + ip6->ip6_nxt = (*frag)->fr_p; + ip6->ip6_plen = htons(off); + ip6->ip6_src = (*frag)->fr_srcx.v6addr; + ip6->ip6_dst = (*frag)->fr_dstx.v6addr; + + if ((*frag)->fr_csum_flags != 0) { + csum = (*frag)->fr_csum; + + ADDCARRY(csum); + + m->m_pkthdr.csum_rx_val = csum; + m->m_pkthdr.csum_rx_start = sizeof(struct ip6_hdr); + m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags; + } else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) { + /* loopback checksums are always OK */ + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL; + m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + } + + /* Remove from fragment queue */ + pf_remove_fragment(*frag); + *frag = NULL; + + m = frent->fr_m; + m->m_len += sizeof(struct ip6_hdr); + m->m_data -= sizeof(struct ip6_hdr); + memmove(m->m_data, ip6, sizeof(struct ip6_hdr)); + + next = LIST_NEXT(frent, fr_next); + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + for (frent = next; next != NULL; frent = next) { + m2 = frent->fr_m; + + m_cat(m, m2); + next = LIST_NEXT(frent, fr_next); + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + } + + /* XXX this should be done elsewhere */ + if (m->m_flags & M_PKTHDR) { + int pktlen = 
0; + for (m2 = m; m2; m2 = m2->m_next) { + pktlen += m2->m_len; + } + m->m_pkthdr.len = pktlen; + } + + DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n", + (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip6->ip6_plen), + m->m_pkthdr.len)); + + return m; + +drop_fragment: + /* Oops - fail safe - drop packet */ + pool_put(&pf_frent_pl, frent); + --pf_nfrents; + m_freem(m); + return NULL; +} + +static struct mbuf * +pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh, + struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem) +{ + struct mbuf *m = *m0; + u_int16_t plen, off, fr_max; + struct pf_frcache *frp, *fra, *cur = NULL; + int hosed = 0; + + VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag)); + m = *m0; + off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK); + plen = ntohs(h->ip6_plen) - (hlen - sizeof *h); + + /* + * Apple Modification: dimambro@apple.com. The hlen passed + * into this function includes all the headers associated with + * the packet, and may include routing headers, so to get to + * the data payload as stored in the original IPv6 header we need + * to subtract all those headers and the IP header. + * + * The 'fr_max' local variable should also contain the offset from the + * start of the reassembled packet to the octet just past the end of the + * octets in the current fragment, where: + * - 'off' is the offset from the start of the reassembled packet to the + * first octet in the fragment, + * - 'plen' is the length of the "payload data length" excluding all the + * IPv6 headers of the fragment, + * - 'hlen' is computed in pf_normalize_ip6() as the offset from the start + * of the IPv6 packet to the beginning of the data. 
+ */ + fr_max = off + plen; + + DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n", + (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off, fr_max)); + + /* Create a new range queue for this packet */ + if (*frag == NULL) { + *frag = pool_get(&pf_cache_pl, PR_NOWAIT); + if (*frag == NULL) { + pf_flush_fragments(); + *frag = pool_get(&pf_cache_pl, PR_NOWAIT); + if (*frag == NULL) { + goto no_mem; + } + } + + /* Get an entry for the queue */ + cur = pool_get(&pf_cent_pl, PR_NOWAIT); + if (cur == NULL) { + pool_put(&pf_cache_pl, *frag); + *frag = NULL; + goto no_mem; + } + pf_ncache++; + + (*frag)->fr_flags = PFFRAG_NOBUFFER; + (*frag)->fr_max = 0; + (*frag)->fr_af = AF_INET6; + (*frag)->fr_srcx.v6addr = h->ip6_src; + (*frag)->fr_dstx.v6addr = h->ip6_dst; + (*frag)->fr_p = fh->ip6f_nxt; + (*frag)->fr_id6 = fh->ip6f_ident; + (*frag)->fr_timeout = pf_time_second(); + + cur->fr_off = off; + cur->fr_end = fr_max; + LIST_INIT(&(*frag)->fr_cache); + LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); + + RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); + TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); + + DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident), + off, fr_max)); + + goto pass; + } + + /* + * Find a fragment after the current one: + * - off contains the real shifted offset. + */ + frp = NULL; + LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) { + if (fra->fr_off > off) { + break; + } + frp = fra; + } + + VERIFY(frp != NULL || fra != NULL); + + if (frp != NULL) { + int precut; + + precut = frp->fr_end - off; + if (precut >= plen) { + /* Fragment is entirely a duplicate */ + DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n", + ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end, + off, fr_max)); + goto drop_fragment; + } + if (precut == 0) { + /* They are adjacent. 
Fixup cache entry */ + DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n", + ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end, + off, fr_max)); + frp->fr_end = fr_max; + } else if (precut > 0) { + /* The first part of this payload overlaps with a + * fragment that has already been passed. + * Need to trim off the first part of the payload. + * But to do so easily, we need to create another + * mbuf to throw the original header into. + */ + + DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n", + ntohl(fh->ip6f_ident), precut, frp->fr_off, + frp->fr_end, off, fr_max)); + + off += precut; + fr_max -= precut; + /* Update the previous frag to encompass this one */ + frp->fr_end = fr_max; + + if (!drop) { + /* XXX Optimization opportunity + * This is a very heavy way to trim the payload. + * we could do it much faster by diddling mbuf + * internals but that would be even less legible + * than this mbuf magic. For my next trick, + * I'll pull a rabbit out of my laptop. + */ + *m0 = m_copym(m, 0, hlen, M_NOWAIT); + if (*m0 == NULL) { + goto no_mem; + } + VERIFY((*m0)->m_next == NULL); + m_adj(m, precut + hlen); + m_cat(*m0, m); + m = *m0; + if (m->m_flags & M_PKTHDR) { + int pktlen = 0; + struct mbuf *t; + for (t = m; t; t = t->m_next) { + pktlen += t->m_len; + } + m->m_pkthdr.len = pktlen; + } + + h = mtod(m, struct ip6_hdr *); + + VERIFY((int)m->m_len == + ntohs(h->ip6_plen) - precut); + fh->ip6f_offlg &= ~IP6F_OFF_MASK; + fh->ip6f_offlg |= + htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK) + + (precut >> 3)); + h->ip6_plen = htons(ntohs(h->ip6_plen) - + precut); + } else { + hosed++; + } + } else { + /* There is a gap between fragments */ + + DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n", + ntohl(fh->ip6f_ident), -precut, frp->fr_off, + frp->fr_end, off, fr_max)); + + cur = pool_get(&pf_cent_pl, PR_NOWAIT); + if (cur == NULL) { goto no_mem; + } pf_ncache++; cur->fr_off = off; @@ -721,21 +1620,22 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment 
**frag, int mff, } if (fra != NULL) { - int aftercut; - int merge = 0; + int aftercut; + int merge = 0; aftercut = fr_max - fra->fr_off; if (aftercut == 0) { /* Adjacent fragments */ - DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n", - h->ip_id, off, fr_max, fra->fr_off, fra->fr_end)); + DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n", + ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off, + fra->fr_end)); fra->fr_off = off; merge = 1; } else if (aftercut > 0) { /* Need to chop off the tail of this fragment */ - DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n", - h->ip_id, aftercut, off, fr_max, fra->fr_off, - fra->fr_end)); + DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n", + ntohl(fh->ip6f_ident), aftercut, off, fr_max, + fra->fr_off, fra->fr_end)); fra->fr_off = off; fr_max -= aftercut; @@ -744,28 +1644,31 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, if (!drop) { m_adj(m, -aftercut); if (m->m_flags & M_PKTHDR) { - int plen = 0; + int pktlen = 0; struct mbuf *t; - for (t = m; t; t = t->m_next) - plen += t->m_len; - m->m_pkthdr.len = plen; + for (t = m; t; t = t->m_next) { + pktlen += t->m_len; + } + m->m_pkthdr.len = pktlen; } - h = mtod(m, struct ip *); + h = mtod(m, struct ip6_hdr *); VERIFY((int)m->m_len == - ntohs(h->ip_len) - aftercut); - h->ip_len = htons(ntohs(h->ip_len) - aftercut); + ntohs(h->ip6_plen) - aftercut); + h->ip6_plen = + htons(ntohs(h->ip6_plen) - aftercut); } else { hosed++; } } else if (frp == NULL) { /* There is a gap between fragments */ - DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", - h->ip_id, -aftercut, off, fr_max, fra->fr_off, - fra->fr_end)); + DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n", + ntohl(fh->ip6f_ident), -aftercut, off, fr_max, + fra->fr_off, fra->fr_end)); cur = pool_get(&pf_cent_pl, PR_NOWAIT); - if (cur == NULL) + if (cur == NULL) { goto no_mem; + } pf_ncache++; cur->fr_off = off; @@ -773,34 +1676,33 @@ pf_fragcache(struct mbuf **m0, struct ip *h, 
struct pf_fragment **frag, int mff, LIST_INSERT_BEFORE(fra, cur, fr_next); } - /* Need to glue together two separate fragment descriptors */ if (merge) { if (cur && fra->fr_off <= cur->fr_end) { /* Need to merge in a previous 'cur' */ - DPFPRINTF(("fragcache[%d]: adjacent(merge " + DPFPRINTF(("frag6cache[%u]: adjacent(merge " "%d-%d) %d-%d (%d-%d)\n", - h->ip_id, cur->fr_off, cur->fr_end, off, - fr_max, fra->fr_off, fra->fr_end)); + ntohl(fh->ip6f_ident), cur->fr_off, + cur->fr_end, off, fr_max, fra->fr_off, + fra->fr_end)); fra->fr_off = cur->fr_off; LIST_REMOVE(cur, fr_next); pool_put(&pf_cent_pl, cur); pf_ncache--; cur = NULL; - } else if (frp && fra->fr_off <= frp->fr_end) { /* Need to merge in a modified 'frp' */ VERIFY(cur == NULL); - DPFPRINTF(("fragcache[%d]: adjacent(merge " + DPFPRINTF(("frag6cache[%u]: adjacent(merge " "%d-%d) %d-%d (%d-%d)\n", - h->ip_id, frp->fr_off, frp->fr_end, off, - fr_max, fra->fr_off, fra->fr_end)); + ntohl(fh->ip6f_ident), frp->fr_off, + frp->fr_end, off, fr_max, fra->fr_off, + fra->fr_end)); fra->fr_off = frp->fr_off; LIST_REMOVE(frp, fr_next); pool_put(&pf_cent_pl, frp); pf_ncache--; frp = NULL; - } } } @@ -814,109 +1716,128 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, goto drop_fragment; } - pass: /* Update maximum data size */ - if ((*frag)->fr_max < fr_max) + if ((*frag)->fr_max < fr_max) { (*frag)->fr_max = fr_max; + } /* This is the last segment */ - if (!mff) + if (!mff) { (*frag)->fr_flags |= PFFRAG_SEENLAST; + } /* Check if we are completely reassembled */ if (((*frag)->fr_flags & PFFRAG_SEENLAST) && LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 && LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) { /* Remove from fragment queue */ - DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, - (*frag)->fr_max)); + DPFPRINTF(("frag6cache[%u]: done 0-%d\n", + ntohl(fh->ip6f_ident), (*frag)->fr_max)); pf_free_fragment(*frag); *frag = NULL; } - return (m); + return m; no_mem: *nomem = 
1; /* Still need to pay attention to !IP_MF */ - if (!mff && *frag != NULL) + if (!mff && *frag != NULL) { (*frag)->fr_flags |= PFFRAG_SEENLAST; + } m_freem(m); - return (NULL); + return NULL; drop_fragment: /* Still need to pay attention to !IP_MF */ - if (!mff && *frag != NULL) + if (!mff && *frag != NULL) { (*frag)->fr_flags |= PFFRAG_SEENLAST; + } if (drop) { /* This fragment has been deemed bad. Don't reass */ - if (((*frag)->fr_flags & PFFRAG_DROP) == 0) - DPFPRINTF(("fragcache[%d]: dropping overall fragment\n", - h->ip_id)); + if (((*frag)->fr_flags & PFFRAG_DROP) == 0) { + DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n", + ntohl(fh->ip6f_ident))); + } (*frag)->fr_flags |= PFFRAG_DROP; } m_freem(m); - return (NULL); + return NULL; } int -pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, +pf_normalize_ip(pbuf_t *pbuf, int dir, struct pfi_kif *kif, u_short *reason, struct pf_pdesc *pd) { - struct mbuf *m = *m0; - struct pf_rule *r; - struct pf_frent *frent; - struct pf_fragment *frag = NULL; - struct ip *h = mtod(m, struct ip *); - int mff = (ntohs(h->ip_off) & IP_MF); - int hlen = h->ip_hl << 2; - u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; - u_int16_t fr_max; - int ip_len; - int ip_off; + struct mbuf *m; + struct pf_rule *r; + struct pf_frent *frent; + struct pf_fragment *frag = NULL; + struct ip *h = pbuf->pb_data; + int mff = (ntohs(h->ip_off) & IP_MF); + int hlen = h->ip_hl << 2; + u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + u_int16_t fr_max; + int ip_len; + int ip_off; + int asd = 0; + struct pf_ruleset *ruleset = NULL; + struct ifnet *ifp = pbuf->pb_ifp; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (pfi_kif_match(r->kif, kif) == r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) { r = r->skip[PF_SKIP_IFP].ptr; - else if (r->direction && r->direction != dir) + } else if (r->direction && r->direction != dir) { r = 
r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != AF_INET) + } else if (r->af && r->af != AF_INET) { r = r->skip[PF_SKIP_AF].ptr; - else if (r->proto && r->proto != h->ip_p) + } else if (r->proto && r->proto != h->ip_p) { r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, + } else if (PF_MISMATCHAW(&r->src.addr, (struct pf_addr *)&h->ip_src.s_addr, AF_INET, - r->src.neg, kif)) + r->src.neg, kif)) { r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, + } else if (PF_MISMATCHAW(&r->dst.addr, (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, - r->dst.neg, NULL)) + r->dst.neg, NULL)) { r = r->skip[PF_SKIP_DST_ADDR].ptr; - else + } else { + if (r->anchor == NULL) { + break; + } else { + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_SCRUB, &r, NULL, NULL); + } + } + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_SCRUB, &r, NULL, NULL)) { break; + } } - if (r == NULL || r->action == PF_NOSCRUB) - return (PF_PASS); - else { + if (r == NULL || r->action == PF_NOSCRUB) { + return PF_PASS; + } else { r->packets[dir == PF_OUT]++; r->bytes[dir == PF_OUT] += pd->tot_len; } /* Check for illegal packets */ - if (hlen < (int)sizeof (struct ip)) + if (hlen < (int)sizeof(struct ip)) { goto drop; + } - if (hlen > ntohs(h->ip_len)) + if (hlen > ntohs(h->ip_len)) { goto drop; + } /* Clear IP_DF if the rule uses the no-df option */ if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) { @@ -927,8 +1848,9 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, } /* We will need other tests here */ - if (!fragoff && !mff) + if (!fragoff && !mff) { goto no_fragment; + } /* * We're dealing with a fragment now. 
Don't allow fragments @@ -956,55 +1878,75 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, } fr_max = fragoff + ip_len; - if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { + if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) { /* Fully buffer all of the fragments */ - frag = pf_find_fragment(h, &pf_frag_tree); - + frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree); /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && - fr_max > frag->fr_max) + fr_max > frag->fr_max) { goto bad; + } + + if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) { + REASON_SET(reason, PFRES_MEMORY); + return PF_DROP; + } + + VERIFY(!pbuf_is_valid(pbuf)); + + /* Restore iph pointer after pbuf_to_mbuf() */ + h = mtod(m, struct ip *); /* Get an entry for the fragment queue */ frent = pool_get(&pf_frent_pl, PR_NOWAIT); if (frent == NULL) { REASON_SET(reason, PFRES_MEMORY); - return (PF_DROP); + m_freem(m); + return PF_DROP; } pf_nfrents++; frent->fr_ip = h; frent->fr_m = m; /* Might return a completely reassembled mbuf, or NULL */ - DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, - fr_max)); - *m0 = m = pf_reassemble(m0, &frag, frent, mff); + DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id), + fragoff, fr_max)); + m = pf_reassemble(m, &frag, frent, mff); + + if (m == NULL) { + return PF_DROP; + } - if (m == NULL) - return (PF_DROP); + VERIFY(m->m_flags & M_PKTHDR); + pbuf_init_mbuf(pbuf, m, ifp); /* use mtag from concatenated mbuf chain */ - pd->pf_mtag = pf_find_mtag(m); -#ifdef DIAGNOSTIC + pd->pf_mtag = pf_find_mtag_pbuf(pbuf); +#if 0 +// SCW: This check is superfluous +#if DIAGNOSTIC if (pd->pf_mtag == NULL) { printf("%s: pf_find_mtag returned NULL(1)\n", __func__); if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { m_freem(m); - *m0 = NULL; + m = NULL; goto no_mem; } } #endif - if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) - goto drop; +#endif h = mtod(m, 
struct ip *); + + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) { + goto drop; + } } else { /* non-buffering fragment cache (drops or masks overlaps) */ - int nomem = 0; + int nomem = 0; - if (dir == PF_OUT && (pd->pf_mtag->flags & PF_TAG_FRAGCACHE)) { + if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) { /* * Already passed the fragment cache in the * input direction. If we continued, it would @@ -1013,41 +1955,63 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto fragment_pass; } - frag = pf_find_fragment(h, &pf_cache_tree); + frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree); /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && fr_max > frag->fr_max) { - if (r->rule_flag & PFRULE_FRAGDROP) + if (r->rule_flag & PFRULE_FRAGDROP) { frag->fr_flags |= PFFRAG_DROP; + } goto bad; } - *m0 = m = pf_fragcache(m0, h, &frag, mff, + if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) { + REASON_SET(reason, PFRES_MEMORY); + goto bad; + } + + VERIFY(!pbuf_is_valid(pbuf)); + + /* Restore iph pointer after pbuf_to_mbuf() */ + h = mtod(m, struct ip *); + + m = pf_fragcache(&m, h, &frag, mff, (r->rule_flag & PFRULE_FRAGDROP) ? 
1 : 0, &nomem); if (m == NULL) { - if (nomem) + // Note: pf_fragcache() has already m_freem'd the mbuf + if (nomem) { goto no_mem; + } goto drop; } + VERIFY(m->m_flags & M_PKTHDR); + pbuf_init_mbuf(pbuf, m, ifp); + /* use mtag from copied and trimmed mbuf chain */ - pd->pf_mtag = pf_find_mtag(m); -#ifdef DIAGNOSTIC + pd->pf_mtag = pf_find_mtag_pbuf(pbuf); +#if 0 +// SCW: This check is superfluous +#if DIAGNOSTIC if (pd->pf_mtag == NULL) { printf("%s: pf_find_mtag returned NULL(2)\n", __func__); if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { m_freem(m); - *m0 = NULL; + m = NULL; goto no_mem; } } #endif - if (dir == PF_IN) - pd->pf_mtag->flags |= PF_TAG_FRAGCACHE; +#endif + if (dir == PF_IN) { + pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE; + } - if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) { goto drop; + } + goto fragment_pass; } @@ -1067,18 +2031,21 @@ no_fragment: h->ip_ttl = r->min_ttl; h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); } -#if RANDOM_IP_ID if (r->rule_flag & PFRULE_RANDOMID) { - u_int16_t ip_id = h->ip_id; + u_int16_t oip_id = h->ip_id; - h->ip_id = ip_randomid(); - h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); + if (rfc6864 && IP_OFF_IS_ATOMIC(ntohs(h->ip_off))) { + h->ip_id = 0; + } else { + h->ip_id = ip_randomid(); + } + h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, 0); } -#endif /* RANDOM_IP_ID */ - if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) + if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) { pd->flags |= PFDESC_IP_REAS; + } - return (PF_PASS); + return PF_PASS; fragment_pass: /* Enforce a minimum ttl, may cause endless packet loops */ @@ -1088,173 +2055,210 @@ fragment_pass: h->ip_ttl = r->min_ttl; h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); } - if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) + if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) { pd->flags |= PFDESC_IP_REAS; 
- return (PF_PASS); + } + return PF_PASS; no_mem: REASON_SET(reason, PFRES_MEMORY); - if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, + if (r != NULL && r->log && pbuf_is_valid(pbuf)) { + PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r, NULL, NULL, pd); - return (PF_DROP); + } + return PF_DROP; drop: REASON_SET(reason, PFRES_NORM); - if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, + if (r != NULL && r->log && pbuf_is_valid(pbuf)) { + PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r, NULL, NULL, pd); - return (PF_DROP); + } + return PF_DROP; bad: - DPFPRINTF(("dropping bad fragment\n")); + DPFPRINTF(("dropping bad IPv4 fragment\n")); /* Free associated fragments */ - if (frag != NULL) + if (frag != NULL) { pf_free_fragment(frag); + } REASON_SET(reason, PFRES_FRAG); - if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); + if (r != NULL && r->log && pbuf_is_valid(pbuf)) { + PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r, NULL, NULL, pd); + } - return (PF_DROP); + return PF_DROP; } #if INET6 int -pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, +pf_normalize_ip6(pbuf_t *pbuf, int dir, struct pfi_kif *kif, u_short *reason, struct pf_pdesc *pd) { - struct mbuf *m = *m0; - struct pf_rule *r; - struct ip6_hdr *h = mtod(m, struct ip6_hdr *); - int off; - struct ip6_ext ext; + struct mbuf *m; + struct pf_rule *r; + struct ip6_hdr *h = pbuf->pb_data; + int off; + struct ip6_ext ext; /* adi XXX */ #if 0 - struct ip6_opt opt; - struct ip6_opt_jumbo jumbo; - int optend; - int ooff; + struct ip6_opt opt; + struct ip6_opt_jumbo jumbo; + int optend; + int ooff; #endif - struct ip6_frag frag; - u_int32_t jumbolen = 0, plen; - u_int16_t fragoff = 0; - u_int8_t proto; - int terminal; + struct ip6_frag frag; + u_int32_t jumbolen = 0, plen; + u_int16_t fragoff = 0; + u_int8_t proto; + int terminal; + struct pf_frent *frent; + struct pf_fragment *pff = NULL; + 
int mff = 0, rh_cnt = 0; + u_int16_t fr_max; + int asd = 0; + struct pf_ruleset *ruleset = NULL; + struct ifnet *ifp = pbuf->pb_ifp; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (pfi_kif_match(r->kif, kif) == r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) { r = r->skip[PF_SKIP_IFP].ptr; - else if (r->direction && r->direction != dir) + } else if (r->direction && r->direction != dir) { r = r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != AF_INET6) + } else if (r->af && r->af != AF_INET6) { r = r->skip[PF_SKIP_AF].ptr; + } #if 0 /* header chain! */ - else if (r->proto && r->proto != h->ip6_nxt) + else if (r->proto && r->proto != h->ip6_nxt) { r = r->skip[PF_SKIP_PROTO].ptr; + } #endif else if (PF_MISMATCHAW(&r->src.addr, - (struct pf_addr *)&h->ip6_src, AF_INET6, - r->src.neg, kif)) + (struct pf_addr *)(uintptr_t)&h->ip6_src, AF_INET6, + r->src.neg, kif)) { r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, - (struct pf_addr *)&h->ip6_dst, AF_INET6, - r->dst.neg, NULL)) + } else if (PF_MISMATCHAW(&r->dst.addr, + (struct pf_addr *)(uintptr_t)&h->ip6_dst, AF_INET6, + r->dst.neg, NULL)) { r = r->skip[PF_SKIP_DST_ADDR].ptr; - else + } else { + if (r->anchor == NULL) { + break; + } else { + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_SCRUB, &r, NULL, NULL); + } + } + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_SCRUB, &r, NULL, NULL)) { break; + } } - if (r == NULL || r->action == PF_NOSCRUB) - return (PF_PASS); - else { + if (r == NULL || r->action == PF_NOSCRUB) { + return PF_PASS; + } else { r->packets[dir == PF_OUT]++; r->bytes[dir == PF_OUT] += pd->tot_len; } /* Check for illegal packets */ - if ((int)(sizeof (struct ip6_hdr) + IPV6_MAXPACKET) < m->m_pkthdr.len) + if ((uint32_t)(sizeof(struct ip6_hdr) + IPV6_MAXPACKET) < + pbuf->pb_packet_len) { goto drop; + } - off = sizeof (struct ip6_hdr); + off = sizeof(struct ip6_hdr); proto = 
h->ip6_nxt; terminal = 0; do { + pd->proto = proto; switch (proto) { case IPPROTO_FRAGMENT: goto fragment; - break; case IPPROTO_AH: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: - if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL, - NULL, AF_INET6)) + if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), NULL, + NULL, AF_INET6)) { goto shortpkt; -#ifndef NO_APPLE_EXTENSIONS + } /* * + * Multiple routing headers not allowed. * Routing header type zero considered harmful. */ if (proto == IPPROTO_ROUTING) { const struct ip6_rthdr *rh = (const struct ip6_rthdr *)&ext; - if (rh->ip6r_type == IPV6_RTHDR_TYPE_0) + if (rh_cnt++) { goto drop; - } - else -#endif - if (proto == IPPROTO_AH) + } + if (rh->ip6r_type == IPV6_RTHDR_TYPE_0) { + goto drop; + } + } else if (proto == IPPROTO_AH) { off += (ext.ip6e_len + 2) * 4; - else + } else { off += (ext.ip6e_len + 1) * 8; + } proto = ext.ip6e_nxt; break; case IPPROTO_HOPOPTS: /* adi XXX */ #if 0 - if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL, - NULL, AF_INET6)) + if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, + NULL, AF_INET6)) { goto shortpkt; + } optend = off + (ext.ip6e_len + 1) * 8; - ooff = off + sizeof (ext); + ooff = off + sizeof(ext); do { if (!pf_pull_hdr(m, ooff, &opt.ip6o_type, - sizeof (opt.ip6o_type), NULL, NULL, - AF_INET6)) + sizeof(opt.ip6o_type), NULL, NULL, + AF_INET6)) { goto shortpkt; + } if (opt.ip6o_type == IP6OPT_PAD1) { ooff++; continue; } - if (!pf_pull_hdr(m, ooff, &opt, sizeof (opt), - NULL, NULL, AF_INET6)) + if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt), + NULL, NULL, AF_INET6)) { goto shortpkt; - if (ooff + sizeof (opt) + opt.ip6o_len > optend) + } + if (ooff + sizeof(opt) + opt.ip6o_len > optend) { goto drop; + } switch (opt.ip6o_type) { case IP6OPT_JUMBO: - if (h->ip6_plen != 0) + if (h->ip6_plen != 0) { goto drop; + } if (!pf_pull_hdr(m, ooff, &jumbo, - sizeof (jumbo), NULL, NULL, - AF_INET6)) + sizeof(jumbo), NULL, NULL, + AF_INET6)) { goto shortpkt; + } memcpy(&jumbolen, jumbo.ip6oj_jumbo_len, - 
sizeof (jumbolen)); + sizeof(jumbolen)); jumbolen = ntohl(jumbolen); - if (jumbolen <= IPV6_MAXPACKET) + if (jumbolen <= IPV6_MAXPACKET) { goto drop; - if (sizeof (struct ip6_hdr) + - jumbolen != m->m_pkthdr.len) + } + if (sizeof(struct ip6_hdr) + + jumbolen != m->m_pkthdr.len) { goto drop; + } break; default: break; } - ooff += sizeof (opt) + opt.ip6o_len; + ooff += sizeof(opt) + opt.ip6o_len; } while (ooff < optend); off = optend; @@ -1268,159 +2272,282 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, } while (!terminal); /* jumbo payload option must be present, or plen > 0 */ - if (ntohs(h->ip6_plen) == 0) + if (ntohs(h->ip6_plen) == 0) { plen = jumbolen; - else + } else { plen = ntohs(h->ip6_plen); - if (plen == 0) + } + if (plen == 0) { goto drop; - if ((int)(sizeof (struct ip6_hdr) + plen) > m->m_pkthdr.len) + } + if ((uint32_t)(sizeof(struct ip6_hdr) + plen) > pbuf->pb_packet_len) { goto shortpkt; + } /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip6_hlim < r->min_ttl) + if (r->min_ttl && h->ip6_hlim < r->min_ttl) { h->ip6_hlim = r->min_ttl; + } - return (PF_PASS); + return PF_PASS; fragment: - if (ntohs(h->ip6_plen) == 0 || jumbolen) + if (ntohs(h->ip6_plen) == 0 || jumbolen) { goto drop; + } plen = ntohs(h->ip6_plen); - if (!pf_pull_hdr(m, off, &frag, sizeof (frag), NULL, NULL, AF_INET6)) + if (!pf_pull_hdr(pbuf, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) { goto shortpkt; + } fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); - if (fragoff + (plen - off - sizeof (frag)) > IPV6_MAXPACKET) + pd->proto = frag.ip6f_nxt; + mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG); + off += sizeof frag; + if (fragoff + (plen - off) > IPV6_MAXPACKET) { goto badfrag; + } + + fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr)); +// XXX SCW: mbuf-specific +// DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u " +// "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off, +// fragoff, fr_max)); + 
+ if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) { + /* Fully buffer all of the fragments */ + pd->flags |= PFDESC_IP_REAS; + + pff = pf_find_fragment_by_ipv6_header(h, &frag, + &pf_frag_tree); + + /* Check if we saw the last fragment already */ + if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) && + fr_max > pff->fr_max) { + goto badfrag; + } + + if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) { + REASON_SET(reason, PFRES_MEMORY); + return PF_DROP; + } + + /* Restore iph pointer after pbuf_to_mbuf() */ + h = mtod(m, struct ip6_hdr *); + + /* Get an entry for the fragment queue */ + frent = pool_get(&pf_frent_pl, PR_NOWAIT); + if (frent == NULL) { + REASON_SET(reason, PFRES_MEMORY); + return PF_DROP; + } + + pf_nfrents++; + frent->fr_ip6 = h; + frent->fr_m = m; + frent->fr_ip6f_opt = frag; + frent->fr_ip6f_hlen = off; + + /* Might return a completely reassembled mbuf, or NULL */ + DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n", + ntohl(frag.ip6f_ident), fragoff, fr_max)); + m = pf_reassemble6(&m, &pff, frent, mff); + + if (m == NULL) { + return PF_DROP; + } + + pbuf_init_mbuf(pbuf, m, ifp); + h = pbuf->pb_data; + + if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) { + goto drop; + } + } else if (dir == PF_IN || !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) { + /* non-buffering fragment cache (overlaps: see RFC 5722) */ + int nomem = 0; + + pff = pf_find_fragment_by_ipv6_header(h, &frag, + &pf_cache_tree); + + /* Check if we saw the last fragment already */ + if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) && + fr_max > pff->fr_max) { + if (r->rule_flag & PFRULE_FRAGDROP) { + pff->fr_flags |= PFFRAG_DROP; + } + goto badfrag; + } + + if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) { + goto no_mem; + } + + /* Restore iph pointer after pbuf_to_mbuf() */ + h = mtod(m, struct ip6_hdr *); + + m = pf_frag6cache(&m, h, &frag, &pff, off, mff, + (r->rule_flag & PFRULE_FRAGDROP) ? 
1 : 0, &nomem); + if (m == NULL) { + // Note: pf_frag6cache() has already m_freem'd the mbuf + if (nomem) { + goto no_mem; + } + goto drop; + } + + pbuf_init_mbuf(pbuf, m, ifp); + pd->pf_mtag = pf_find_mtag_pbuf(pbuf); + h = pbuf->pb_data; + + if (dir == PF_IN) { + pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE; + } + + if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) { + goto drop; + } + } + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (r->min_ttl && h->ip6_hlim < r->min_ttl) { + h->ip6_hlim = r->min_ttl; + } + return PF_PASS; - /* do something about it */ - /* remember to set pd->flags |= PFDESC_IP_REAS */ - return (PF_PASS); +no_mem: + REASON_SET(reason, PFRES_MEMORY); + goto dropout; shortpkt: REASON_SET(reason, PFRES_SHORT); - if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, - NULL, NULL, pd); - return (PF_DROP); + goto dropout; drop: REASON_SET(reason, PFRES_NORM); - if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, - NULL, NULL, pd); - return (PF_DROP); + goto dropout; badfrag: + DPFPRINTF(("dropping bad IPv6 fragment\n")); REASON_SET(reason, PFRES_FRAG); - if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, - NULL, NULL, pd); - return (PF_DROP); + goto dropout; + +dropout: + if (pff != NULL) { + pf_free_fragment(pff); + } + if (r != NULL && r->log && pbuf_is_valid(pbuf)) { + PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, *reason, r, NULL, NULL, pd); + } + return PF_DROP; } #endif /* INET6 */ int -pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, +pf_normalize_tcp(int dir, struct pfi_kif *kif, pbuf_t *pbuf, int ipoff, int off, void *h, struct pf_pdesc *pd) { #pragma unused(ipoff, h) - struct pf_rule *r, *rm = NULL; - struct tcphdr *th = pd->hdr.tcp; - int rewrite = 0; - u_short reason; - u_int8_t flags; - sa_family_t af = pd->af; -#ifndef NO_APPLE_EXTENSIONS + struct pf_rule *r, *rm = NULL; + struct tcphdr *th = pd->hdr.tcp; + int 
rewrite = 0; + int asd = 0; + u_short reason; + u_int8_t flags; + sa_family_t af = pd->af; + struct pf_ruleset *ruleset = NULL; union pf_state_xport sxport, dxport; sxport.port = th->th_sport; dxport.port = th->th_dport; -#endif r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (pfi_kif_match(r->kif, kif) == r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) { r = r->skip[PF_SKIP_IFP].ptr; - else if (r->direction && r->direction != dir) + } else if (r->direction && r->direction != dir) { r = r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != af) + } else if (r->af && r->af != af) { r = r->skip[PF_SKIP_AF].ptr; - else if (r->proto && r->proto != pd->proto) + } else if (r->proto && r->proto != pd->proto) { r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, - r->src.neg, kif)) + } else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) { r = r->skip[PF_SKIP_SRC_ADDR].ptr; -#ifndef NO_APPLE_EXTENSIONS - else if (r->src.xport.range.op && + } else if (r->src.xport.range.op && !pf_match_xport(r->src.xport.range.op, r->proto_variant, - &r->src.xport, &sxport)) -#else - else if (r->src.port_op && !pf_match_port(r->src.port_op, - r->src.port[0], r->src.port[1], th->th_sport)) -#endif + &r->src.xport, &sxport)) { r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, - r->dst.neg, NULL)) + } else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) { r = r->skip[PF_SKIP_DST_ADDR].ptr; -#ifndef NO_APPLE_EXTENSIONS - else if (r->dst.xport.range.op && + } else if (r->dst.xport.range.op && !pf_match_xport(r->dst.xport.range.op, r->proto_variant, - &r->dst.xport, &dxport)) -#else - else if (r->dst.port_op && !pf_match_port(r->dst.port_op, - r->dst.port[0], r->dst.port[1], th->th_dport)) -#endif + &r->dst.xport, &dxport)) { r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->os_fingerprint != PF_OSFP_ANY && - 
!pf_osfp_match(pf_osfp_fingerprint(pd, m, off, th), - r->os_fingerprint)) + } else if (r->os_fingerprint != PF_OSFP_ANY && + !pf_osfp_match(pf_osfp_fingerprint(pd, pbuf, off, th), + r->os_fingerprint)) { r = TAILQ_NEXT(r, entries); - else { - rm = r; + } else { + if (r->anchor == NULL) { + rm = r; + break; + } else { + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_SCRUB, &r, NULL, NULL); + } + } + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_SCRUB, &r, NULL, NULL)) { break; } } - if (rm == NULL || rm->action == PF_NOSCRUB) - return (PF_PASS); - else { + if (rm == NULL || rm->action == PF_NOSCRUB) { + return PF_PASS; + } else { r->packets[dir == PF_OUT]++; r->bytes[dir == PF_OUT] += pd->tot_len; } - if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) + if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) { pd->flags |= PFDESC_TCP_NORM; + } flags = th->th_flags; if (flags & TH_SYN) { /* Illegal packet */ - if (flags & TH_RST) + if (flags & TH_RST) { goto tcp_drop; + } - if (flags & TH_FIN) + if (flags & TH_FIN) { flags &= ~TH_FIN; + } } else { /* Illegal packet */ - if (!(flags & (TH_ACK|TH_RST))) + if (!(flags & (TH_ACK | TH_RST))) { goto tcp_drop; + } } if (!(flags & TH_ACK)) { /* These flags are only valid if ACK is set */ - if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) + if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) { goto tcp_drop; + } } /* Check for illegal header length */ - if (th->th_off < (sizeof (struct tcphdr) >> 2)) + if (th->th_off < (sizeof(struct tcphdr) >> 2)) { goto tcp_drop; + } /* If flags changed, or reserved data set, then adjust */ if (flags != th->th_flags || th->th_x2 != 0) { - u_int16_t ov, nv; + u_int16_t ov, nv; ov = *(u_int16_t *)(&th->th_ack + 1); th->th_flags = flags; @@ -1439,49 +2566,42 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, } /* copy back packet headers if we sanitized */ -#ifndef NO_APPLE_EXTENSIONS /* Process options */ if (r->max_mss) { - int rv = 
pf_normalize_tcpopt(r, dir, kif, pd, m, th, off, + int rv = pf_normalize_tcpopt(r, dir, kif, pd, pbuf, th, off, &rewrite); - if (rv == PF_DROP) + if (rv == PF_DROP) { return rv; - m = pd->mp; + } + pbuf = pd->mp; } if (rewrite) { - struct mbuf *mw = pf_lazy_makewritable(pd, m, - off + sizeof (*th)); - if (!mw) { + if (pf_lazy_makewritable(pd, pbuf, + off + sizeof(*th)) == NULL) { REASON_SET(&reason, PFRES_MEMORY); - if (r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, + if (r->log) { + PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, r, 0, 0, pd); + } return PF_DROP; } - m_copyback(mw, off, sizeof (*th), th); + pbuf_copy_back(pbuf, off, sizeof(*th), th); } -#else - /* Process options */ - if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af)) - rewrite = 1; - - if (rewrite) - m_copyback(m, off, sizeof (*th), th); -#endif - return (PF_PASS); + return PF_PASS; tcp_drop: REASON_SET(&reason, PFRES_NORM); - if (rm != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd); - return (PF_DROP); + if (rm != NULL && r->log) { + PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, r, NULL, NULL, pd); + } + return PF_DROP; } int -pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, +pf_normalize_tcp_init(pbuf_t *pbuf, int off, struct pf_pdesc *pd, struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) { #pragma unused(dst) @@ -1492,21 +2612,22 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, VERIFY(src->scrub == NULL); src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); - if (src->scrub == NULL) - return (1); - bzero(src->scrub, sizeof (*src->scrub)); + if (src->scrub == NULL) { + return 1; + } + bzero(src->scrub, sizeof(*src->scrub)); switch (pd->af) { #if INET case AF_INET: { - struct ip *h = mtod(m, struct ip *); + struct ip *h = pbuf->pb_data; src->scrub->pfss_ttl = h->ip_ttl; break; } #endif /* INET */ #if INET6 case AF_INET6: { - struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + 
struct ip6_hdr *h = pbuf->pb_data; src->scrub->pfss_ttl = h->ip6_hlim; break; } @@ -1518,19 +2639,20 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, * All normalizations below are only begun if we see the start of * the connections. They must all set an enabled bit in pfss_flags */ - if ((th->th_flags & TH_SYN) == 0) - return (0); + if ((th->th_flags & TH_SYN) == 0) { + return 0; + } - if (th->th_off > (sizeof (struct tcphdr) >> 2) && src->scrub && - pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { + if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && + pf_pull_hdr(pbuf, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { /* Diddle with TCP options */ int hlen; - opt = hdr + sizeof (struct tcphdr); - hlen = (th->th_off << 2) - sizeof (struct tcphdr); + opt = hdr + sizeof(struct tcphdr); + hlen = (th->th_off << 2) - sizeof(struct tcphdr); while (hlen >= TCPOLEN_TIMESTAMP) { switch (*opt) { - case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_EOL: /* FALLTHROUGH */ case TCPOPT_NOP: opt++; hlen--; @@ -1544,15 +2666,15 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, /* note PFSS_PAWS not set yet */ memcpy(&tsval, &opt[2], - sizeof (u_int32_t)); + sizeof(u_int32_t)); memcpy(&tsecr, &opt[6], - sizeof (u_int32_t)); + sizeof(u_int32_t)); src->scrub->pfss_tsval0 = ntohl(tsval); src->scrub->pfss_tsval = ntohl(tsval); src->scrub->pfss_tsecr = ntohl(tsecr); getmicrouptime(&src->scrub->pfss_last); } - /* FALLTHROUGH */ + /* FALLTHROUGH */ default: hlen -= MAX(opt[1], 2); opt += MAX(opt[1], 2); @@ -1561,27 +2683,29 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, } } - return (0); + return 0; } void pf_normalize_tcp_cleanup(struct pf_state *state) { - if (state->src.scrub) + if (state->src.scrub) { pool_put(&pf_state_scrub_pl, state->src.scrub); - if (state->dst.scrub) + } + if (state->dst.scrub) { pool_put(&pf_state_scrub_pl, state->dst.scrub); + } /* Someday... 
flush the TCP segment reassembly descriptors. */ } int -pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, +pf_normalize_tcp_stateful(pbuf_t *pbuf, int off, struct pf_pdesc *pd, u_short *reason, struct tcphdr *th, struct pf_state *state, struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) { struct timeval uptime; - u_int32_t tsval, tsecr; + u_int32_t tsval = 0, tsecr = 0; u_int tsval_from_last; u_int8_t hdr[60]; u_int8_t *opt; @@ -1599,9 +2723,10 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, #if INET case AF_INET: { if (src->scrub) { - struct ip *h = mtod(m, struct ip *); - if (h->ip_ttl > src->scrub->pfss_ttl) + struct ip *h = pbuf->pb_data; + if (h->ip_ttl > src->scrub->pfss_ttl) { src->scrub->pfss_ttl = h->ip_ttl; + } h->ip_ttl = src->scrub->pfss_ttl; } break; @@ -1610,9 +2735,10 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, #if INET6 case AF_INET6: { if (src->scrub) { - struct ip6_hdr *h = mtod(m, struct ip6_hdr *); - if (h->ip6_hlim > src->scrub->pfss_ttl) + struct ip6_hdr *h = pbuf->pb_data; + if (h->ip6_hlim > src->scrub->pfss_ttl) { src->scrub->pfss_ttl = h->ip6_hlim; + } h->ip6_hlim = src->scrub->pfss_ttl; } break; @@ -1620,17 +2746,17 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, #endif /* INET6 */ } - if (th->th_off > (sizeof (struct tcphdr) >> 2) && + if (th->th_off > (sizeof(struct tcphdr) >> 2) && ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && - pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { + pf_pull_hdr(pbuf, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { /* Diddle with TCP options */ int hlen; - opt = hdr + sizeof (struct tcphdr); - hlen = (th->th_off << 2) - sizeof (struct tcphdr); + opt = hdr + sizeof(struct tcphdr); + hlen = (th->th_off << 2) - sizeof(struct tcphdr); while (hlen >= TCPOLEN_TIMESTAMP) { switch (*opt) { - case 
TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_EOL: /* FALLTHROUGH */ case TCPOPT_NOP: opt++; hlen--; @@ -1650,11 +2776,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, printf("\n"); } REASON_SET(reason, PFRES_TS); - return (PF_DROP); + return PF_DROP; } if (opt[1] >= TCPOLEN_TIMESTAMP) { memcpy(&tsval, &opt[2], - sizeof (u_int32_t)); + sizeof(u_int32_t)); if (tsval && src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) { @@ -1669,7 +2795,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, /* Modulate TS reply iff valid (!0) */ memcpy(&tsecr, &opt[6], - sizeof (u_int32_t)); + sizeof(u_int32_t)); if (tsecr && dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { @@ -1682,7 +2808,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, } got_ts = 1; } - /* FALLTHROUGH */ + /* FALLTHROUGH */ default: hlen -= MAX(opt[1], 2); opt += MAX(opt[1], 2); @@ -1691,22 +2817,15 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, } if (copyback) { /* Copyback the options, caller copys back header */ -#ifndef NO_APPLE_EXTENSIONS - int optoff = off + sizeof (*th); - int optlen = (th->th_off << 2) - sizeof (*th); - m = pf_lazy_makewritable(pd, m, optoff + optlen); - if (!m) { + int optoff = off + sizeof(*th); + int optlen = (th->th_off << 2) - sizeof(*th); + if (pf_lazy_makewritable(pd, pbuf, optoff + optlen) == + NULL) { REASON_SET(reason, PFRES_MEMORY); return PF_DROP; } *writeback = optoff + optlen; - m_copyback(m, optoff, optlen, hdr + sizeof (*th)); -#else - *writeback = 1; - m_copyback(m, off + sizeof (struct tcphdr), - (th->th_off << 2) - sizeof (struct tcphdr), hdr + - sizeof (struct tcphdr)); -#endif + pbuf_copy_back(pbuf, optoff, optlen, hdr + sizeof(*th)); } } @@ -1718,13 +2837,13 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * TS echo check only works for the first 12 days of a connection * when the TS has exhausted half its 32bit space */ 
-#define TS_MAX_IDLE (24*24*60*60) -#define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ +#define TS_MAX_IDLE (24*24*60*60) +#define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ getmicrouptime(&uptime); if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || - pf_time_second() - state->creation > TS_MAX_CONN)) { + pf_time_second() - state->creation > TS_MAX_CONN)) { if (pf_status.debug >= PF_DEBUG_MISC) { DPFPRINTF(("src idled out of PAWS\n")); pf_print_state(state); @@ -1819,16 +2938,17 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * packet got delayed in transit for much longer than * this packet. */ - if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) + if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) { ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; + } /* Calculate max ticks since the last timestamp */ -#define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ -#define TS_MICROSECS 1000000 /* microseconds per second */ +#define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ +#define TS_MICROSECS 1000000 /* microseconds per second */ timersub(&uptime, &src->scrub->pfss_last, &delta_ts); tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; - tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); + tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS / TS_MAXFREQ); if ((src->state >= TCPS_ESTABLISHED && @@ -1866,11 +2986,10 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, printf("\n"); } REASON_SET(reason, PFRES_TS); - return (PF_DROP); + return PF_DROP; } /* XXX I'd really like to require tsecr but it's optional */ - } else if (!got_ts && (th->th_flags & TH_RST) == 0 && ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) || pd->p_len > 0 || (th->th_flags & TH_SYN)) && @@ -1917,7 +3036,7 @@ pf_normalize_tcp_stateful(struct mbuf 
*m, int off, struct pf_pdesc *pd, printf("\n"); } REASON_SET(reason, PFRES_TS); - return (PF_DROP); + return PF_DROP; } } @@ -1931,10 +3050,10 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * packets (seen in a WWW accelerator or cache). */ if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & - (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { - if (got_ts) + (PFSS_TIMESTAMP | PFSS_DATA_TS | PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { + if (got_ts) { src->scrub->pfss_flags |= PFSS_DATA_TS; - else { + } else { src->scrub->pfss_flags |= PFSS_DATA_NOTS; if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { @@ -1954,16 +3073,18 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * Update PAWS values */ if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & - (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { + (PFSS_PAWS_IDLED | PFSS_TIMESTAMP))) { getmicrouptime(&src->scrub->pfss_last); if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || - (src->scrub->pfss_flags & PFSS_PAWS) == 0) + (src->scrub->pfss_flags & PFSS_PAWS) == 0) { src->scrub->pfss_tsval = tsval; + } if (tsecr) { if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || - (src->scrub->pfss_flags & PFSS_PAWS) == 0) + (src->scrub->pfss_flags & PFSS_PAWS) == 0) { src->scrub->pfss_tsecr = tsecr; + } if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && (SEQ_LT(tsval, src->scrub->pfss_tsval0) || @@ -1973,80 +3094,68 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, } /* Only fully initialized after a TS gets echoed */ - if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) + if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) { src->scrub->pfss_flags |= PFSS_PAWS; + } } } /* I have a dream.... TCP segment reassembly.... 
*/ - return (0); + return 0; } -#ifndef NO_APPLE_EXTENSIONS static int pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif, - struct pf_pdesc *pd, struct mbuf *m, struct tcphdr *th, int off, + struct pf_pdesc *pd, pbuf_t *pbuf, struct tcphdr *th, int off, int *rewrptr) { #pragma unused(dir, kif) sa_family_t af = pd->af; -#else -static int -pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, - int off, sa_family_t af) -{ -#endif - u_int16_t *mss; - int thoff; - int opt, cnt, optlen = 0; - int rewrite = 0; - u_char opts[MAX_TCPOPTLEN]; - u_char *optp = opts; + u_int16_t *mss; + int thoff; + int opt, cnt, optlen = 0; + int rewrite = 0; + u_char opts[MAX_TCPOPTLEN]; + u_char *optp = opts; thoff = th->th_off << 2; - cnt = thoff - sizeof (struct tcphdr); + cnt = thoff - sizeof(struct tcphdr); -#ifndef NO_APPLE_MODIFICATIONS - if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt, - NULL, NULL, af)) + if (cnt > 0 && !pf_pull_hdr(pbuf, off + sizeof(*th), opts, cnt, + NULL, NULL, af)) { return PF_DROP; -#else - if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt, - NULL, NULL, af)) - return (rewrite); -#endif + } for (; cnt > 0; cnt -= optlen, optp += optlen) { opt = optp[0]; - if (opt == TCPOPT_EOL) + if (opt == TCPOPT_EOL) { break; - if (opt == TCPOPT_NOP) + } + if (opt == TCPOPT_NOP) { optlen = 1; - else { - if (cnt < 2) + } else { + if (cnt < 2) { break; + } optlen = optp[1]; - if (optlen < 2 || optlen > cnt) + if (optlen < 2 || optlen > cnt) { break; + } } switch (opt) { case TCPOPT_MAXSEG: - mss = (u_int16_t *)(optp + 2); + mss = (u_int16_t *)(void *)(optp + 2); if ((ntohs(*mss)) > r->max_mss) { -#ifndef NO_APPLE_MODIFICATIONS /* * * Only do the TCP checksum fixup if delayed * checksum calculation will not be performed. 
*/ - if (m->m_pkthdr.rcvif || - !(m->m_pkthdr.csum_flags & CSUM_TCP)) + if (pbuf->pb_ifp || + !(*pbuf->pb_csum_flags & CSUM_TCP)) { th->th_sum = pf_cksum_fixup(th->th_sum, *mss, htons(r->max_mss), 0); -#else - th->th_sum = pf_cksum_fixup(th->th_sum, - *mss, htons(r->max_mss), 0); -#endif + } *mss = htons(r->max_mss); rewrite = 1; } @@ -2056,30 +3165,24 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, } } -#ifndef NO_APPLE_MODIFICATIONS if (rewrite) { - struct mbuf *mw; u_short reason; - mw = pf_lazy_makewritable(pd, pd->mp, - off + sizeof (*th) + thoff); - if (!mw) { + VERIFY(pbuf == pd->mp); + + if (pf_lazy_makewritable(pd, pd->mp, + off + sizeof(*th) + thoff) == NULL) { REASON_SET(&reason, PFRES_MEMORY); - if (r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, + if (r->log) { + PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, r, 0, 0, pd); + } return PF_DROP; } *rewrptr = 1; - m_copyback(mw, off + sizeof (*th), thoff - sizeof (*th), opts); + pbuf_copy_back(pd->mp, off + sizeof(*th), thoff - sizeof(*th), opts); } return PF_PASS; -#else - if (rewrite) - m_copyback(m, off + sizeof (*th), thoff - sizeof (*th), opts); - - return (rewrite); -#endif }