git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2007-2020 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28
	29	/* $apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
	30	/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
	31
	32	/*
	33	* Copyright 2001 Niels Provos <provos@citi.umich.edu>
	34	* All rights reserved.
	35	*
	36	* Redistribution and use in source and binary forms, with or without
	37	* modification, are permitted provided that the following conditions
	38	* are met:
	39	* 1. Redistributions of source code must retain the above copyright
	40	* notice, this list of conditions and the following disclaimer.
	41	* 2. Redistributions in binary form must reproduce the above copyright
	42	* notice, this list of conditions and the following disclaimer in the
	43	* documentation and/or other materials provided with the distribution.
	44	*
	45	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	46	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	47	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	48	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	49	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	50	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	51	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	52	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	53	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	54	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	55	*/
	56
	57	#include <sys/param.h>
	58	#include <sys/systm.h>
	59	#include <sys/mbuf.h>
	60	#include <sys/filio.h>
	61	#include <sys/fcntl.h>
	62	#include <sys/socket.h>
	63	#include <sys/kernel.h>
	64	#include <sys/time.h>
	65	#include <sys/random.h>
	66	#include <sys/mcache.h>
	67
	68	#include <net/if.h>
	69	#include <net/if_types.h>
	70	#include <net/bpf.h>
	71	#include <net/route.h>
	72	#include <net/if_pflog.h>
	73
	74	#include <netinet/in.h>
	75	#include <netinet/in_var.h>
	76	#include <netinet/in_systm.h>
	77	#include <netinet/ip.h>
	78	#include <netinet/ip_var.h>
	79	#include <netinet/tcp.h>
	80	#include <netinet/tcp_seq.h>
	81	#include <netinet/tcp_fsm.h>
	82	#include <netinet/udp.h>
	83	#include <netinet/ip_icmp.h>
	84
	85	#include <netinet/ip6.h>
	86	#include <netinet6/ip6_var.h>
	87
	88	#include <net/pfvar.h>
	89
	90	struct pf_frent {
	91	LIST_ENTRY(pf_frent) fr_next;
	92	struct mbuf *fr_m;
	93	#define fr_ip fr_u.fru_ipv4
	94	#define fr_ip6 fr_u.fru_ipv6
	95	union {
	96	struct ip *fru_ipv4;
	97	struct ip6_hdr *fru_ipv6;
	98	} fr_u;
	99	struct ip6_frag fr_ip6f_opt;
	100	uint16_t fr_ip6f_hlen; /* total header length */
	101	uint16_t fr_ip6f_extoff; /* last extension header offset or 0 */
	102	};
	103
	104	struct pf_frcache {
	105	LIST_ENTRY(pf_frcache) fr_next;
	106	uint16_t fr_off;
	107	uint16_t fr_end;
	108	};
	109
	/* Bits stored in pf_fragment's fr_flags. */
	#define PFFRAG_SEENLAST 0x0001          /* Seen the last fragment for this */
	#define PFFRAG_NOBUFFER 0x0002          /* Non-buffering fragment cache */
	#define PFFRAG_DROP     0x0004          /* Drop all fragments */

	/* True when the descriptor buffers fragments (i.e. is not a cache entry). */
	#define BUFFER_FRAGMENTS(fr)    (((fr)->fr_flags & PFFRAG_NOBUFFER) == 0)
	114
	115	struct pf_fragment {
	116	RB_ENTRY(pf_fragment) fr_entry;
	117	TAILQ_ENTRY(pf_fragment) frag_next;
	118	struct pf_addr fr_srcx;
	119	struct pf_addr fr_dstx;
	120	u_int8_t fr_p; /* protocol of this fragment */
	121	u_int8_t fr_flags; /* status flags */
	122	u_int16_t fr_max; /* fragment data max */
	123	#define fr_id fr_uid.fru_id4
	124	#define fr_id6 fr_uid.fru_id6
	125	union {
	126	u_int16_t fru_id4;
	127	u_int32_t fru_id6;
	128	} fr_uid;
	129	int fr_af;
	130	u_int32_t fr_timeout;
	131	#define fr_queue fr_u.fru_queue
	132	#define fr_cache fr_u.fru_cache
	133	union {
	134	LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */
	135	LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */
	136	} fr_u;
	137	uint32_t fr_csum_flags; /* checksum flags */
	138	uint32_t fr_csum; /* partial checksum value */
	139	uint16_t fr_ip6_maxlen; /* maximum length of a single fragment in IPv6 */
	140	};
	141
	142	static TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue;
	143	static TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue;
	144
	145	static __inline int pf_frag_compare(struct pf_fragment *,
	146	struct pf_fragment *);
	147	static RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree;
	148	RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
	149	pf_frag_compare);
	150	RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
	151
	152	/* Private prototypes */
	153	static void pf_ip6hdr2key(struct pf_fragment , struct ip6_hdr ,
	154	struct ip6_frag *);
	155	static void pf_ip2key(struct pf_fragment , struct ip );
	156	static void pf_remove_fragment(struct pf_fragment *);
	157	static void pf_flush_fragments(void);
	158	static void pf_free_fragment(struct pf_fragment *);
	159	static struct pf_fragment pf_find_fragment_by_key(struct pf_fragment ,
	160	struct pf_frag_tree *);
	161	static __inline struct pf_fragment *
	162	pf_find_fragment_by_ipv4_header(struct ip , struct pf_frag_tree );
	163	static struct mbuf pf_reassemble(struct mbuf , struct pf_fragment **,
	164	struct pf_frent *, int);
	165	static struct mbuf pf_fragcache(struct mbuf , struct ip ,
	166	struct pf_fragment *, int, int, int );
	167	static int pf_normalize_tcpopt(struct pf_rule , int, struct pfi_kif ,
	168	struct pf_pdesc , pbuf_t , struct tcphdr , int, int );
	169	static __inline struct pf_fragment *
	170	pf_find_fragment_by_ipv6_header(struct ip6_hdr , struct ip6_frag ,
	171	struct pf_frag_tree *);
	172	static struct mbuf pf_reassemble6(struct mbuf , struct pf_fragment *,
	173	struct pf_frent *, int);
	174	static struct mbuf pf_frag6cache(struct mbuf , struct ip6_hdr,
	175	struct ip6_frag , struct pf_fragment , int, int, int, int );
	176
	/*
	 * Debug print, emitted only when pf_status.debug is at least
	 * PF_DEBUG_MISC.  The argument is a complete parenthesised printf
	 * argument list, e.g. DPFPRINTF(("x=%d\n", x)); the output is prefixed
	 * with the calling function's name.
	 */
	#define DPFPRINTF(x)                                            \
		do {                                                    \
			if (pf_status.debug >= PF_DEBUG_MISC) {         \
				printf("%s: ", __func__);               \
				printf x;                               \
			}                                               \
		} while (0)
	183
	184	/* Globals */
	185	struct pool pf_frent_pl, pf_frag_pl;
	186	static struct pool pf_cache_pl, pf_cent_pl;
	187	struct pool pf_state_scrub_pl;
	188
	189	static int pf_nfrents, pf_ncache;
	190
	191	void
	192	pf_normalize_init(void)
	193	{
	194	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	195	NULL);
	196	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	197	NULL);
	198	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
	199	"pffrcache", NULL);
	200	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
	201	NULL);
	202	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	203	"pfstscr", NULL);
	204
	205	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	206	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	207	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	208	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
	209
	210	TAILQ_INIT(&pf_fragqueue);
	211	TAILQ_INIT(&pf_cachequeue);
	212	}
	213
	#if 0
	/*
	 * Compiled out.  Would tear down the pools created by
	 * pf_normalize_init(), in the reverse of their initialisation order.
	 */
	void
	pf_normalize_destroy(void)
	{
		pool_destroy(&pf_state_scrub_pl);
		pool_destroy(&pf_cent_pl);
		pool_destroy(&pf_cache_pl);
		pool_destroy(&pf_frag_pl);
		pool_destroy(&pf_frent_pl);
	}
	#endif
	225
	226	int
	227	pf_normalize_isempty(void)
	228	{
	229	return TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue);
	230	}
	231
	232	static __inline int
	233	pf_frag_compare(struct pf_fragment a, struct pf_fragment b)
	234	{
	235	int diff;
	236
	237	if ((diff = a->fr_af - b->fr_af)) {
	238	return diff;
	239	} else if ((diff = a->fr_p - b->fr_p)) {
	240	return diff;
	241	} else {
	242	struct pf_addr *sa = &a->fr_srcx;
	243	struct pf_addr *sb = &b->fr_srcx;
	244	struct pf_addr *da = &a->fr_dstx;
	245	struct pf_addr *db = &b->fr_dstx;
	246
	247	switch (a->fr_af) {
	248	#ifdef INET
	249	case AF_INET:
	250	if ((diff = a->fr_id - b->fr_id)) {
	251	return diff;
	252	} else if (sa->v4addr.s_addr < sb->v4addr.s_addr) {
	253	return -1;
	254	} else if (sa->v4addr.s_addr > sb->v4addr.s_addr) {
	255	return 1;
	256	} else if (da->v4addr.s_addr < db->v4addr.s_addr) {
	257	return -1;
	258	} else if (da->v4addr.s_addr > db->v4addr.s_addr) {
	259	return 1;
	260	}
	261	break;
	262	#endif
	263	case AF_INET6:
	264	if ((diff = a->fr_id6 - b->fr_id6)) {
	265	return diff;
	266	} else if (sa->addr32[3] < sb->addr32[3]) {
	267	return -1;
	268	} else if (sa->addr32[3] > sb->addr32[3]) {
	269	return 1;
	270	} else if (sa->addr32[2] < sb->addr32[2]) {
	271	return -1;
	272	} else if (sa->addr32[2] > sb->addr32[2]) {
	273	return 1;
	274	} else if (sa->addr32[1] < sb->addr32[1]) {
	275	return -1;
	276	} else if (sa->addr32[1] > sb->addr32[1]) {
	277	return 1;
	278	} else if (sa->addr32[0] < sb->addr32[0]) {
	279	return -1;
	280	} else if (sa->addr32[0] > sb->addr32[0]) {
	281	return 1;
	282	} else if (da->addr32[3] < db->addr32[3]) {
	283	return -1;
	284	} else if (da->addr32[3] > db->addr32[3]) {
	285	return 1;
	286	} else if (da->addr32[2] < db->addr32[2]) {
	287	return -1;
	288	} else if (da->addr32[2] > db->addr32[2]) {
	289	return 1;
	290	} else if (da->addr32[1] < db->addr32[1]) {
	291	return -1;
	292	} else if (da->addr32[1] > db->addr32[1]) {
	293	return 1;
	294	} else if (da->addr32[0] < db->addr32[0]) {
	295	return -1;
	296	} else if (da->addr32[0] > db->addr32[0]) {
	297	return 1;
	298	}
	299	break;
	300	default:
	301	VERIFY(!0 && "only IPv4 and IPv6 supported!");
	302	break;
	303	}
	304	}
	305	return 0;
	306	}
	307
	308	void
	309	pf_purge_expired_fragments(void)
	310	{
	311	struct pf_fragment *frag;
	312	u_int32_t expire = pf_time_second() -
	313	pf_default_rule.timeout[PFTM_FRAG];
	314
	315	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
	316	VERIFY(BUFFER_FRAGMENTS(frag));
	317	if (frag->fr_timeout > expire) {
	318	break;
	319	}
	320
	321	switch (frag->fr_af) {
	322	case AF_INET:
	323	DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n",
	324	ntohs(frag->fr_id),
	325	(uint64_t)VM_KERNEL_ADDRPERM(frag)));
	326	break;
	327	case AF_INET6:
	328	DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n",
	329	ntohl(frag->fr_id6),
	330	(uint64_t)VM_KERNEL_ADDRPERM(frag)));
	331	break;
	332	default:
	333	VERIFY(0 && "only IPv4 and IPv6 supported");
	334	break;
	335	}
	336	pf_free_fragment(frag);
	337	}
	338
	339	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
	340	VERIFY(!BUFFER_FRAGMENTS(frag));
	341	if (frag->fr_timeout > expire) {
	342	break;
	343	}
	344
	345	switch (frag->fr_af) {
	346	case AF_INET:
	347	DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n",
	348	ntohs(frag->fr_id),
	349	(uint64_t)VM_KERNEL_ADDRPERM(frag)));
	350	break;
	351	case AF_INET6:
	352	DPFPRINTF(("expiring IPv6 %d(0x%llx) from cache.\n",
	353	ntohl(frag->fr_id6),
	354	(uint64_t)VM_KERNEL_ADDRPERM(frag)));
	355	break;
	356	default:
	357	VERIFY(0 && "only IPv4 and IPv6 supported");
	358	break;
	359	}
	360	pf_free_fragment(frag);
	361	VERIFY(TAILQ_EMPTY(&pf_cachequeue) \|\|
	362	TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	363	}
	364	}
	365
	366	/*
	367	* Try to flush old fragments to make space for new ones
	368	*/
	369
	370	static void
	371	pf_flush_fragments(void)
	372	{
	373	struct pf_fragment *frag;
	374	int goal;
	375
	376	goal = pf_nfrents * 9 / 10;
	377	DPFPRINTF(("trying to free > %d frents\n",
	378	pf_nfrents - goal));
	379	while (goal < pf_nfrents) {
	380	frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
	381	if (frag == NULL) {
	382	break;
	383	}
	384	pf_free_fragment(frag);
	385	}
	386
	387
	388	goal = pf_ncache * 9 / 10;
	389	DPFPRINTF(("trying to free > %d cache entries\n",
	390	pf_ncache - goal));
	391	while (goal < pf_ncache) {
	392	frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
	393	if (frag == NULL) {
	394	break;
	395	}
	396	pf_free_fragment(frag);
	397	}
	398	}
	399
	400	/* Frees the fragments and all associated entries */
	401
	402	static void
	403	pf_free_fragment(struct pf_fragment *frag)
	404	{
	405	struct pf_frent *frent;
	406	struct pf_frcache *frcache;
	407
	408	/* Free all fragments */
	409	if (BUFFER_FRAGMENTS(frag)) {
	410	for (frent = LIST_FIRST(&frag->fr_queue); frent;
	411	frent = LIST_FIRST(&frag->fr_queue)) {
	412	LIST_REMOVE(frent, fr_next);
	413
	414	m_freem(frent->fr_m);
	415	pool_put(&pf_frent_pl, frent);
	416	pf_nfrents--;
	417	}
	418	} else {
	419	for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
	420	frcache = LIST_FIRST(&frag->fr_cache)) {
	421	LIST_REMOVE(frcache, fr_next);
	422
	423	VERIFY(LIST_EMPTY(&frag->fr_cache) \|\|
	424	LIST_FIRST(&frag->fr_cache)->fr_off >
	425	frcache->fr_end);
	426
	427	pool_put(&pf_cent_pl, frcache);
	428	pf_ncache--;
	429	}
	430	}
	431
	432	pf_remove_fragment(frag);
	433	}
	434
	435	static void
	436	pf_ip6hdr2key(struct pf_fragment key, struct ip6_hdr ip6,
	437	struct ip6_frag *fh)
	438	{
	439	key->fr_p = fh->ip6f_nxt;
	440	key->fr_id6 = fh->ip6f_ident;
	441	key->fr_af = AF_INET6;
	442	key->fr_srcx.v6addr = ip6->ip6_src;
	443	key->fr_dstx.v6addr = ip6->ip6_dst;
	444	}
	445
	446	static void
	447	pf_ip2key(struct pf_fragment key, struct ip ip)
	448	{
	449	key->fr_p = ip->ip_p;
	450	key->fr_id = ip->ip_id;
	451	key->fr_af = AF_INET;
	452	key->fr_srcx.v4addr.s_addr = ip->ip_src.s_addr;
	453	key->fr_dstx.v4addr.s_addr = ip->ip_dst.s_addr;
	454	}
	455
	456	static struct pf_fragment *
	457	pf_find_fragment_by_key(struct pf_fragment key, struct pf_frag_tree tree)
	458	{
	459	struct pf_fragment *frag;
	460
	461	frag = RB_FIND(pf_frag_tree, tree, key);
	462	if (frag != NULL) {
	463	/* XXX Are we sure we want to update the timeout? */
	464	frag->fr_timeout = pf_time_second();
	465	if (BUFFER_FRAGMENTS(frag)) {
	466	TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
	467	TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
	468	} else {
	469	TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
	470	TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
	471	}
	472	}
	473
	474	return frag;
	475	}
	476
	477	static __inline struct pf_fragment *
	478	pf_find_fragment_by_ipv4_header(struct ip ip, struct pf_frag_tree tree)
	479	{
	480	struct pf_fragment key;
	481	pf_ip2key(&key, ip);
	482	return pf_find_fragment_by_key(&key, tree);
	483	}
	484
	485	/* Removes a fragment from the fragment queue and frees the fragment */
	486	static void
	487	pf_remove_fragment(struct pf_fragment *frag)
	488	{
	489	if (BUFFER_FRAGMENTS(frag)) {
	490	RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
	491	TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
	492	pool_put(&pf_frag_pl, frag);
	493	} else {
	494	RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
	495	TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
	496	pool_put(&pf_cache_pl, frag);
	497	}
	498	}
	499
	500	#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
	501	static struct mbuf *
	502	pf_reassemble(struct mbuf m0, struct pf_fragment *frag,
	503	struct pf_frent *frent, int mff)
	504	{
	505	struct mbuf m = m0, m2;
	506	struct pf_frent frea, next;
	507	struct pf_frent *frep = NULL;
	508	struct ip *ip = frent->fr_ip;
	509	uint32_t hlen = ip->ip_hl << 2;
	510	u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	511	u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	512	u_int16_t fr_max = ip_len + off;
	513	uint32_t csum, csum_flags;
	514
	515	VERIFY(frag == NULL \|\| BUFFER_FRAGMENTS(frag));
	516
	517	/*
	518	* Leverage partial checksum offload for IP fragments. Narrow down
	519	* the scope to cover only UDP without IP options, as that is the
	520	* most common case.
	521	*
	522	* Perform 1's complement adjustment of octets that got included/
	523	* excluded in the hardware-calculated checksum value. Ignore cases
	524	* where the value includes the entire IPv4 header span, as the sum
	525	* for those octets would already be 0 by the time we get here; IP
	526	* has already performed its header checksum validation. Also take
	527	* care of any trailing bytes and subtract out their partial sum.
	528	*/
	529	if (ip->ip_p == IPPROTO_UDP && hlen == sizeof(struct ip) &&
	530	(m->m_pkthdr.csum_flags &
	531	(CSUM_DATA_VALID \| CSUM_PARTIAL \| CSUM_PSEUDO_HDR)) ==
	532	(CSUM_DATA_VALID \| CSUM_PARTIAL)) {
	533	uint32_t start = m->m_pkthdr.csum_rx_start;
	534	int32_t trailer = (m_pktlen(m) - ntohs(ip->ip_len));
	535	uint32_t swbytes = (uint32_t)trailer;
	536
	537	csum = m->m_pkthdr.csum_rx_val;
	538
	539	ASSERT(trailer >= 0);
	540	if ((start != 0 && start != hlen) \|\| trailer != 0) {
	541	#if BYTE_ORDER != BIG_ENDIAN
	542	if (start < hlen) {
	543	HTONS(ip->ip_len);
	544	HTONS(ip->ip_off);
	545	}
	546	#endif /* BYTE_ORDER != BIG_ENDIAN */
	547	/* callee folds in sum */
	548	csum = m_adj_sum16(m, start, hlen,
	549	(ip->ip_len - hlen), csum);
	550	if (hlen > start) {
	551	swbytes += (hlen - start);
	552	} else {
	553	swbytes += (start - hlen);
	554	}
	555	#if BYTE_ORDER != BIG_ENDIAN
	556	if (start < hlen) {
	557	NTOHS(ip->ip_off);
	558	NTOHS(ip->ip_len);
	559	}
	560	#endif /* BYTE_ORDER != BIG_ENDIAN */
	561	}
	562	csum_flags = m->m_pkthdr.csum_flags;
	563
	564	if (swbytes != 0) {
	565	udp_in_cksum_stats(swbytes);
	566	}
	567	if (trailer != 0) {
	568	m_adj(m, -trailer);
	569	}
	570	} else {
	571	csum = 0;
	572	csum_flags = 0;
	573	}
	574
	575	/* Invalidate checksum */
	576	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
	577
	578	/* Strip off ip header */
	579	m->m_data += hlen;
	580	m->m_len -= hlen;
	581
	582	/* Create a new reassembly queue for this packet */
	583	if (*frag == NULL) {
	584	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
	585	if (*frag == NULL) {
	586	pf_flush_fragments();
	587	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
	588	if (*frag == NULL) {
	589	goto drop_fragment;
	590	}
	591	}
	592
	593	(*frag)->fr_flags = 0;
	594	(*frag)->fr_max = 0;
	595	(*frag)->fr_af = AF_INET;
	596	(*frag)->fr_srcx.v4addr = frent->fr_ip->ip_src;
	597	(*frag)->fr_dstx.v4addr = frent->fr_ip->ip_dst;
	598	(*frag)->fr_p = frent->fr_ip->ip_p;
	599	(*frag)->fr_id = frent->fr_ip->ip_id;
	600	(*frag)->fr_timeout = pf_time_second();
	601	if (csum_flags != 0) {
	602	(*frag)->fr_csum_flags = csum_flags;
	603	(*frag)->fr_csum = csum;
	604	}
	605	LIST_INIT(&(*frag)->fr_queue);
	606
	607	RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
	608	TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
	609
	610	/* We do not have a previous fragment */
	611	frep = NULL;
	612	goto insert;
	613	}
	614
	615	/*
	616	* If this fragment contains similar checksum offload info
	617	* as that of the existing ones, accumulate checksum. Otherwise,
	618	* invalidate checksum offload info for the entire datagram.
	619	*/
	620	if (csum_flags != 0 && csum_flags == (*frag)->fr_csum_flags) {
	621	(*frag)->fr_csum += csum;
	622	} else if ((*frag)->fr_csum_flags != 0) {
	623	(*frag)->fr_csum_flags = 0;
	624	}
	625
	626	/*
	627	* Find a fragment after the current one:
	628	* - off contains the real shifted offset.
	629	*/
	630	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
	631	if (FR_IP_OFF(frea) > off) {
	632	break;
	633	}
	634	frep = frea;
	635	}
	636
	637	VERIFY(frep != NULL \|\| frea != NULL);
	638
	639	if (frep != NULL &&
	640	FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	641	4 > off) {
	642	u_int16_t precut;
	643
	644	precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
	645	frep->fr_ip->ip_hl * 4 - off;
	646	if (precut >= ip_len) {
	647	goto drop_fragment;
	648	}
	649	m_adj(frent->fr_m, precut);
	650	DPFPRINTF(("overlap -%d\n", precut));
	651	/* Enforce 8 byte boundaries */
	652	ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
	653	off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	654	ip_len -= precut;
	655	ip->ip_len = htons(ip_len);
	656	}
	657
	658	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	659	frea = next) {
	660	u_int16_t aftercut;
	661
	662	aftercut = ip_len + off - FR_IP_OFF(frea);
	663	DPFPRINTF(("adjust overlap %d\n", aftercut));
	664	if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
	665	* 4) {
	666	frea->fr_ip->ip_len =
	667	htons(ntohs(frea->fr_ip->ip_len) - aftercut);
	668	frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
	669	(aftercut >> 3));
	670	m_adj(frea->fr_m, aftercut);
	671	break;
	672	}
	673
	674	/* This fragment is completely overlapped, lose it */
	675	next = LIST_NEXT(frea, fr_next);
	676	m_freem(frea->fr_m);
	677	LIST_REMOVE(frea, fr_next);
	678	pool_put(&pf_frent_pl, frea);
	679	pf_nfrents--;
	680	}
	681
	682	insert:
	683	/* Update maximum data size */
	684	if ((*frag)->fr_max < fr_max) {
	685	(*frag)->fr_max = fr_max;
	686	}
	687	/* This is the last segment */
	688	if (!mff) {
	689	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
	690	}
	691
	692	if (frep == NULL) {
	693	LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	694	} else {
	695	LIST_INSERT_AFTER(frep, frent, fr_next);
	696	}
	697
	698	/* Check if we are completely reassembled */
	699	if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) {
	700	return NULL;
	701	}
	702
	703	/* Check if we have all the data */
	704	off = 0;
	705	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
	706	next = LIST_NEXT(frep, fr_next);
	707
	708	off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
	709	if (off < (*frag)->fr_max &&
	710	(next == NULL \|\| FR_IP_OFF(next) != off)) {
	711	DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
	712	off, next == NULL ? -1 : FR_IP_OFF(next),
	713	(*frag)->fr_max));
	714	return NULL;
	715	}
	716	}
	717	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	718	if (off < (*frag)->fr_max) {
	719	return NULL;
	720	}
	721
	722	/* We have all the data */
	723	frent = LIST_FIRST(&(*frag)->fr_queue);
	724	VERIFY(frent != NULL);
	725	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
	726	DPFPRINTF(("drop: too big: %d\n", off));
	727	pf_free_fragment(*frag);
	728	*frag = NULL;
	729	return NULL;
	730	}
	731	next = LIST_NEXT(frent, fr_next);
	732
	733	/* Magic from ip_input */
	734	ip = frent->fr_ip;
	735	m = frent->fr_m;
	736	m2 = m->m_next;
	737	m->m_next = NULL;
	738	m_cat(m, m2);
	739	pool_put(&pf_frent_pl, frent);
	740	pf_nfrents--;
	741	for (frent = next; frent != NULL; frent = next) {
	742	next = LIST_NEXT(frent, fr_next);
	743
	744	m2 = frent->fr_m;
	745	pool_put(&pf_frent_pl, frent);
	746	pf_nfrents--;
	747	m_cat(m, m2);
	748	}
	749
	750	ip->ip_src = (*frag)->fr_srcx.v4addr;
	751	ip->ip_dst = (*frag)->fr_dstx.v4addr;
	752
	753	if ((*frag)->fr_csum_flags != 0) {
	754	csum = (*frag)->fr_csum;
	755
	756	ADDCARRY(csum);
	757
	758	m->m_pkthdr.csum_rx_val = csum;
	759	m->m_pkthdr.csum_rx_start = sizeof(struct ip);
	760	m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
	761	} else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) \|\|
	762	(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
	763	/* loopback checksums are always OK */
	764	m->m_pkthdr.csum_data = 0xffff;
	765	m->m_pkthdr.csum_flags =
	766	CSUM_DATA_VALID \| CSUM_PSEUDO_HDR \|
	767	CSUM_IP_CHECKED \| CSUM_IP_VALID;
	768	}
	769
	770	/* Remove from fragment queue */
	771	pf_remove_fragment(*frag);
	772	*frag = NULL;
	773
	774	hlen = ip->ip_hl << 2;
	775	ip->ip_len = htons(off + hlen);
	776	m->m_len += hlen;
	777	m->m_data -= hlen;
	778
	779	/* some debugging cruft by sklower, below, will go away soon */
	780	/* XXX this should be done elsewhere */
	781	if (m->m_flags & M_PKTHDR) {
	782	int plen = 0;
	783	for (m2 = m; m2; m2 = m2->m_next) {
	784	plen += m2->m_len;
	785	}
	786	m->m_pkthdr.len = plen;
	787	}
	788
	789	DPFPRINTF(("complete: 0x%llx(%d)\n",
	790	(uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len)));
	791	return m;
	792
	793	drop_fragment:
	794	/* Oops - fail safe - drop packet */
	795	pool_put(&pf_frent_pl, frent);
	796	pf_nfrents--;
	797	m_freem(m);
	798	return NULL;
	799	}
	800
	801	static struct mbuf *
	802	pf_fragcache(struct mbuf *m0, struct ip h, struct pf_fragment **frag, int mff,
	803	int drop, int *nomem)
	804	{
	805	struct mbuf m = m0;
	806	struct pf_frcache frp, fra, *cur = NULL;
	807	int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	808	u_int16_t off = ntohs(h->ip_off) << 3;
	809	u_int16_t fr_max = ip_len + off;
	810	int hosed = 0;
	811
	812	VERIFY(frag == NULL \|\| !BUFFER_FRAGMENTS(frag));
	813
	814	/* Create a new range queue for this packet */
	815	if (*frag == NULL) {
	816	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
	817	if (*frag == NULL) {
	818	pf_flush_fragments();
	819	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
	820	if (*frag == NULL) {
	821	goto no_mem;
	822	}
	823	}
	824
	825	/* Get an entry for the queue */
	826	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
	827	if (cur == NULL) {
	828	pool_put(&pf_cache_pl, *frag);
	829	*frag = NULL;
	830	goto no_mem;
	831	}
	832	pf_ncache++;
	833
	834	(*frag)->fr_flags = PFFRAG_NOBUFFER;
	835	(*frag)->fr_max = 0;
	836	(*frag)->fr_af = AF_INET;
	837	(*frag)->fr_srcx.v4addr = h->ip_src;
	838	(*frag)->fr_dstx.v4addr = h->ip_dst;
	839	(*frag)->fr_p = h->ip_p;
	840	(*frag)->fr_id = h->ip_id;
	841	(*frag)->fr_timeout = pf_time_second();
	842
	843	cur->fr_off = off;
	844	cur->fr_end = fr_max;
	845	LIST_INIT(&(*frag)->fr_cache);
	846	LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
	847
	848	RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
	849	TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
	850
	851	DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
	852	fr_max));
	853
	854	goto pass;
	855	}
	856
	857	/*
	858	* Find a fragment after the current one:
	859	* - off contains the real shifted offset.
	860	*/
	861	frp = NULL;
	862	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
	863	if (fra->fr_off > off) {
	864	break;
	865	}
	866	frp = fra;
	867	}
	868
	869	VERIFY(frp != NULL \|\| fra != NULL);
	870
	871	if (frp != NULL) {
	872	int precut;
	873
	874	precut = frp->fr_end - off;
	875	if (precut >= ip_len) {
	876	/* Fragment is entirely a duplicate */
	877	DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
	878	h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
	879	goto drop_fragment;
	880	}
	881	if (precut == 0) {
	882	/* They are adjacent. Fixup cache entry */
	883	DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
	884	h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
	885	frp->fr_end = fr_max;
	886	} else if (precut > 0) {
	887	/*
	888	* The first part of this payload overlaps with a
	889	* fragment that has already been passed.
	890	* Need to trim off the first part of the payload.
	891	* But to do so easily, we need to create another
	892	* mbuf to throw the original header into.
	893	*/
	894
	895	DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
	896	h->ip_id, precut, frp->fr_off, frp->fr_end, off,
	897	fr_max));
	898
	899	off += precut;
	900	fr_max -= precut;
	901	/* Update the previous frag to encompass this one */
	902	frp->fr_end = fr_max;
	903
	904	if (!drop) {
	905	/*
	906	* XXX Optimization opportunity
	907	* This is a very heavy way to trim the payload.
	908	* we could do it much faster by diddling mbuf
	909	* internals but that would be even less legible
	910	* than this mbuf magic. For my next trick,
	911	* I'll pull a rabbit out of my laptop.
	912	*/
	913	*m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT);
	914	if (*m0 == NULL) {
	915	goto no_mem;
	916	}
	917	VERIFY((*m0)->m_next == NULL);
	918	m_adj(m, precut + (h->ip_hl << 2));
	919	m_cat(*m0, m);
	920	m = *m0;
	921	if (m->m_flags & M_PKTHDR) {
	922	int plen = 0;
	923	struct mbuf *t;
	924	for (t = m; t; t = t->m_next) {
	925	plen += t->m_len;
	926	}
	927	m->m_pkthdr.len = plen;
	928	}
	929
	930
	931	h = mtod(m, struct ip *);
	932
	933
	934	VERIFY((int)m->m_len ==
	935	ntohs(h->ip_len) - precut);
	936	h->ip_off = htons(ntohs(h->ip_off) +
	937	(precut >> 3));
	938	h->ip_len = htons(ntohs(h->ip_len) - precut);
	939	} else {
	940	hosed++;
	941	}
	942	} else {
	943	/* There is a gap between fragments */
	944
	945	DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
	946	h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
	947	fr_max));
	948
	949	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
	950	if (cur == NULL) {
	951	goto no_mem;
	952	}
	953	pf_ncache++;
	954
	955	cur->fr_off = off;
	956	cur->fr_end = fr_max;
	957	LIST_INSERT_AFTER(frp, cur, fr_next);
	958	}
	959	}
	960
	961	if (fra != NULL) {
	962	int aftercut;
	963	int merge = 0;
	964
	965	aftercut = fr_max - fra->fr_off;
	966	if (aftercut == 0) {
	967	/* Adjacent fragments */
	968	DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
	969	h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
	970	fra->fr_off = off;
	971	merge = 1;
	972	} else if (aftercut > 0) {
	973	/* Need to chop off the tail of this fragment */
	974	DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
	975	h->ip_id, aftercut, off, fr_max, fra->fr_off,
	976	fra->fr_end));
	977	fra->fr_off = off;
	978	fr_max -= aftercut;
	979
	980	merge = 1;
	981
	982	if (!drop) {
	983	m_adj(m, -aftercut);
	984	if (m->m_flags & M_PKTHDR) {
	985	int plen = 0;
	986	struct mbuf *t;
	987	for (t = m; t; t = t->m_next) {
	988	plen += t->m_len;
	989	}
	990	m->m_pkthdr.len = plen;
	991	}
	992	h = mtod(m, struct ip *);
	993	VERIFY((int)m->m_len ==
	994	ntohs(h->ip_len) - aftercut);
	995	h->ip_len = htons(ntohs(h->ip_len) - aftercut);
	996	} else {
	997	hosed++;
	998	}
	999	} else if (frp == NULL) {
	1000	/* There is a gap between fragments */
	1001	DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
	1002	h->ip_id, -aftercut, off, fr_max, fra->fr_off,
	1003	fra->fr_end));
	1004
	1005	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
	1006	if (cur == NULL) {
	1007	goto no_mem;
	1008	}
	1009	pf_ncache++;
	1010
	1011	cur->fr_off = off;
	1012	cur->fr_end = fr_max;
	1013	LIST_INSERT_BEFORE(fra, cur, fr_next);
	1014	}
	1015
	1016
	1017	/* Need to glue together two separate fragment descriptors */
	1018	if (merge) {
	1019	if (cur && fra->fr_off <= cur->fr_end) {
	1020	/* Need to merge in a previous 'cur' */
	1021	DPFPRINTF(("fragcache[%d]: adjacent(merge "
	1022	"%d-%d) %d-%d (%d-%d)\n",
	1023	h->ip_id, cur->fr_off, cur->fr_end, off,
	1024	fr_max, fra->fr_off, fra->fr_end));
	1025	fra->fr_off = cur->fr_off;
	1026	LIST_REMOVE(cur, fr_next);
	1027	pool_put(&pf_cent_pl, cur);
	1028	pf_ncache--;
	1029	cur = NULL;
	1030	} else if (frp && fra->fr_off <= frp->fr_end) {
	1031	/* Need to merge in a modified 'frp' */
	1032	VERIFY(cur == NULL);
	1033	DPFPRINTF(("fragcache[%d]: adjacent(merge "
	1034	"%d-%d) %d-%d (%d-%d)\n",
	1035	h->ip_id, frp->fr_off, frp->fr_end, off,
	1036	fr_max, fra->fr_off, fra->fr_end));
	1037	fra->fr_off = frp->fr_off;
	1038	LIST_REMOVE(frp, fr_next);
	1039	pool_put(&pf_cent_pl, frp);
	1040	pf_ncache--;
	1041	frp = NULL;
	1042	}
	1043	}
	1044	}
	1045
	1046	if (hosed) {
	1047	/*
	1048	* We must keep tracking the overall fragment even when
	1049	* we're going to drop it anyway so that we know when to
	1050	* free the overall descriptor. Thus we drop the frag late.
	1051	*/
	1052	goto drop_fragment;
	1053	}
	1054
	1055
	1056	pass:
	1057	/* Update maximum data size */
	1058	if ((*frag)->fr_max < fr_max) {
	1059	(*frag)->fr_max = fr_max;
	1060	}
	1061
	1062	/* This is the last segment */
	1063	if (!mff) {
	1064	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
	1065	}
	1066
	1067	/* Check if we are completely reassembled */
	1068	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	1069	LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	1070	LIST_FIRST(&(frag)->fr_cache)->fr_end == (frag)->fr_max) {
	1071	/* Remove from fragment queue */
	1072	DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
	1073	(*frag)->fr_max));
	1074	pf_free_fragment(*frag);
	1075	*frag = NULL;
	1076	}
	1077
	1078	return m;
	1079
	1080	no_mem:
	1081	*nomem = 1;
	1082
	1083	/* Still need to pay attention to !IP_MF */
	1084	if (!mff && *frag != NULL) {
	1085	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
	1086	}
	1087
	1088	m_freem(m);
	1089	return NULL;
	1090
	1091	drop_fragment:
	1092
	1093	/* Still need to pay attention to !IP_MF */
	1094	if (!mff && *frag != NULL) {
	1095	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
	1096	}
	1097
	1098	if (drop) {
	1099	/* This fragment has been deemed bad. Don't reass */
	1100	if (((*frag)->fr_flags & PFFRAG_DROP) == 0) {
	1101	DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
	1102	h->ip_id));
	1103	}
	1104	(*frag)->fr_flags \|= PFFRAG_DROP;
	1105	}
	1106
	1107	m_freem(m);
	1108	return NULL;
	1109	}
	1110
	1111	#define FR_IP6_OFF(fr) \
	1112	(ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
	1113	#define FR_IP6_PLEN(fr) (ntohs((fr)->fr_ip6->ip6_plen))
	1114	struct mbuf *
	1115	pf_reassemble6(struct mbuf m0, struct pf_fragment frag,
	1116	struct pf_frent *frent, int mff)
	1117	{
	1118	struct mbuf m, m2;
	1119	struct pf_frent frea, frep, *next;
	1120	struct ip6_hdr *ip6;
	1121	struct ip6_frag *ip6f;
	1122	int plen, off, fr_max, pktlen;
	1123	uint32_t uoff, csum, csum_flags;
	1124
	1125	VERIFY(frag == NULL \|\| BUFFER_FRAGMENTS(frag));
	1126	m = *m0;
	1127	frep = NULL;
	1128	ip6 = frent->fr_ip6;
	1129	ip6f = &frent->fr_ip6f_opt;
	1130	off = FR_IP6_OFF(frent);
	1131	uoff = frent->fr_ip6f_hlen;
	1132	plen = FR_IP6_PLEN(frent);
	1133	fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof(*ip6));
	1134	pktlen = plen + sizeof(*ip6);
	1135
	1136	DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u "
	1137	"fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off,
	1138	frent->fr_ip6f_hlen, fr_max, m->m_len));
	1139
	1140	/*
	1141	* Leverage partial checksum offload for simple UDP/IP fragments,
	1142	* as that is the most common case.
	1143	*
	1144	* Perform 1's complement adjustment of octets that got included/
	1145	* excluded in the hardware-calculated checksum value. Also take
	1146	* care of any trailing bytes and subtract out their partial sum.
	1147	*/
	1148	if (ip6f->ip6f_nxt == IPPROTO_UDP &&
	1149	uoff == (sizeof(ip6) + sizeof(ip6f)) &&
	1150	(m->m_pkthdr.csum_flags &
	1151	(CSUM_DATA_VALID \| CSUM_PARTIAL \| CSUM_PSEUDO_HDR)) ==
	1152	(CSUM_DATA_VALID \| CSUM_PARTIAL)) {
	1153	uint32_t start = m->m_pkthdr.csum_rx_start;
	1154	uint32_t ip_len = (sizeof(*ip6) + ntohs(ip6->ip6_plen));
	1155	int32_t trailer = (m_pktlen(m) - ip_len);
	1156	uint32_t swbytes = (uint32_t)trailer;
	1157
	1158	csum = m->m_pkthdr.csum_rx_val;
	1159
	1160	ASSERT(trailer >= 0);
	1161	if (start != uoff \|\| trailer != 0) {
	1162	uint16_t s = 0, d = 0;
	1163
	1164	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
	1165	s = ip6->ip6_src.s6_addr16[1];
	1166	ip6->ip6_src.s6_addr16[1] = 0;
	1167	}
	1168	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
	1169	d = ip6->ip6_dst.s6_addr16[1];
	1170	ip6->ip6_dst.s6_addr16[1] = 0;
	1171	}
	1172
	1173	/* callee folds in sum */
	1174	csum = m_adj_sum16(m, start, uoff,
	1175	(ip_len - uoff), csum);
	1176	if (uoff > start) {
	1177	swbytes += (uoff - start);
	1178	} else {
	1179	swbytes += (start - uoff);
	1180	}
	1181
	1182	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
	1183	ip6->ip6_src.s6_addr16[1] = s;
	1184	}
	1185	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
	1186	ip6->ip6_dst.s6_addr16[1] = d;
	1187	}
	1188	}
	1189	csum_flags = m->m_pkthdr.csum_flags;
	1190
	1191	if (swbytes != 0) {
	1192	udp_in6_cksum_stats(swbytes);
	1193	}
	1194	if (trailer != 0) {
	1195	m_adj(m, -trailer);
	1196	}
	1197	} else {
	1198	csum = 0;
	1199	csum_flags = 0;
	1200	}
	1201
	1202	/* Invalidate checksum */
	1203	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
	1204
	1205	/* strip off headers up to the fragment payload */
	1206	m->m_data += frent->fr_ip6f_hlen;
	1207	m->m_len -= frent->fr_ip6f_hlen;
	1208
	1209	/* Create a new reassembly queue for this packet */
	1210	if (*frag == NULL) {
	1211	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
	1212	if (*frag == NULL) {
	1213	pf_flush_fragments();
	1214	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
	1215	if (*frag == NULL) {
	1216	goto drop_fragment;
	1217	}
	1218	}
	1219
	1220	(*frag)->fr_flags = 0;
	1221	(*frag)->fr_max = 0;
	1222	(*frag)->fr_ip6_maxlen = pktlen;
	1223	(*frag)->fr_af = AF_INET6;
	1224	(*frag)->fr_srcx.v6addr = frent->fr_ip6->ip6_src;
	1225	(*frag)->fr_dstx.v6addr = frent->fr_ip6->ip6_dst;
	1226	(*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
	1227	(*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
	1228	(*frag)->fr_timeout = pf_time_second();
	1229	if (csum_flags != 0) {
	1230	(*frag)->fr_csum_flags = csum_flags;
	1231	(*frag)->fr_csum = csum;
	1232	}
	1233	LIST_INIT(&(*frag)->fr_queue);
	1234
	1235	RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
	1236	TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
	1237
	1238	/* We do not have a previous fragment */
	1239	frep = NULL;
	1240	goto insert;
	1241	}
	1242
	1243	/* Remember maximum fragment len for refragmentation */
	1244	if (pktlen > (*frag)->fr_ip6_maxlen) {
	1245	(*frag)->fr_ip6_maxlen = pktlen;
	1246	}
	1247	/*
	1248	* If this fragment contains similar checksum offload info
	1249	* as that of the existing ones, accumulate checksum. Otherwise,
	1250	* invalidate checksum offload info for the entire datagram.
	1251	*/
	1252	if (csum_flags != 0 && csum_flags == (*frag)->fr_csum_flags) {
	1253	(*frag)->fr_csum += csum;
	1254	} else if ((*frag)->fr_csum_flags != 0) {
	1255	(*frag)->fr_csum_flags = 0;
	1256	}
	1257
	1258	/*
	1259	* Find a fragment after the current one:
	1260	* - off contains the real shifted offset.
	1261	*/
	1262	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
	1263	if (FR_IP6_OFF(frea) > off) {
	1264	break;
	1265	}
	1266	frep = frea;
	1267	}
	1268
	1269	VERIFY(frep != NULL \|\| frea != NULL);
	1270
	1271	if (frep != NULL &&
	1272	FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off) {
	1273	u_int16_t precut;
	1274
	1275	precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
	1276	frep->fr_ip6f_hlen - off;
	1277	if (precut >= plen) {
	1278	goto drop_fragment;
	1279	}
	1280	m_adj(frent->fr_m, precut);
	1281	DPFPRINTF(("overlap -%d\n", precut));
	1282	/* Enforce 8 byte boundaries */
	1283	frent->fr_ip6f_opt.ip6f_offlg =
	1284	htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
	1285	(precut >> 3));
	1286	off = FR_IP6_OFF(frent);
	1287	plen -= precut;
	1288	ip6->ip6_plen = htons(plen);
	1289	}
	1290
	1291	for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
	1292	u_int16_t aftercut;
	1293
	1294	aftercut = plen + off - FR_IP6_OFF(frea);
	1295	DPFPRINTF(("adjust overlap %d\n", aftercut));
	1296	if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
	1297	frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
	1298	aftercut);
	1299	frea->fr_ip6f_opt.ip6f_offlg =
	1300	htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
	1301	(aftercut >> 3));
	1302	m_adj(frea->fr_m, aftercut);
	1303	break;
	1304	}
	1305
	1306	/* This fragment is completely overlapped, lose it */
	1307	next = LIST_NEXT(frea, fr_next);
	1308	m_freem(frea->fr_m);
	1309	LIST_REMOVE(frea, fr_next);
	1310	pool_put(&pf_frent_pl, frea);
	1311	pf_nfrents--;
	1312	}
	1313
	1314	insert:
	1315	/* Update maximum data size */
	1316	if ((*frag)->fr_max < fr_max) {
	1317	(*frag)->fr_max = fr_max;
	1318	}
	1319	/* This is the last segment */
	1320	if (!mff) {
	1321	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
	1322	}
	1323
	1324	if (frep == NULL) {
	1325	LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	1326	} else {
	1327	LIST_INSERT_AFTER(frep, frent, fr_next);
	1328	}
	1329
	1330	/* Check if we are completely reassembled */
	1331	if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) {
	1332	return NULL;
	1333	}
	1334
	1335	/* Check if we have all the data */
	1336	off = 0;
	1337	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
	1338	next = LIST_NEXT(frep, fr_next);
	1339	off += FR_IP6_PLEN(frep) - (frent->fr_ip6f_hlen - sizeof *ip6);
	1340	DPFPRINTF(("frep at %d, next %d, max %d\n",
	1341	off, next == NULL ? -1 : FR_IP6_OFF(next),
	1342	(*frag)->fr_max));
	1343	if (off < (*frag)->fr_max &&
	1344	(next == NULL \|\| FR_IP6_OFF(next) != off)) {
	1345	DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
	1346	off, next == NULL ? -1 : FR_IP6_OFF(next),
	1347	(*frag)->fr_max));
	1348	return NULL;
	1349	}
	1350	}
	1351	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	1352	if (off < (*frag)->fr_max) {
	1353	return NULL;
	1354	}
	1355
	1356	/* We have all the data */
	1357	frent = LIST_FIRST(&(*frag)->fr_queue);
	1358	VERIFY(frent != NULL);
	1359	if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
	1360	DPFPRINTF(("drop: too big: %d\n", off));
	1361	pf_free_fragment(*frag);
	1362	*frag = NULL;
	1363	return NULL;
	1364	}
	1365
	1366	ASSERT(*frag != NULL);
	1367	ASSERT(frent != NULL);
	1368	next = LIST_NEXT(frent, fr_next);
	1369	if (next == NULL) {
	1370	DPFPRINTF(("drop: atomic fragment\n"));
	1371	pf_free_fragment(*frag);
	1372	*frag = NULL;
	1373	return NULL;
	1374	}
	1375
	1376	/* retrieve the values to be filled in to reassembled tag */
	1377	uint16_t hdrlen, unfragpartlen, extoff, maxlen;
	1378	uint32_t id;
	1379
	1380	/* Get total extension header length from the first fragment */
	1381	hdrlen = frent->fr_ip6f_hlen - sizeof(struct ip6_frag);
	1382	/*
	1383	* Get total extension header length of per-fragment headers from the
	1384	* subsequent fragment.
	1385	*/
	1386	unfragpartlen = next->fr_ip6f_hlen - sizeof(struct ip6_frag);
	1387	extoff = frent->fr_ip6f_extoff;
	1388	maxlen = (*frag)->fr_ip6_maxlen;
	1389	id = (*frag)->fr_id6;
	1390
	1391	ip6 = frent->fr_ip6;
	1392	ip6->ip6_nxt = (*frag)->fr_p;
	1393	ip6->ip6_plen = htons(off);
	1394	ip6->ip6_src = (*frag)->fr_srcx.v6addr;
	1395	ip6->ip6_dst = (*frag)->fr_dstx.v6addr;
	1396
	1397	if ((*frag)->fr_csum_flags != 0) {
	1398	csum = (*frag)->fr_csum;
	1399
	1400	ADDCARRY(csum);
	1401
	1402	m->m_pkthdr.csum_rx_val = csum;
	1403	m->m_pkthdr.csum_rx_start = sizeof(struct ip6_hdr);
	1404	m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
	1405	} else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) \|\|
	1406	(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
	1407	/* loopback checksums are always OK */
	1408	m->m_pkthdr.csum_data = 0xffff;
	1409	m->m_pkthdr.csum_flags = CSUM_DATA_VALID \| CSUM_PSEUDO_HDR;
	1410	}
	1411
	1412	/* Remove from fragment queue */
	1413	pf_remove_fragment(*frag);
	1414	*frag = NULL;
	1415
	1416	m = frent->fr_m;
	1417	m->m_len += sizeof(struct ip6_hdr);
	1418	m->m_data -= sizeof(struct ip6_hdr);
	1419	memmove(m->m_data, ip6, sizeof(struct ip6_hdr));
	1420
	1421	next = LIST_NEXT(frent, fr_next);
	1422	pool_put(&pf_frent_pl, frent);
	1423	pf_nfrents--;
	1424	for (frent = next; next != NULL; frent = next) {
	1425	m2 = frent->fr_m;
	1426
	1427	m_cat(m, m2);
	1428	next = LIST_NEXT(frent, fr_next);
	1429	pool_put(&pf_frent_pl, frent);
	1430	pf_nfrents--;
	1431	}
	1432
	1433	/* XXX this should be done elsewhere */
	1434	if (m->m_flags & M_PKTHDR) {
	1435	int len = 0;
	1436	for (m2 = m; m2; m2 = m2->m_next) {
	1437	len += m2->m_len;
	1438	}
	1439	m->m_pkthdr.len = len;
	1440	}
	1441
	1442	DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n",
	1443	(uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip6->ip6_plen),
	1444	m->m_pkthdr.len));
	1445
	1446	/* Add the reassembled tag */
	1447	struct m_tag *mtag;
	1448	struct pf_fragment_tag *ftag;
	1449	mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
	1450	sizeof(*ftag), M_NOWAIT, m);
	1451	if (mtag == NULL) {
	1452	/* XXX: add stats */
	1453	m_freem(m);
	1454	return NULL;
	1455	}
	1456	ftag = (struct pf_fragment_tag *)(mtag + 1);
	1457	ftag->ft_hdrlen = hdrlen;
	1458	ftag->ft_unfragpartlen = unfragpartlen;
	1459	ftag->ft_extoff = extoff;
	1460	ftag->ft_maxlen = maxlen;
	1461	ftag->ft_id = id;
	1462	m_tag_prepend(m, mtag);
	1463
	1464	struct pf_mtag *pftag = pf_get_mtag(m);
	1465	ASSERT(pftag != NULL);
	1466	pftag->pftag_flags \|= PF_TAG_REASSEMBLED;
	1467	return m;
	1468
	1469	drop_fragment:
	1470	/* Oops - fail safe - drop packet */
	1471	pool_put(&pf_frent_pl, frent);
	1472	--pf_nfrents;
	1473	m_freem(m);
	1474	return NULL;
	1475	}
	1476
	1477	static struct mbuf *
	1478	pf_frag6cache(struct mbuf *m0, struct ip6_hdr h, struct ip6_frag *fh,
	1479	struct pf_fragment *frag, int hlen, int mff, int drop, int nomem)
	1480	{
	1481	struct mbuf m = m0;
	1482	u_int16_t plen, off, fr_max;
	1483	struct pf_frcache frp, fra, *cur = NULL;
	1484	int hosed = 0;
	1485
	1486	VERIFY(frag == NULL \|\| !BUFFER_FRAGMENTS(frag));
	1487	m = *m0;
	1488	off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
	1489	plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);
	1490
	1491	/*
	1492	* Apple Modification: dimambro@apple.com. The hlen, being passed
	1493	* into this function Includes all the headers associated with
	1494	* the packet, and may include routing headers, so to get to
	1495	* the data payload as stored in the original IPv6 header we need
	1496	* to subtract al those headers and the IP header.
	1497	*
	1498	* The 'max' local variable should also contain the offset from the start
	1499	* of the reassembled packet to the octet just past the end of the octets
	1500	* in the current fragment where:
	1501	* - 'off' is the offset from the start of the reassembled packet to the
	1502	* first octet in the fragment,
	1503	* - 'plen' is the length of the "payload data length" Excluding all the
	1504	* IPv6 headers of the fragment.
	1505	* - 'hlen' is computed in pf_normalize_ip6() as the offset from the start
	1506	* of the IPv6 packet to the beginning of the data.
	1507	*/
	1508	fr_max = off + plen;
	1509
	1510	DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n",
	1511	(uint64_t)VM_KERNEL_ADDRPERM(m), plen, off, fr_max));
	1512
	1513	/* Create a new range queue for this packet */
	1514	if (*frag == NULL) {
	1515	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
	1516	if (*frag == NULL) {
	1517	pf_flush_fragments();
	1518	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
	1519	if (*frag == NULL) {
	1520	goto no_mem;
	1521	}
	1522	}
	1523
	1524	/* Get an entry for the queue */
	1525	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
	1526	if (cur == NULL) {
	1527	pool_put(&pf_cache_pl, *frag);
	1528	*frag = NULL;
	1529	goto no_mem;
	1530	}
	1531	pf_ncache++;
	1532
	1533	(*frag)->fr_flags = PFFRAG_NOBUFFER;
	1534	(*frag)->fr_max = 0;
	1535	(*frag)->fr_af = AF_INET6;
	1536	(*frag)->fr_srcx.v6addr = h->ip6_src;
	1537	(*frag)->fr_dstx.v6addr = h->ip6_dst;
	1538	(*frag)->fr_p = fh->ip6f_nxt;
	1539	(*frag)->fr_id6 = fh->ip6f_ident;
	1540	(*frag)->fr_timeout = pf_time_second();
	1541
	1542	cur->fr_off = off;
	1543	cur->fr_end = fr_max;
	1544	LIST_INIT(&(*frag)->fr_cache);
	1545	LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
	1546
	1547	RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
	1548	TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
	1549
	1550	DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
	1551	off, fr_max));
	1552
	1553	goto pass;
	1554	}
	1555
	1556	/*
	1557	* Find a fragment after the current one:
	1558	* - off contains the real shifted offset.
	1559	*/
	1560	frp = NULL;
	1561	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
	1562	if (fra->fr_off > off) {
	1563	break;
	1564	}
	1565	frp = fra;
	1566	}
	1567
	1568	VERIFY(frp != NULL \|\| fra != NULL);
	1569
	1570	if (frp != NULL) {
	1571	int precut;
	1572
	1573	precut = frp->fr_end - off;
	1574	if (precut >= plen) {
	1575	/* Fragment is entirely a duplicate */
	1576	DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
	1577	ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
	1578	off, fr_max));
	1579	goto drop_fragment;
	1580	}
	1581	if (precut == 0) {
	1582	/* They are adjacent. Fixup cache entry */
	1583	DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
	1584	ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
	1585	off, fr_max));
	1586	frp->fr_end = fr_max;
	1587	} else if (precut > 0) {
	1588	/* The first part of this payload overlaps with a
	1589	* fragment that has already been passed.
	1590	* Need to trim off the first part of the payload.
	1591	* But to do so easily, we need to create another
	1592	* mbuf to throw the original header into.
	1593	*/
	1594
	1595	DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
	1596	ntohl(fh->ip6f_ident), precut, frp->fr_off,
	1597	frp->fr_end, off, fr_max));
	1598
	1599	off += precut;
	1600	fr_max -= precut;
	1601	/* Update the previous frag to encompass this one */
	1602	frp->fr_end = fr_max;
	1603
	1604	if (!drop) {
	1605	/* XXX Optimization opportunity
	1606	* This is a very heavy way to trim the payload.
	1607	* we could do it much faster by diddling mbuf
	1608	* internals but that would be even less legible
	1609	* than this mbuf magic. For my next trick,
	1610	* I'll pull a rabbit out of my laptop.
	1611	*/
	1612	*m0 = m_copym(m, 0, hlen, M_NOWAIT);
	1613	if (*m0 == NULL) {
	1614	goto no_mem;
	1615	}
	1616	VERIFY((*m0)->m_next == NULL);
	1617	m_adj(m, precut + hlen);
	1618	m_cat(*m0, m);
	1619	m = *m0;
	1620	if (m->m_flags & M_PKTHDR) {
	1621	int pktlen = 0;
	1622	struct mbuf *t;
	1623	for (t = m; t; t = t->m_next) {
	1624	pktlen += t->m_len;
	1625	}
	1626	m->m_pkthdr.len = pktlen;
	1627	}
	1628
	1629	h = mtod(m, struct ip6_hdr *);
	1630
	1631	VERIFY((int)m->m_len ==
	1632	ntohs(h->ip6_plen) - precut);
	1633	fh->ip6f_offlg &= ~IP6F_OFF_MASK;
	1634	fh->ip6f_offlg \|=
	1635	htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
	1636	+ (precut >> 3));
	1637	h->ip6_plen = htons(ntohs(h->ip6_plen) -
	1638	precut);
	1639	} else {
	1640	hosed++;
	1641	}
	1642	} else {
	1643	/* There is a gap between fragments */
	1644
	1645	DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
	1646	ntohl(fh->ip6f_ident), -precut, frp->fr_off,
	1647	frp->fr_end, off, fr_max));
	1648
	1649	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
	1650	if (cur == NULL) {
	1651	goto no_mem;
	1652	}
	1653	pf_ncache++;
	1654
	1655	cur->fr_off = off;
	1656	cur->fr_end = fr_max;
	1657	LIST_INSERT_AFTER(frp, cur, fr_next);
	1658	}
	1659	}
	1660
	1661	if (fra != NULL) {
	1662	int aftercut;
	1663	int merge = 0;
	1664
	1665	aftercut = fr_max - fra->fr_off;
	1666	if (aftercut == 0) {
	1667	/* Adjacent fragments */
	1668	DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
	1669	ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
	1670	fra->fr_end));
	1671	fra->fr_off = off;
	1672	merge = 1;
	1673	} else if (aftercut > 0) {
	1674	/* Need to chop off the tail of this fragment */
	1675	DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
	1676	ntohl(fh->ip6f_ident), aftercut, off, fr_max,
	1677	fra->fr_off, fra->fr_end));
	1678	fra->fr_off = off;
	1679	fr_max -= aftercut;
	1680
	1681	merge = 1;
	1682
	1683	if (!drop) {
	1684	m_adj(m, -aftercut);
	1685	if (m->m_flags & M_PKTHDR) {
	1686	int pktlen = 0;
	1687	struct mbuf *t;
	1688	for (t = m; t; t = t->m_next) {
	1689	pktlen += t->m_len;
	1690	}
	1691	m->m_pkthdr.len = pktlen;
	1692	}
	1693	h = mtod(m, struct ip6_hdr *);
	1694	VERIFY((int)m->m_len ==
	1695	ntohs(h->ip6_plen) - aftercut);
	1696	h->ip6_plen =
	1697	htons(ntohs(h->ip6_plen) - aftercut);
	1698	} else {
	1699	hosed++;
	1700	}
	1701	} else if (frp == NULL) {
	1702	/* There is a gap between fragments */
	1703	DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
	1704	ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
	1705	fra->fr_off, fra->fr_end));
	1706
	1707	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
	1708	if (cur == NULL) {
	1709	goto no_mem;
	1710	}
	1711	pf_ncache++;
	1712
	1713	cur->fr_off = off;
	1714	cur->fr_end = fr_max;
	1715	LIST_INSERT_BEFORE(fra, cur, fr_next);
	1716	}
	1717
	1718	/* Need to glue together two separate fragment descriptors */
	1719	if (merge) {
	1720	if (cur && fra->fr_off <= cur->fr_end) {
	1721	/* Need to merge in a previous 'cur' */
	1722	DPFPRINTF(("frag6cache[%u]: adjacent(merge "
	1723	"%d-%d) %d-%d (%d-%d)\n",
	1724	ntohl(fh->ip6f_ident), cur->fr_off,
	1725	cur->fr_end, off, fr_max, fra->fr_off,
	1726	fra->fr_end));
	1727	fra->fr_off = cur->fr_off;
	1728	LIST_REMOVE(cur, fr_next);
	1729	pool_put(&pf_cent_pl, cur);
	1730	pf_ncache--;
	1731	cur = NULL;
	1732	} else if (frp && fra->fr_off <= frp->fr_end) {
	1733	/* Need to merge in a modified 'frp' */
	1734	VERIFY(cur == NULL);
	1735	DPFPRINTF(("frag6cache[%u]: adjacent(merge "
	1736	"%d-%d) %d-%d (%d-%d)\n",
	1737	ntohl(fh->ip6f_ident), frp->fr_off,
	1738	frp->fr_end, off, fr_max, fra->fr_off,
	1739	fra->fr_end));
	1740	fra->fr_off = frp->fr_off;
	1741	LIST_REMOVE(frp, fr_next);
	1742	pool_put(&pf_cent_pl, frp);
	1743	pf_ncache--;
	1744	frp = NULL;
	1745	}
	1746	}
	1747	}
	1748
	1749	if (hosed) {
	1750	/*
	1751	* We must keep tracking the overall fragment even when
	1752	* we're going to drop it anyway so that we know when to
	1753	* free the overall descriptor. Thus we drop the frag late.
	1754	*/
	1755	goto drop_fragment;
	1756	}
	1757
	1758	pass:
	1759	/* Update maximum data size */
	1760	if ((*frag)->fr_max < fr_max) {
	1761	(*frag)->fr_max = fr_max;
	1762	}
	1763
	1764	/* This is the last segment */
	1765	if (!mff) {
	1766	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
	1767	}
	1768
	1769	/* Check if we are completely reassembled */
	1770	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	1771	LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	1772	LIST_FIRST(&(frag)->fr_cache)->fr_end == (frag)->fr_max) {
	1773	/* Remove from fragment queue */
	1774	DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
	1775	ntohl(fh->ip6f_ident), (*frag)->fr_max));
	1776	pf_free_fragment(*frag);
	1777	*frag = NULL;
	1778	}
	1779
	1780	return m;
	1781
	1782	no_mem:
	1783	*nomem = 1;
	1784
	1785	/* Still need to pay attention to !IP_MF */
	1786	if (!mff && *frag != NULL) {
	1787	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
	1788	}
	1789
	1790	m_freem(m);
	1791	return NULL;
	1792
	1793	drop_fragment:
	1794
	1795	/* Still need to pay attention to !IP_MF */
	1796	if (!mff && *frag != NULL) {
	1797	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
	1798	}
	1799
	1800	if (drop) {
	1801	/* This fragment has been deemed bad. Don't reass */
	1802	if (((*frag)->fr_flags & PFFRAG_DROP) == 0) {
	1803	DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n",
	1804	ntohl(fh->ip6f_ident)));
	1805	}
	1806	(*frag)->fr_flags \|= PFFRAG_DROP;
	1807	}
	1808
	1809	m_freem(m);
	1810	return NULL;
	1811	}
	1812
	1813	int
	1814	pf_refragment6(struct ifnet ifp, pbuf_t pbufp, struct pf_fragment_tag ftag)
	1815	{
	1816	struct mbuf *m;
	1817	uint32_t frag_id;
	1818	uint16_t hdrlen, extoff, maxlen, unfragpartlen;
	1819	uint8_t proto;
	1820	int error, action;
	1821	uint8_t *lexthdrsp;
	1822	struct route_in6 ip6route;
	1823	struct route_in6 *ro;
	1824	struct sockaddr_in6 *dst;
	1825	struct ip6_hdr *hdr;
	1826	struct pf_mtag *mtag;
	1827	struct m_tag *tag;
	1828
	1829	if (pbufp == NULL \|\| !pbuf_is_valid(*pbufp) \|\| ftag == NULL) {
	1830	panic("pf_route6: invalid parameters");
	1831	/* NOT REACHED */
	1832	}
	1833	m = pbuf_to_mbuf(*pbufp, FALSE);
	1834	hdr = mtod(m, struct ip6_hdr *);
	1835	mtag = pf_find_mtag(m);
	1836	hdrlen = ftag->ft_hdrlen - sizeof(struct ip6_hdr);
	1837	extoff = ftag->ft_extoff;
	1838	maxlen = ftag->ft_maxlen;
	1839	frag_id = ftag->ft_id;
	1840	unfragpartlen = ftag->ft_unfragpartlen;
	1841	tag = (struct m_tag )(void )ftag;
	1842	tag = tag - 1;
	1843	m_tag_delete(m, tag);
	1844	ftag = NULL;
	1845	tag = NULL;
	1846	mtag->pftag_flags &= ~PF_TAG_REASSEMBLED;
	1847	ro = &ip6route;
	1848	bzero((caddr_t)ro, sizeof(*ro));
	1849	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	1850	dst->sin6_family = AF_INET6;
	1851	dst->sin6_len = sizeof(*dst);
	1852	dst->sin6_addr = hdr->ip6_dst;
	1853
	1854	if (extoff) {
	1855	int off;
	1856	struct mbuf *mexthdr;
	1857
	1858	/* Use protocol from next field of last extension header */
	1859	mexthdr = m_getptr(m, extoff +
	1860	offsetof(struct ip6_ext, ip6e_nxt), &off);
	1861	ASSERT(mexthdr != NULL);
	1862	lexthdrsp = (mtod(mexthdr, uint8_t *) + off);
	1863	proto = *lexthdrsp;
	1864	if (proto == IPPROTO_DSTOPTS) {
	1865	struct ip6_ext ext;
	1866	if (!pf_pull_hdr(*pbufp, off, &ext, sizeof(ext), NULL,
	1867	NULL, AF_INET6)) {
	1868	DPFPRINTF(("pkt too short"));
	1869	action = PF_DROP;
	1870	goto done;
	1871	}
	1872	proto = ext.ip6e_nxt;
	1873	}
	1874	} else {
	1875	lexthdrsp = NULL;
	1876	proto = hdr->ip6_nxt;
	1877	}
	1878
	1879	/*
	1880	* The MTU must be a multiple of 8 bytes, or we risk doing the
	1881	* fragmentation wrong.
	1882	*/
	1883	maxlen = maxlen & ~7;
	1884
	1885	error = ip6_do_fragmentation(&m, hdrlen, NULL, unfragpartlen,
	1886	hdr, lexthdrsp, maxlen, proto, frag_id);
	1887
	1888	if (error == 0) {
	1889	/*
	1890	* PF_TAG_REFRAGMENTED flag set to indicate ip6_forward()
	1891	* and pf_route6() that the mbuf contains a chain of fragments.
	1892	*/
	1893	mtag->pftag_flags \|= PF_TAG_REFRAGMENTED;
	1894	action = PF_PASS;
	1895	pbuf_init_mbuf(*pbufp, m, ifp);
	1896	} else {
	1897	DPFPRINTF(("refragment error %d", error));
	1898	action = PF_DROP;
	1899	goto done;
	1900	}
	1901	done:
	1902	return action;
	1903	}
	1904
	1905	int
	1906	pf_normalize_ip(pbuf_t pbuf, int dir, struct pfi_kif kif, u_short *reason,
	1907	struct pf_pdesc *pd)
	1908	{
	1909	struct mbuf *m;
	1910	struct pf_rule *r;
	1911	struct pf_frent *frent;
	1912	struct pf_fragment *frag = NULL;
	1913	struct ip *h = pbuf->pb_data;
	1914	int mff = (ntohs(h->ip_off) & IP_MF);
	1915	int hlen = h->ip_hl << 2;
	1916	u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	1917	u_int16_t fr_max;
	1918	int ip_len;
	1919	int ip_off;
	1920	int asd = 0;
	1921	struct pf_ruleset *ruleset = NULL;
	1922	struct ifnet *ifp = pbuf->pb_ifp;
	1923
	1924	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	1925	while (r != NULL) {
	1926	r->evaluations++;
	1927	if (pfi_kif_match(r->kif, kif) == r->ifnot) {
	1928	r = r->skip[PF_SKIP_IFP].ptr;
	1929	} else if (r->direction && r->direction != dir) {
	1930	r = r->skip[PF_SKIP_DIR].ptr;
	1931	} else if (r->af && r->af != AF_INET) {
	1932	r = r->skip[PF_SKIP_AF].ptr;
	1933	} else if (r->proto && r->proto != h->ip_p) {
	1934	r = r->skip[PF_SKIP_PROTO].ptr;
	1935	} else if (PF_MISMATCHAW(&r->src.addr,
	1936	(struct pf_addr *)&h->ip_src.s_addr, AF_INET,
	1937	r->src.neg, kif)) {
	1938	r = r->skip[PF_SKIP_SRC_ADDR].ptr;
	1939	} else if (PF_MISMATCHAW(&r->dst.addr,
	1940	(struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
	1941	r->dst.neg, NULL)) {
	1942	r = r->skip[PF_SKIP_DST_ADDR].ptr;
	1943	} else {
	1944	if (r->anchor == NULL) {
	1945	break;
	1946	} else {
	1947	pf_step_into_anchor(&asd, &ruleset,
	1948	PF_RULESET_SCRUB, &r, NULL, NULL);
	1949	}
	1950	}
	1951	if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
	1952	PF_RULESET_SCRUB, &r, NULL, NULL)) {
	1953	break;
	1954	}
	1955	}
	1956
	1957	if (r == NULL \|\| r->action == PF_NOSCRUB) {
	1958	return PF_PASS;
	1959	} else {
	1960	r->packets[dir == PF_OUT]++;
	1961	r->bytes[dir == PF_OUT] += pd->tot_len;
	1962	}
	1963
	1964	/* Check for illegal packets */
	1965	if (hlen < (int)sizeof(struct ip)) {
	1966	goto drop;
	1967	}
	1968
	1969	if (hlen > ntohs(h->ip_len)) {
	1970	goto drop;
	1971	}
	1972
	1973	/* Clear IP_DF if the rule uses the no-df option */
	1974	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
	1975	u_int16_t ipoff = h->ip_off;
	1976
	1977	h->ip_off &= htons(~IP_DF);
	1978	h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	1979	}
	1980
	1981	/* We will need other tests here */
	1982	if (!fragoff && !mff) {
	1983	goto no_fragment;
	1984	}
	1985
	1986	/*
	1987	* We're dealing with a fragment now. Don't allow fragments
	1988	* with IP_DF to enter the cache. If the flag was cleared by
	1989	* no-df above, fine. Otherwise drop it.
	1990	*/
	1991	if (h->ip_off & htons(IP_DF)) {
	1992	DPFPRINTF(("IP_DF\n"));
	1993	goto bad;
	1994	}
	1995
	1996	ip_len = ntohs(h->ip_len) - hlen;
	1997	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	1998
	1999	/* All fragments are 8 byte aligned */
	2000	if (mff && (ip_len & 0x7)) {
	2001	DPFPRINTF(("mff and %d\n", ip_len));
	2002	goto bad;
	2003	}
	2004
	2005	/* Respect maximum length */
	2006	if (fragoff + ip_len > IP_MAXPACKET) {
	2007	DPFPRINTF(("max packet %d\n", fragoff + ip_len));
	2008	goto bad;
	2009	}
	2010	fr_max = fragoff + ip_len;
	2011
	2012	if ((r->rule_flag & (PFRULE_FRAGCROP \| PFRULE_FRAGDROP)) == 0) {
	2013	/* Fully buffer all of the fragments */
	2014
	2015	frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
	2016	/* Check if we saw the last fragment already */
	2017	if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
	2018	fr_max > frag->fr_max) {
	2019	goto bad;
	2020	}
	2021
	2022	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
	2023	REASON_SET(reason, PFRES_MEMORY);
	2024	return PF_DROP;
	2025	}
	2026
	2027	VERIFY(!pbuf_is_valid(pbuf));
	2028
	2029	/* Restore iph pointer after pbuf_to_mbuf() */
	2030	h = mtod(m, struct ip *);
	2031
	2032	/* Get an entry for the fragment queue */
	2033	frent = pool_get(&pf_frent_pl, PR_NOWAIT);
	2034	if (frent == NULL) {
	2035	REASON_SET(reason, PFRES_MEMORY);
	2036	m_freem(m);
	2037	return PF_DROP;
	2038	}
	2039	pf_nfrents++;
	2040	frent->fr_ip = h;
	2041	frent->fr_m = m;
	2042
	2043	/* Might return a completely reassembled mbuf, or NULL */
	2044	DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
	2045	fragoff, fr_max));
	2046	m = pf_reassemble(m, &frag, frent, mff);
	2047
	2048	if (m == NULL) {
	2049	return PF_DROP;
	2050	}
	2051
	2052	VERIFY(m->m_flags & M_PKTHDR);
	2053	pbuf_init_mbuf(pbuf, m, ifp);
	2054
	2055	/* use mtag from concatenated mbuf chain */
	2056	pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
	2057	#if 0
	2058	// SCW: This check is superfluous
	2059	#if DIAGNOSTIC
	2060	if (pd->pf_mtag == NULL) {
	2061	printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
	2062	if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
	2063	m_freem(m);
	2064	m = NULL;
	2065	goto no_mem;
	2066	}
	2067	}
	2068	#endif
	2069	#endif
	2070
	2071	h = mtod(m, struct ip *);
	2072
	2073	if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) {
	2074	goto drop;
	2075	}
	2076	} else {
	2077	/* non-buffering fragment cache (drops or masks overlaps) */
	2078	int nomem = 0;
	2079
	2080	if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
	2081	/*
	2082	* Already passed the fragment cache in the
	2083	* input direction. If we continued, it would
	2084	* appear to be a dup and would be dropped.
	2085	*/
	2086	goto fragment_pass;
	2087	}
	2088
	2089	frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);
	2090
	2091	/* Check if we saw the last fragment already */
	2092	if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
	2093	fr_max > frag->fr_max) {
	2094	if (r->rule_flag & PFRULE_FRAGDROP) {
	2095	frag->fr_flags \|= PFFRAG_DROP;
	2096	}
	2097	goto bad;
	2098	}
	2099
	2100	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
	2101	REASON_SET(reason, PFRES_MEMORY);
	2102	goto bad;
	2103	}
	2104
	2105	VERIFY(!pbuf_is_valid(pbuf));
	2106
	2107	/* Restore iph pointer after pbuf_to_mbuf() */
	2108	h = mtod(m, struct ip *);
	2109
	2110	m = pf_fragcache(&m, h, &frag, mff,
	2111	(r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
	2112	if (m == NULL) {
	2113	// Note: pf_fragcache() has already m_freem'd the mbuf
	2114	if (nomem) {
	2115	goto no_mem;
	2116	}
	2117	goto drop;
	2118	}
	2119
	2120	VERIFY(m->m_flags & M_PKTHDR);
	2121	pbuf_init_mbuf(pbuf, m, ifp);
	2122
	2123	/* use mtag from copied and trimmed mbuf chain */
	2124	pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
	2125	#if 0
	2126	// SCW: This check is superfluous
	2127	#if DIAGNOSTIC
	2128	if (pd->pf_mtag == NULL) {
	2129	printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
	2130	if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
	2131	m_freem(m);
	2132	m = NULL;
	2133	goto no_mem;
	2134	}
	2135	}
	2136	#endif
	2137	#endif
	2138	if (dir == PF_IN) {
	2139	pd->pf_mtag->pftag_flags \|= PF_TAG_FRAGCACHE;
	2140	}
	2141
	2142	if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) {
	2143	goto drop;
	2144	}
	2145
	2146	goto fragment_pass;
	2147	}
	2148
	2149	no_fragment:
	2150	/* At this point, only IP_DF is allowed in ip_off */
	2151	if (h->ip_off & ~htons(IP_DF)) {
	2152	u_int16_t ipoff = h->ip_off;
	2153
	2154	h->ip_off &= htons(IP_DF);
	2155	h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	2156	}
	2157
	2158	/* Enforce a minimum ttl, may cause endless packet loops */
	2159	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
	2160	u_int16_t ip_ttl = h->ip_ttl;
	2161
	2162	h->ip_ttl = r->min_ttl;
	2163	h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	2164	}
	2165	if (r->rule_flag & PFRULE_RANDOMID) {
	2166	u_int16_t oip_id = h->ip_id;
	2167
	2168	if (rfc6864 && IP_OFF_IS_ATOMIC(ntohs(h->ip_off))) {
	2169	h->ip_id = 0;
	2170	} else {
	2171	h->ip_id = ip_randomid();
	2172	}
	2173	h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, 0);
	2174	}
	2175	if ((r->rule_flag & (PFRULE_FRAGCROP \| PFRULE_FRAGDROP)) == 0) {
	2176	pd->flags \|= PFDESC_IP_REAS;
	2177	}
	2178
	2179	return PF_PASS;
	2180
	2181	fragment_pass:
	2182	/* Enforce a minimum ttl, may cause endless packet loops */
	2183	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
	2184	u_int16_t ip_ttl = h->ip_ttl;
	2185
	2186	h->ip_ttl = r->min_ttl;
	2187	h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	2188	}
	2189	if ((r->rule_flag & (PFRULE_FRAGCROP \| PFRULE_FRAGDROP)) == 0) {
	2190	pd->flags \|= PFDESC_IP_REAS;
	2191	}
	2192	return PF_PASS;
	2193
	2194	no_mem:
	2195	REASON_SET(reason, PFRES_MEMORY);
	2196	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
	2197	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
	2198	NULL, NULL, pd);
	2199	}
	2200	return PF_DROP;
	2201
	2202	drop:
	2203	REASON_SET(reason, PFRES_NORM);
	2204	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
	2205	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
	2206	NULL, NULL, pd);
	2207	}
	2208	return PF_DROP;
	2209
	2210	bad:
	2211	DPFPRINTF(("dropping bad IPv4 fragment\n"));
	2212
	2213	/* Free associated fragments */
	2214	if (frag != NULL) {
	2215	pf_free_fragment(frag);
	2216	}
	2217
	2218	REASON_SET(reason, PFRES_FRAG);
	2219	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
	2220	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r, NULL, NULL, pd);
	2221	}
	2222
	2223	return PF_DROP;
	2224	}
	2225
	2226	static __inline struct pf_fragment *
	2227	pf_find_fragment_by_ipv6_header(struct ip6_hdr ip6, struct ip6_frag fh,
	2228	struct pf_frag_tree *tree)
	2229	{
	2230	struct pf_fragment key;
	2231	pf_ip6hdr2key(&key, ip6, fh);
	2232	return pf_find_fragment_by_key(&key, tree);
	2233	}
	2234
	2235	int
	2236	pf_normalize_ip6(pbuf_t pbuf, int dir, struct pfi_kif kif,
	2237	u_short reason, struct pf_pdesc pd)
	2238	{
	2239	struct mbuf *m = NULL;
	2240	struct pf_rule *r;
	2241	struct ip6_hdr *h = pbuf->pb_data;
	2242	int extoff;
	2243	int off;
	2244	struct ip6_ext ext;
	2245	struct ip6_opt opt;
	2246	struct ip6_opt_jumbo jumbo;
	2247	int optend;
	2248	int ooff;
	2249	struct ip6_frag frag;
	2250	u_int32_t jumbolen = 0, plen;
	2251	u_int16_t fragoff = 0;
	2252	u_int8_t proto;
	2253	int terminal;
	2254	struct pf_frent *frent;
	2255	struct pf_fragment *pff = NULL;
	2256	int mff = 0, rh_cnt = 0;
	2257	u_int16_t fr_max;
	2258	int asd = 0;
	2259	struct pf_ruleset *ruleset = NULL;
	2260	struct ifnet *ifp = pbuf->pb_ifp;
	2261
	2262	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	2263	while (r != NULL) {
	2264	r->evaluations++;
	2265	if (pfi_kif_match(r->kif, kif) == r->ifnot) {
	2266	r = r->skip[PF_SKIP_IFP].ptr;
	2267	} else if (r->direction && r->direction != dir) {
	2268	r = r->skip[PF_SKIP_DIR].ptr;
	2269	} else if (r->af && r->af != AF_INET6) {
	2270	r = r->skip[PF_SKIP_AF].ptr;
	2271	}
	2272	#if 0 /* header chain! */
	2273	else if (r->proto && r->proto != h->ip6_nxt) {
	2274	r = r->skip[PF_SKIP_PROTO].ptr;
	2275	}
	2276	#endif
	2277	else if (PF_MISMATCHAW(&r->src.addr,
	2278	(struct pf_addr *)(uintptr_t)&h->ip6_src, AF_INET6,
	2279	r->src.neg, kif)) {
	2280	r = r->skip[PF_SKIP_SRC_ADDR].ptr;
	2281	} else if (PF_MISMATCHAW(&r->dst.addr,
	2282	(struct pf_addr *)(uintptr_t)&h->ip6_dst, AF_INET6,
	2283	r->dst.neg, NULL)) {
	2284	r = r->skip[PF_SKIP_DST_ADDR].ptr;
	2285	} else {
	2286	if (r->anchor == NULL) {
	2287	break;
	2288	} else {
	2289	pf_step_into_anchor(&asd, &ruleset,
	2290	PF_RULESET_SCRUB, &r, NULL, NULL);
	2291	}
	2292	}
	2293	if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
	2294	PF_RULESET_SCRUB, &r, NULL, NULL)) {
	2295	break;
	2296	}
	2297	}
	2298
	2299	if (r == NULL \|\| r->action == PF_NOSCRUB) {
	2300	return PF_PASS;
	2301	} else {
	2302	r->packets[dir == PF_OUT]++;
	2303	r->bytes[dir == PF_OUT] += pd->tot_len;
	2304	}
	2305
	2306	/* Check for illegal packets */
	2307	if ((uint32_t)(sizeof(struct ip6_hdr) + IPV6_MAXPACKET) <
	2308	pbuf->pb_packet_len) {
	2309	goto drop;
	2310	}
	2311
	2312	extoff = 0;
	2313	off = sizeof(struct ip6_hdr);
	2314	proto = h->ip6_nxt;
	2315	terminal = 0;
	2316	do {
	2317	pd->proto = proto;
	2318	switch (proto) {
	2319	case IPPROTO_FRAGMENT:
	2320	goto fragment;
	2321	case IPPROTO_AH:
	2322	case IPPROTO_ROUTING:
	2323	case IPPROTO_DSTOPTS:
	2324	if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), NULL,
	2325	NULL, AF_INET6)) {
	2326	goto shortpkt;
	2327	}
	2328	extoff = off;
	2329	/*
	2330	* <jhw@apple.com>
	2331	* Multiple routing headers not allowed.
	2332	* Routing header type zero considered harmful.
	2333	*/
	2334	if (proto == IPPROTO_ROUTING) {
	2335	const struct ip6_rthdr *rh =
	2336	(const struct ip6_rthdr *)&ext;
	2337	if (rh_cnt++) {
	2338	goto drop;
	2339	}
	2340	if (rh->ip6r_type == IPV6_RTHDR_TYPE_0) {
	2341	goto drop;
	2342	}
	2343	} else if (proto == IPPROTO_AH) {
	2344	off += (ext.ip6e_len + 2) * 4;
	2345	} else {
	2346	off += (ext.ip6e_len + 1) * 8;
	2347	}
	2348	proto = ext.ip6e_nxt;
	2349	break;
	2350	case IPPROTO_HOPOPTS:
	2351	if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), NULL,
	2352	NULL, AF_INET6)) {
	2353	goto shortpkt;
	2354	}
	2355	extoff = off;
	2356	optend = off + (ext.ip6e_len + 1) * 8;
	2357	ooff = off + sizeof(ext);
	2358	do {
	2359	if (!pf_pull_hdr(pbuf, ooff, &opt.ip6o_type,
	2360	sizeof(opt.ip6o_type), NULL, NULL,
	2361	AF_INET6)) {
	2362	goto shortpkt;
	2363	}
	2364	if (opt.ip6o_type == IP6OPT_PAD1) {
	2365	ooff++;
	2366	continue;
	2367	}
	2368	if (!pf_pull_hdr(pbuf, ooff, &opt, sizeof(opt),
	2369	NULL, NULL, AF_INET6)) {
	2370	goto shortpkt;
	2371	}
	2372	if ((ooff + (int) sizeof(opt) + opt.ip6o_len) >
	2373	optend) {
	2374	goto drop;
	2375	}
	2376	switch (opt.ip6o_type) {
	2377	case IP6OPT_JUMBO:
	2378	if (h->ip6_plen != 0) {
	2379	goto drop;
	2380	}
	2381	if (!pf_pull_hdr(pbuf, ooff, &jumbo,
	2382	sizeof(jumbo), NULL, NULL,
	2383	AF_INET6)) {
	2384	goto shortpkt;
	2385	}
	2386	memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
	2387	sizeof(jumbolen));
	2388	jumbolen = ntohl(jumbolen);
	2389	if (jumbolen <= IPV6_MAXPACKET) {
	2390	goto drop;
	2391	}
	2392	if ((sizeof(struct ip6_hdr) +
	2393	jumbolen) != pbuf->pb_packet_len) {
	2394	goto drop;
	2395	}
	2396	break;
	2397	default:
	2398	break;
	2399	}
	2400	ooff += sizeof(opt) + opt.ip6o_len;
	2401	} while (ooff < optend);
	2402
	2403	off = optend;
	2404	proto = ext.ip6e_nxt;
	2405	break;
	2406	default:
	2407	terminal = 1;
	2408	break;
	2409	}
	2410	} while (!terminal);
	2411
	2412	/* jumbo payload option must be present, or plen > 0 */
	2413	if (ntohs(h->ip6_plen) == 0) {
	2414	plen = jumbolen;
	2415	} else {
	2416	plen = ntohs(h->ip6_plen);
	2417	}
	2418	if (plen == 0) {
	2419	goto drop;
	2420	}
	2421	if ((uint32_t)(sizeof(struct ip6_hdr) + plen) > pbuf->pb_packet_len) {
	2422	goto shortpkt;
	2423	}
	2424
	2425	/* Enforce a minimum ttl, may cause endless packet loops */
	2426	if (r->min_ttl && h->ip6_hlim < r->min_ttl) {
	2427	h->ip6_hlim = r->min_ttl;
	2428	}
	2429
	2430	return PF_PASS;
	2431
	2432	fragment:
	2433	plen = ntohs(h->ip6_plen);
	2434	/* Jumbo payload packets cannot be fragmented */
	2435	if (plen == 0 \|\| jumbolen) {
	2436	goto drop;
	2437	}
	2438
	2439	if (!pf_pull_hdr(pbuf, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) {
	2440	goto shortpkt;
	2441	}
	2442	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	2443	pd->proto = frag.ip6f_nxt;
	2444	mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
	2445	off += sizeof(frag);
	2446	if (fragoff + (plen - off) > IPV6_MAXPACKET) {
	2447	goto badfrag;
	2448	}
	2449
	2450	fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
	2451	// XXX SCW: mbuf-specific
	2452	// DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u "
	2453	// "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off,
	2454	// fragoff, fr_max));
	2455
	2456	if ((r->rule_flag & (PFRULE_FRAGCROP \| PFRULE_FRAGDROP)) == 0) {
	2457	/* Fully buffer all of the fragments */
	2458	pd->flags \|= PFDESC_IP_REAS;
	2459
	2460	pff = pf_find_fragment_by_ipv6_header(h, &frag,
	2461	&pf_frag_tree);
	2462
	2463	/* Check if we saw the last fragment already */
	2464	if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
	2465	fr_max > pff->fr_max) {
	2466	goto badfrag;
	2467	}
	2468
	2469	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
	2470	REASON_SET(reason, PFRES_MEMORY);
	2471	return PF_DROP;
	2472	}
	2473
	2474	/* Restore iph pointer after pbuf_to_mbuf() */
	2475	h = mtod(m, struct ip6_hdr *);
	2476
	2477	/* Get an entry for the fragment queue */
	2478	frent = pool_get(&pf_frent_pl, PR_NOWAIT);
	2479	if (frent == NULL) {
	2480	REASON_SET(reason, PFRES_MEMORY);
	2481	return PF_DROP;
	2482	}
	2483
	2484	pf_nfrents++;
	2485	frent->fr_ip6 = h;
	2486	frent->fr_m = m;
	2487	frent->fr_ip6f_opt = frag;
	2488	frent->fr_ip6f_extoff = extoff;
	2489	frent->fr_ip6f_hlen = off;
	2490	/* account for 2nd Destination Options header if present */
	2491	if (pd->proto == IPPROTO_DSTOPTS) {
	2492	if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), NULL,
	2493	NULL, AF_INET6)) {
	2494	goto shortpkt;
	2495	}
	2496	frent->fr_ip6f_hlen += (ext.ip6e_len + 1) * 8;
	2497	}
	2498
	2499	/* Might return a completely reassembled mbuf, or NULL */
	2500	DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
	2501	ntohl(frag.ip6f_ident), fragoff, fr_max));
	2502	m = pf_reassemble6(&m, &pff, frent, mff);
	2503
	2504	if (m == NULL) {
	2505	return PF_DROP;
	2506	}
	2507
	2508	pbuf_init_mbuf(pbuf, m, ifp);
	2509	h = pbuf->pb_data;
	2510
	2511	if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) {
	2512	goto drop;
	2513	}
	2514	} else if (dir == PF_IN \|\|
	2515	!(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
	2516	/* non-buffering fragment cache (overlaps: see RFC 5722) */
	2517	int nomem = 0;
	2518
	2519	pff = pf_find_fragment_by_ipv6_header(h, &frag,
	2520	&pf_cache_tree);
	2521
	2522	/* Check if we saw the last fragment already */
	2523	if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
	2524	fr_max > pff->fr_max) {
	2525	if (r->rule_flag & PFRULE_FRAGDROP) {
	2526	pff->fr_flags \|= PFFRAG_DROP;
	2527	}
	2528	goto badfrag;
	2529	}
	2530
	2531	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
	2532	goto no_mem;
	2533	}
	2534
	2535	/* Restore iph pointer after pbuf_to_mbuf() */
	2536	h = mtod(m, struct ip6_hdr *);
	2537
	2538	m = pf_frag6cache(&m, h, &frag, &pff, off, mff,
	2539	(r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
	2540	if (m == NULL) {
	2541	// Note: pf_frag6cache() has already m_freem'd the mbuf
	2542	if (nomem) {
	2543	goto no_mem;
	2544	}
	2545	goto drop;
	2546	}
	2547
	2548	pbuf_init_mbuf(pbuf, m, ifp);
	2549	pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
	2550	h = pbuf->pb_data;
	2551
	2552	if (dir == PF_IN) {
	2553	pd->pf_mtag->pftag_flags \|= PF_TAG_FRAGCACHE;
	2554	}
	2555
	2556	if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) {
	2557	goto drop;
	2558	}
	2559	}
	2560
	2561	/* Enforce a minimum ttl, may cause endless packet loops */
	2562	if (r->min_ttl && h->ip6_hlim < r->min_ttl) {
	2563	h->ip6_hlim = r->min_ttl;
	2564	}
	2565	return PF_PASS;
	2566
	2567	no_mem:
	2568	REASON_SET(reason, PFRES_MEMORY);
	2569	goto dropout;
	2570
	2571	shortpkt:
	2572	REASON_SET(reason, PFRES_SHORT);
	2573	goto dropout;
	2574
	2575	drop:
	2576	REASON_SET(reason, PFRES_NORM);
	2577	goto dropout;
	2578
	2579	badfrag:
	2580	DPFPRINTF(("dropping bad IPv6 fragment\n"));
	2581	REASON_SET(reason, PFRES_FRAG);
	2582	goto dropout;
	2583
	2584	dropout:
	2585	if (pff != NULL) {
	2586	pf_free_fragment(pff);
	2587	}
	2588	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
	2589	PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	2590	}
	2591	return PF_DROP;
	2592	}
	2593
	2594	int
	2595	pf_normalize_tcp(int dir, struct pfi_kif kif, pbuf_t pbuf, int ipoff,
	2596	int off, void h, struct pf_pdesc pd)
	2597	{
	2598	#pragma unused(ipoff, h)
	2599	struct pf_rule r, rm = NULL;
	2600	struct tcphdr *th = pd->hdr.tcp;
	2601	int rewrite = 0;
	2602	int asd = 0;
	2603	u_short reason;
	2604	u_int8_t flags;
	2605	sa_family_t af = pd->af;
	2606	struct pf_ruleset *ruleset = NULL;
	2607	union pf_state_xport sxport, dxport;
	2608
	2609	sxport.port = th->th_sport;
	2610	dxport.port = th->th_dport;
	2611
	2612	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	2613	while (r != NULL) {
	2614	r->evaluations++;
	2615	if (pfi_kif_match(r->kif, kif) == r->ifnot) {
	2616	r = r->skip[PF_SKIP_IFP].ptr;
	2617	} else if (r->direction && r->direction != dir) {
	2618	r = r->skip[PF_SKIP_DIR].ptr;
	2619	} else if (r->af && r->af != af) {
	2620	r = r->skip[PF_SKIP_AF].ptr;
	2621	} else if (r->proto && r->proto != pd->proto) {
	2622	r = r->skip[PF_SKIP_PROTO].ptr;
	2623	} else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
	2624	r->src.neg, kif)) {
	2625	r = r->skip[PF_SKIP_SRC_ADDR].ptr;
	2626	} else if (r->src.xport.range.op &&
	2627	!pf_match_xport(r->src.xport.range.op, r->proto_variant,
	2628	&r->src.xport, &sxport)) {
	2629	r = r->skip[PF_SKIP_SRC_PORT].ptr;
	2630	} else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
	2631	r->dst.neg, NULL)) {
	2632	r = r->skip[PF_SKIP_DST_ADDR].ptr;
	2633	} else if (r->dst.xport.range.op &&
	2634	!pf_match_xport(r->dst.xport.range.op, r->proto_variant,
	2635	&r->dst.xport, &dxport)) {
	2636	r = r->skip[PF_SKIP_DST_PORT].ptr;
	2637	} else if (r->os_fingerprint != PF_OSFP_ANY &&
	2638	!pf_osfp_match(pf_osfp_fingerprint(pd, pbuf, off, th),
	2639	r->os_fingerprint)) {
	2640	r = TAILQ_NEXT(r, entries);
	2641	} else {
	2642	if (r->anchor == NULL) {
	2643	rm = r;
	2644	break;
	2645	} else {
	2646	pf_step_into_anchor(&asd, &ruleset,
	2647	PF_RULESET_SCRUB, &r, NULL, NULL);
	2648	}
	2649	}
	2650	if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
	2651	PF_RULESET_SCRUB, &r, NULL, NULL)) {
	2652	break;
	2653	}
	2654	}
	2655
	2656	if (rm == NULL \|\| rm->action == PF_NOSCRUB) {
	2657	return PF_PASS;
	2658	} else {
	2659	r->packets[dir == PF_OUT]++;
	2660	r->bytes[dir == PF_OUT] += pd->tot_len;
	2661	}
	2662
	2663	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) {
	2664	pd->flags \|= PFDESC_TCP_NORM;
	2665	}
	2666
	2667	flags = th->th_flags;
	2668	if (flags & TH_SYN) {
	2669	/* Illegal packet */
	2670	if (flags & TH_RST) {
	2671	goto tcp_drop;
	2672	}
	2673
	2674	if (flags & TH_FIN) {
	2675	flags &= ~TH_FIN;
	2676	}
	2677	} else {
	2678	/* Illegal packet */
	2679	if (!(flags & (TH_ACK \| TH_RST))) {
	2680	goto tcp_drop;
	2681	}
	2682	}
	2683
	2684	if (!(flags & TH_ACK)) {
	2685	/* These flags are only valid if ACK is set */
	2686	if ((flags & TH_FIN) \|\| (flags & TH_PUSH) \|\| (flags & TH_URG)) {
	2687	goto tcp_drop;
	2688	}
	2689	}
	2690
	2691	/* Check for illegal header length */
	2692	if (th->th_off < (sizeof(struct tcphdr) >> 2)) {
	2693	goto tcp_drop;
	2694	}
	2695
	2696	/* If flags changed, or reserved data set, then adjust */
	2697	if (flags != th->th_flags \|\| th->th_x2 != 0) {
	2698	u_int16_t ov, nv;
	2699
	2700	ov = (u_int16_t )(&th->th_ack + 1);
	2701	th->th_flags = flags;
	2702	th->th_x2 = 0;
	2703	nv = (u_int16_t )(&th->th_ack + 1);
	2704
	2705	th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
	2706	rewrite = 1;
	2707	}
	2708
	2709	/* Remove urgent pointer, if TH_URG is not set */
	2710	if (!(flags & TH_URG) && th->th_urp) {
	2711	th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
	2712	th->th_urp = 0;
	2713	rewrite = 1;
	2714	}
	2715
	2716	/* copy back packet headers if we sanitized */
	2717	/* Process options */
	2718	if (r->max_mss) {
	2719	int rv = pf_normalize_tcpopt(r, dir, kif, pd, pbuf, th, off,
	2720	&rewrite);
	2721	if (rv == PF_DROP) {
	2722	return rv;
	2723	}
	2724	pbuf = pd->mp;
	2725	}
	2726
	2727	if (rewrite) {
	2728	if (pf_lazy_makewritable(pd, pbuf,
	2729	off + sizeof(*th)) == NULL) {
	2730	REASON_SET(&reason, PFRES_MEMORY);
	2731	if (r->log) {
	2732	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
	2733	r, 0, 0, pd);
	2734	}
	2735	return PF_DROP;
	2736	}
	2737
	2738	pbuf_copy_back(pbuf, off, sizeof(*th), th);
	2739	}
	2740
	2741	return PF_PASS;
	2742
	2743	tcp_drop:
	2744	REASON_SET(&reason, PFRES_NORM);
	2745	if (rm != NULL && r->log) {
	2746	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, r, NULL, NULL, pd);
	2747	}
	2748	return PF_DROP;
	2749	}
	2750
	2751	int
	2752	pf_normalize_tcp_init(pbuf_t pbuf, int off, struct pf_pdesc pd,
	2753	struct tcphdr th, struct pf_state_peer src, struct pf_state_peer *dst)
	2754	{
	2755	#pragma unused(dst)
	2756	u_int32_t tsval, tsecr;
	2757	u_int8_t hdr[60];
	2758	u_int8_t *opt;
	2759
	2760	VERIFY(src->scrub == NULL);
	2761
	2762	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	2763	if (src->scrub == NULL) {
	2764	return 1;
	2765	}
	2766	bzero(src->scrub, sizeof(*src->scrub));
	2767
	2768	switch (pd->af) {
	2769	#if INET
	2770	case AF_INET: {
	2771	struct ip *h = pbuf->pb_data;
	2772	src->scrub->pfss_ttl = h->ip_ttl;
	2773	break;
	2774	}
	2775	#endif /* INET */
	2776	case AF_INET6: {
	2777	struct ip6_hdr *h = pbuf->pb_data;
	2778	src->scrub->pfss_ttl = h->ip6_hlim;
	2779	break;
	2780	}
	2781	}
	2782
	2783
	2784	/*
	2785	* All normalizations below are only begun if we see the start of
	2786	* the connections. They must all set an enabled bit in pfss_flags
	2787	*/
	2788	if ((th->th_flags & TH_SYN) == 0) {
	2789	return 0;
	2790	}
	2791
	2792
	2793	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	2794	pf_pull_hdr(pbuf, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
	2795	/* Diddle with TCP options */
	2796	int hlen;
	2797	opt = hdr + sizeof(struct tcphdr);
	2798	hlen = (th->th_off << 2) - sizeof(struct tcphdr);
	2799	while (hlen >= TCPOLEN_TIMESTAMP) {
	2800	switch (*opt) {
	2801	case TCPOPT_EOL: /* FALLTHROUGH */
	2802	case TCPOPT_NOP:
	2803	opt++;
	2804	hlen--;
	2805	break;
	2806	case TCPOPT_TIMESTAMP:
	2807	if (opt[1] >= TCPOLEN_TIMESTAMP) {
	2808	src->scrub->pfss_flags \|=
	2809	PFSS_TIMESTAMP;
	2810	src->scrub->pfss_ts_mod =
	2811	htonl(random());
	2812
	2813	/* note PFSS_PAWS not set yet */
	2814	memcpy(&tsval, &opt[2],
	2815	sizeof(u_int32_t));
	2816	memcpy(&tsecr, &opt[6],
	2817	sizeof(u_int32_t));
	2818	src->scrub->pfss_tsval0 = ntohl(tsval);
	2819	src->scrub->pfss_tsval = ntohl(tsval);
	2820	src->scrub->pfss_tsecr = ntohl(tsecr);
	2821	getmicrouptime(&src->scrub->pfss_last);
	2822	}
	2823	OS_FALLTHROUGH;
	2824	default:
	2825	hlen -= MAX(opt[1], 2);
	2826	opt += MAX(opt[1], 2);
	2827	break;
	2828	}
	2829	}
	2830	}
	2831
	2832	return 0;
	2833	}
	2834
	2835	void
	2836	pf_normalize_tcp_cleanup(struct pf_state *state)
	2837	{
	2838	if (state->src.scrub) {
	2839	pool_put(&pf_state_scrub_pl, state->src.scrub);
	2840	}
	2841	if (state->dst.scrub) {
	2842	pool_put(&pf_state_scrub_pl, state->dst.scrub);
	2843	}
	2844
	2845	/* Someday... flush the TCP segment reassembly descriptors. */
	2846	}
	2847
	2848	int
	2849	pf_normalize_tcp_stateful(pbuf_t pbuf, int off, struct pf_pdesc pd,
	2850	u_short reason, struct tcphdr th, struct pf_state *state,
	2851	struct pf_state_peer src, struct pf_state_peer dst, int *writeback)
	2852	{
	2853	struct timeval uptime;
	2854	u_int32_t tsval = 0, tsecr = 0;
	2855	u_int tsval_from_last;
	2856	u_int8_t hdr[60];
	2857	u_int8_t *opt;
	2858	int copyback = 0;
	2859	int got_ts = 0;
	2860
	2861	VERIFY(src->scrub \|\| dst->scrub);
	2862
	2863	/*
	2864	* Enforce the minimum TTL seen for this connection. Negate a common
	2865	* technique to evade an intrusion detection system and confuse
	2866	* firewall state code.
	2867	*/
	2868	switch (pd->af) {
	2869	#if INET
	2870	case AF_INET: {
	2871	if (src->scrub) {
	2872	struct ip *h = pbuf->pb_data;
	2873	if (h->ip_ttl > src->scrub->pfss_ttl) {
	2874	src->scrub->pfss_ttl = h->ip_ttl;
	2875	}
	2876	h->ip_ttl = src->scrub->pfss_ttl;
	2877	}
	2878	break;
	2879	}
	2880	#endif /* INET */
	2881	case AF_INET6: {
	2882	if (src->scrub) {
	2883	struct ip6_hdr *h = pbuf->pb_data;
	2884	if (h->ip6_hlim > src->scrub->pfss_ttl) {
	2885	src->scrub->pfss_ttl = h->ip6_hlim;
	2886	}
	2887	h->ip6_hlim = src->scrub->pfss_ttl;
	2888	}
	2889	break;
	2890	}
	2891	}
	2892
	2893	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	2894	((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) \|\|
	2895	(dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	2896	pf_pull_hdr(pbuf, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
	2897	/* Diddle with TCP options */
	2898	int hlen;
	2899	opt = hdr + sizeof(struct tcphdr);
	2900	hlen = (th->th_off << 2) - sizeof(struct tcphdr);
	2901	while (hlen >= TCPOLEN_TIMESTAMP) {
	2902	switch (*opt) {
	2903	case TCPOPT_EOL: /* FALLTHROUGH */
	2904	case TCPOPT_NOP:
	2905	opt++;
	2906	hlen--;
	2907	break;
	2908	case TCPOPT_TIMESTAMP:
	2909	/*
	2910	* Modulate the timestamps. Can be used for
	2911	* NAT detection, OS uptime determination or
	2912	* reboot detection.
	2913	*/
	2914
	2915	if (got_ts) {
	2916	/* Huh? Multiple timestamps!? */
	2917	if (pf_status.debug >= PF_DEBUG_MISC) {
	2918	DPFPRINTF(("multiple TS??"));
	2919	pf_print_state(state);
	2920	printf("\n");
	2921	}
	2922	REASON_SET(reason, PFRES_TS);
	2923	return PF_DROP;
	2924	}
	2925	if (opt[1] >= TCPOLEN_TIMESTAMP) {
	2926	memcpy(&tsval, &opt[2],
	2927	sizeof(u_int32_t));
	2928	if (tsval && src->scrub &&
	2929	(src->scrub->pfss_flags &
	2930	PFSS_TIMESTAMP)) {
	2931	tsval = ntohl(tsval);
	2932	pf_change_a(&opt[2],
	2933	&th->th_sum,
	2934	htonl(tsval +
	2935	src->scrub->pfss_ts_mod),
	2936	0);
	2937	copyback = 1;
	2938	}
	2939
	2940	/* Modulate TS reply iff valid (!0) */
	2941	memcpy(&tsecr, &opt[6],
	2942	sizeof(u_int32_t));
	2943	if (tsecr && dst->scrub &&
	2944	(dst->scrub->pfss_flags &
	2945	PFSS_TIMESTAMP)) {
	2946	tsecr = ntohl(tsecr)
	2947	- dst->scrub->pfss_ts_mod;
	2948	pf_change_a(&opt[6],
	2949	&th->th_sum, htonl(tsecr),
	2950	0);
	2951	copyback = 1;
	2952	}
	2953	got_ts = 1;
	2954	}
	2955	OS_FALLTHROUGH;
	2956	default:
	2957	hlen -= MAX(opt[1], 2);
	2958	opt += MAX(opt[1], 2);
	2959	break;
	2960	}
	2961	}
	2962	if (copyback) {
	2963	/* Copyback the options, caller copys back header */
	2964	int optoff = off + sizeof(*th);
	2965	int optlen = (th->th_off << 2) - sizeof(*th);
	2966	if (pf_lazy_makewritable(pd, pbuf, optoff + optlen) ==
	2967	NULL) {
	2968	REASON_SET(reason, PFRES_MEMORY);
	2969	return PF_DROP;
	2970	}
	2971	*writeback = optoff + optlen;
	2972	pbuf_copy_back(pbuf, optoff, optlen, hdr + sizeof(*th));
	2973	}
	2974	}
	2975
	2976
	2977	/*
	2978	* Must invalidate PAWS checks on connections idle for too long.
	2979	* The fastest allowed timestamp clock is 1ms. That turns out to
	2980	* be about 24 days before it wraps. XXX Right now our lowerbound
	2981	* TS echo check only works for the first 12 days of a connection
	2982	* when the TS has exhausted half its 32bit space
	2983	*/
	2984	#define TS_MAX_IDLE (242460*60)
	2985	#define TS_MAX_CONN (12246060) / XXX remove when better tsecr check */
	2986
	2987	getmicrouptime(&uptime);
	2988	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	2989	(uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE \|\|
	2990	pf_time_second() - state->creation > TS_MAX_CONN)) {
	2991	if (pf_status.debug >= PF_DEBUG_MISC) {
	2992	DPFPRINTF(("src idled out of PAWS\n"));
	2993	pf_print_state(state);
	2994	printf("\n");
	2995	}
	2996	src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
	2997	\| PFSS_PAWS_IDLED;
	2998	}
	2999	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	3000	uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
	3001	if (pf_status.debug >= PF_DEBUG_MISC) {
	3002	DPFPRINTF(("dst idled out of PAWS\n"));
	3003	pf_print_state(state);
	3004	printf("\n");
	3005	}
	3006	dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
	3007	\| PFSS_PAWS_IDLED;
	3008	}
	3009
	3010	if (got_ts && src->scrub && dst->scrub &&
	3011	(src->scrub->pfss_flags & PFSS_PAWS) &&
	3012	(dst->scrub->pfss_flags & PFSS_PAWS)) {
	3013	/*
	3014	* Validate that the timestamps are "in-window".
	3015	* RFC1323 describes TCP Timestamp options that allow
	3016	* measurement of RTT (round trip time) and PAWS
	3017	* (protection against wrapped sequence numbers). PAWS
	3018	* gives us a set of rules for rejecting packets on
	3019	* long fat pipes (packets that were somehow delayed
	3020	* in transit longer than the time it took to send the
	3021	* full TCP sequence space of 4Gb). We can use these
	3022	* rules and infer a few others that will let us treat
	3023	* the 32bit timestamp and the 32bit echoed timestamp
	3024	* as sequence numbers to prevent a blind attacker from
	3025	* inserting packets into a connection.
	3026	*
	3027	* RFC1323 tells us:
	3028	* - The timestamp on this packet must be greater than
	3029	* or equal to the last value echoed by the other
	3030	* endpoint. The RFC says those will be discarded
	3031	* since it is a dup that has already been acked.
	3032	* This gives us a lowerbound on the timestamp.
	3033	* timestamp >= other last echoed timestamp
	3034	* - The timestamp will be less than or equal to
	3035	* the last timestamp plus the time between the
	3036	* last packet and now. The RFC defines the max
	3037	* clock rate as 1ms. We will allow clocks to be
	3038	* up to 10% fast and will allow a total difference
	3039	* or 30 seconds due to a route change. And this
	3040	* gives us an upperbound on the timestamp.
	3041	* timestamp <= last timestamp + max ticks
	3042	* We have to be careful here. Windows will send an
	3043	* initial timestamp of zero and then initialize it
	3044	* to a random value after the 3whs; presumably to
	3045	* avoid a DoS by having to call an expensive RNG
	3046	* during a SYN flood. Proof MS has at least one
	3047	* good security geek.
	3048	*
	3049	* - The TCP timestamp option must also echo the other
	3050	* endpoints timestamp. The timestamp echoed is the
	3051	* one carried on the earliest unacknowledged segment
	3052	* on the left edge of the sequence window. The RFC
	3053	* states that the host will reject any echoed
	3054	* timestamps that were larger than any ever sent.
	3055	* This gives us an upperbound on the TS echo.
	3056	* tescr <= largest_tsval
	3057	* - The lowerbound on the TS echo is a little more
	3058	* tricky to determine. The other endpoint's echoed
	3059	* values will not decrease. But there may be
	3060	* network conditions that re-order packets and
	3061	* cause our view of them to decrease. For now the
	3062	* only lowerbound we can safely determine is that
	3063	* the TS echo will never be less than the original
	3064	* TS. XXX There is probably a better lowerbound.
	3065	* Remove TS_MAX_CONN with better lowerbound check.
	3066	* tescr >= other original TS
	3067	*
	3068	* It is also important to note that the fastest
	3069	* timestamp clock of 1ms will wrap its 32bit space in
	3070	* 24 days. So we just disable TS checking after 24
	3071	* days of idle time. We actually must use a 12d
	3072	* connection limit until we can come up with a better
	3073	* lowerbound to the TS echo check.
	3074	*/
	3075	struct timeval delta_ts;
	3076	int ts_fudge;
	3077
	3078
	3079	/*
	3080	* PFTM_TS_DIFF is how many seconds of leeway to allow
	3081	* a host's timestamp. This can happen if the previous
	3082	* packet got delayed in transit for much longer than
	3083	* this packet.
	3084	*/
	3085	if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) {
	3086	ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
	3087	}
	3088
	3089
	3090	/* Calculate max ticks since the last timestamp */
	3091	#define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */
	3092	#define TS_MICROSECS 1000000 /* microseconds per second */
	3093	timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
	3094	tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
	3095	tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS / TS_MAXFREQ);
	3096
	3097
	3098	if ((src->state >= TCPS_ESTABLISHED &&
	3099	dst->state >= TCPS_ESTABLISHED) &&
	3100	(SEQ_LT(tsval, dst->scrub->pfss_tsecr) \|\|
	3101	SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) \|\|
	3102	(tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) \|\|
	3103	SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
	3104	/*
	3105	* Bad RFC1323 implementation or an insertion attack.
	3106	*
	3107	* - Solaris 2.6 and 2.7 are known to send another ACK
	3108	* after the FIN,FIN\|ACK,ACK closing that carries
	3109	* an old timestamp.
	3110	*/
	3111
	3112	DPFPRINTF(("Timestamp failed %c%c%c%c\n",
	3113	SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
	3114	SEQ_GT(tsval, src->scrub->pfss_tsval +
	3115	tsval_from_last) ? '1' : ' ',
	3116	SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
	3117	SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
	3118	DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u "
	3119	"idle: %lus %ums\n",
	3120	tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
	3121	delta_ts.tv_usec / 1000));
	3122	DPFPRINTF((" src->tsval: %u tsecr: %u\n",
	3123	src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
	3124	DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u\n",
	3125	dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
	3126	dst->scrub->pfss_tsval0));
	3127	if (pf_status.debug >= PF_DEBUG_MISC) {
	3128	pf_print_state(state);
	3129	pf_print_flags(th->th_flags);
	3130	printf("\n");
	3131	}
	3132	REASON_SET(reason, PFRES_TS);
	3133	return PF_DROP;
	3134	}
	3135
	3136	/* XXX I'd really like to require tsecr but it's optional */
	3137	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	3138	((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	3139	\|\| pd->p_len > 0 \|\| (th->th_flags & TH_SYN)) &&
	3140	src->scrub && dst->scrub &&
	3141	(src->scrub->pfss_flags & PFSS_PAWS) &&
	3142	(dst->scrub->pfss_flags & PFSS_PAWS)) {
	3143	/*
	3144	* Didn't send a timestamp. Timestamps aren't really useful
	3145	* when:
	3146	* - connection opening or closing (often not even sent).
	3147	* but we must not let an attacker to put a FIN on a
	3148	* data packet to sneak it through our ESTABLISHED check.
	3149	* - on a TCP reset. RFC suggests not even looking at TS.
	3150	* - on an empty ACK. The TS will not be echoed so it will
	3151	* probably not help keep the RTT calculation in sync and
	3152	* there isn't as much danger when the sequence numbers
	3153	* got wrapped. So some stacks don't include TS on empty
	3154	* ACKs :-(
	3155	*
	3156	* To minimize the disruption to mostly RFC1323 conformant
	3157	* stacks, we will only require timestamps on data packets.
	3158	*
	3159	* And what do ya know, we cannot require timestamps on data
	3160	* packets. There appear to be devices that do legitimate
	3161	* TCP connection hijacking. There are HTTP devices that allow
	3162	* a 3whs (with timestamps) and then buffer the HTTP request.
	3163	* If the intermediate device has the HTTP response cache, it
	3164	* will spoof the response but not bother timestamping its
	3165	* packets. So we can look for the presence of a timestamp in
	3166	* the first data packet and if there, require it in all future
	3167	* packets.
	3168	*/
	3169
	3170	if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
	3171	/*
	3172	* Hey! Someone tried to sneak a packet in. Or the
	3173	* stack changed its RFC1323 behavior?!?!
	3174	*/
	3175	if (pf_status.debug >= PF_DEBUG_MISC) {
	3176	DPFPRINTF(("Did not receive expected RFC1323 "
	3177	"timestamp\n"));
	3178	pf_print_state(state);
	3179	pf_print_flags(th->th_flags);
	3180	printf("\n");
	3181	}
	3182	REASON_SET(reason, PFRES_TS);
	3183	return PF_DROP;
	3184	}
	3185	}
	3186
	3187
	3188	/*
	3189	* We will note if a host sends his data packets with or without
	3190	* timestamps. And require all data packets to contain a timestamp
	3191	* if the first does. PAWS implicitly requires that all data packets be
	3192	* timestamped. But I think there are middle-man devices that hijack
	3193	* TCP streams immediately after the 3whs and don't timestamp their
	3194	* packets (seen in a WWW accelerator or cache).
	3195	*/
	3196	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	3197	(PFSS_TIMESTAMP \| PFSS_DATA_TS \| PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
	3198	if (got_ts) {
	3199	src->scrub->pfss_flags \|= PFSS_DATA_TS;
	3200	} else {
	3201	src->scrub->pfss_flags \|= PFSS_DATA_NOTS;
	3202	if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
	3203	(dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
	3204	/* Don't warn if other host rejected RFC1323 */
	3205	DPFPRINTF(("Broken RFC1323 stack did not "
	3206	"timestamp data packet. Disabled PAWS "
	3207	"security.\n"));
	3208	pf_print_state(state);
	3209	pf_print_flags(th->th_flags);
	3210	printf("\n");
	3211	}
	3212	}
	3213	}
	3214
	3215
	3216	/*
	3217	* Update PAWS values
	3218	*/
	3219	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	3220	(PFSS_PAWS_IDLED \| PFSS_TIMESTAMP))) {
	3221	getmicrouptime(&src->scrub->pfss_last);
	3222	if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) \|\|
	3223	(src->scrub->pfss_flags & PFSS_PAWS) == 0) {
	3224	src->scrub->pfss_tsval = tsval;
	3225	}
	3226
	3227	if (tsecr) {
	3228	if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) \|\|
	3229	(src->scrub->pfss_flags & PFSS_PAWS) == 0) {
	3230	src->scrub->pfss_tsecr = tsecr;
	3231	}
	3232
	3233	if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
	3234	(SEQ_LT(tsval, src->scrub->pfss_tsval0) \|\|
	3235	src->scrub->pfss_tsval0 == 0)) {
	3236	/* tsval0 MUST be the lowest timestamp */
	3237	src->scrub->pfss_tsval0 = tsval;
	3238	}
	3239
	3240	/* Only fully initialized after a TS gets echoed */
	3241	if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) {
	3242	src->scrub->pfss_flags \|= PFSS_PAWS;
	3243	}
	3244	}
	3245	}
	3246
	3247	/* I have a dream.... TCP segment reassembly.... */
	3248	return 0;
	3249	}
	3250
	3251	static int
	3252	pf_normalize_tcpopt(struct pf_rule r, int dir, struct pfi_kif kif,
	3253	struct pf_pdesc pd, pbuf_t pbuf, struct tcphdr *th, int off,
	3254	int *rewrptr)
	3255	{
	3256	#pragma unused(dir, kif)
	3257	sa_family_t af = pd->af;
	3258	u_int16_t *mss;
	3259	int thoff;
	3260	int opt, cnt, optlen = 0;
	3261	int rewrite = 0;
	3262	u_char opts[MAX_TCPOPTLEN];
	3263	u_char *optp = opts;
	3264
	3265	thoff = th->th_off << 2;
	3266	cnt = thoff - sizeof(struct tcphdr);
	3267
	3268	if (cnt > 0 && !pf_pull_hdr(pbuf, off + sizeof(*th), opts, cnt,
	3269	NULL, NULL, af)) {
	3270	return PF_DROP;
	3271	}
	3272
	3273	for (; cnt > 0; cnt -= optlen, optp += optlen) {
	3274	opt = optp[0];
	3275	if (opt == TCPOPT_EOL) {
	3276	break;
	3277	}
	3278	if (opt == TCPOPT_NOP) {
	3279	optlen = 1;
	3280	} else {
	3281	if (cnt < 2) {
	3282	break;
	3283	}
	3284	optlen = optp[1];
	3285	if (optlen < 2 \|\| optlen > cnt) {
	3286	break;
	3287	}
	3288	}
	3289	switch (opt) {
	3290	case TCPOPT_MAXSEG:
	3291	mss = (u_int16_t )(void )(optp + 2);
	3292	if ((ntohs(*mss)) > r->max_mss) {
	3293	/*
	3294	* <jhw@apple.com>
	3295	* Only do the TCP checksum fixup if delayed
	3296	* checksum calculation will not be performed.
	3297	*/
	3298	if (pbuf->pb_ifp \|\|
	3299	!(*pbuf->pb_csum_flags & CSUM_TCP)) {
	3300	th->th_sum = pf_cksum_fixup(th->th_sum,
	3301	*mss, htons(r->max_mss), 0);
	3302	}
	3303	*mss = htons(r->max_mss);
	3304	rewrite = 1;
	3305	}
	3306	break;
	3307	default:
	3308	break;
	3309	}
	3310	}
	3311
	3312	if (rewrite) {
	3313	u_short reason;
	3314
	3315	VERIFY(pbuf == pd->mp);
	3316
	3317	if (pf_lazy_makewritable(pd, pd->mp,
	3318	off + sizeof(*th) + thoff) == NULL) {
	3319	REASON_SET(&reason, PFRES_MEMORY);
	3320	if (r->log) {
	3321	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
	3322	r, 0, 0, pd);
	3323	}
	3324	return PF_DROP;
	3325	}
	3326
	3327	*rewrptr = 1;
	3328	pbuf_copy_back(pd->mp, off + sizeof(th), thoff - sizeof(th), opts);
	3329	}
	3330
	3331	return PF_PASS;
	3332	}