]>
git.saurik.com Git - apple/xnu.git/blob - bsd/net/pf_norm.c
2 * Copyright (c) 2007-2016 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 /* $apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
30 /* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
33 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
34 * All rights reserved.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
45 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
46 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
47 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
48 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
49 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
50 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
51 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
52 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
53 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
54 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57 #include <sys/param.h>
58 #include <sys/systm.h>
60 #include <sys/filio.h>
61 #include <sys/fcntl.h>
62 #include <sys/socket.h>
63 #include <sys/kernel.h>
65 #include <sys/random.h>
66 #include <sys/mcache.h>
69 #include <net/if_types.h>
71 #include <net/route.h>
72 #include <net/if_pflog.h>
74 #include <netinet/in.h>
75 #include <netinet/in_var.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip_var.h>
79 #include <netinet/tcp.h>
80 #include <netinet/tcp_seq.h>
81 #include <netinet/tcp_fsm.h>
82 #include <netinet/udp.h>
83 #include <netinet/ip_icmp.h>
86 #include <netinet/ip6.h>
89 #include <net/pfvar.h>
92 LIST_ENTRY(pf_frent
) fr_next
;
94 #define fr_ip fr_u.fru_ipv4
95 #define fr_ip6 fr_u.fru_ipv6
98 struct ip6_hdr
*fru_ipv6
;
100 struct ip6_frag fr_ip6f_opt
;
105 LIST_ENTRY(pf_frcache
) fr_next
;
110 #define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */
111 #define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */
112 #define PFFRAG_DROP 0x0004 /* Drop all fragments */
113 #define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER))
116 RB_ENTRY(pf_fragment
) fr_entry
;
117 TAILQ_ENTRY(pf_fragment
) frag_next
;
118 struct pf_addr fr_srcx
;
119 struct pf_addr fr_dstx
;
120 u_int8_t fr_p
; /* protocol of this fragment */
121 u_int8_t fr_flags
; /* status flags */
122 u_int16_t fr_max
; /* fragment data max */
123 #define fr_id fr_uid.fru_id4
124 #define fr_id6 fr_uid.fru_id6
130 u_int32_t fr_timeout
;
131 #define fr_queue fr_u.fru_queue
132 #define fr_cache fr_u.fru_cache
134 LIST_HEAD(pf_fragq
, pf_frent
) fru_queue
; /* buffering */
135 LIST_HEAD(pf_cacheq
, pf_frcache
) fru_cache
; /* non-buf */
137 uint32_t fr_csum_flags
; /* checksum flags */
138 uint32_t fr_csum
; /* partial checksum value */
141 static TAILQ_HEAD(pf_fragqueue
, pf_fragment
) pf_fragqueue
;
142 static TAILQ_HEAD(pf_cachequeue
, pf_fragment
) pf_cachequeue
;
144 static __inline
int pf_frag_compare(struct pf_fragment
*,
145 struct pf_fragment
*);
146 static RB_HEAD(pf_frag_tree
, pf_fragment
) pf_frag_tree
, pf_cache_tree
;
147 RB_PROTOTYPE_SC(__private_extern__
, pf_frag_tree
, pf_fragment
, fr_entry
,
149 RB_GENERATE(pf_frag_tree
, pf_fragment
, fr_entry
, pf_frag_compare
);
151 /* Private prototypes */
152 static void pf_ip6hdr2key(struct pf_fragment
*, struct ip6_hdr
*,
154 static void pf_ip2key(struct pf_fragment
*, struct ip
*);
155 static void pf_remove_fragment(struct pf_fragment
*);
156 static void pf_flush_fragments(void);
157 static void pf_free_fragment(struct pf_fragment
*);
158 static struct pf_fragment
*pf_find_fragment_by_key(struct pf_fragment
*,
159 struct pf_frag_tree
*);
160 static __inline
struct pf_fragment
*
161 pf_find_fragment_by_ipv4_header(struct ip
*, struct pf_frag_tree
*);
162 static __inline
struct pf_fragment
*
163 pf_find_fragment_by_ipv6_header(struct ip6_hdr
*, struct ip6_frag
*,
164 struct pf_frag_tree
*);
165 static struct mbuf
*pf_reassemble(struct mbuf
*, struct pf_fragment
**,
166 struct pf_frent
*, int);
167 static struct mbuf
*pf_fragcache(struct mbuf
**, struct ip
*,
168 struct pf_fragment
**, int, int, int *);
169 static struct mbuf
*pf_reassemble6(struct mbuf
**, struct pf_fragment
**,
170 struct pf_frent
*, int);
171 static struct mbuf
*pf_frag6cache(struct mbuf
**, struct ip6_hdr
*,
172 struct ip6_frag
*, struct pf_fragment
**, int, int, int, int *);
173 static int pf_normalize_tcpopt(struct pf_rule
*, int, struct pfi_kif
*,
174 struct pf_pdesc
*, pbuf_t
*, struct tcphdr
*, int, int *);
176 #define DPFPRINTF(x) do { \
177 if (pf_status.debug >= PF_DEBUG_MISC) { \
178 printf("%s: ", __func__); \
184 struct pool pf_frent_pl
, pf_frag_pl
;
185 static struct pool pf_cache_pl
, pf_cent_pl
;
186 struct pool pf_state_scrub_pl
;
188 static int pf_nfrents
, pf_ncache
;
191 pf_normalize_init(void)
193 pool_init(&pf_frent_pl
, sizeof (struct pf_frent
), 0, 0, 0, "pffrent",
195 pool_init(&pf_frag_pl
, sizeof (struct pf_fragment
), 0, 0, 0, "pffrag",
197 pool_init(&pf_cache_pl
, sizeof (struct pf_fragment
), 0, 0, 0,
199 pool_init(&pf_cent_pl
, sizeof (struct pf_frcache
), 0, 0, 0, "pffrcent",
201 pool_init(&pf_state_scrub_pl
, sizeof (struct pf_state_scrub
), 0, 0, 0,
204 pool_sethiwat(&pf_frag_pl
, PFFRAG_FRAG_HIWAT
);
205 pool_sethardlimit(&pf_frent_pl
, PFFRAG_FRENT_HIWAT
, NULL
, 0);
206 pool_sethardlimit(&pf_cache_pl
, PFFRAG_FRCACHE_HIWAT
, NULL
, 0);
207 pool_sethardlimit(&pf_cent_pl
, PFFRAG_FRCENT_HIWAT
, NULL
, 0);
209 TAILQ_INIT(&pf_fragqueue
);
210 TAILQ_INIT(&pf_cachequeue
);
215 pf_normalize_destroy(void)
217 pool_destroy(&pf_state_scrub_pl
);
218 pool_destroy(&pf_cent_pl
);
219 pool_destroy(&pf_cache_pl
);
220 pool_destroy(&pf_frag_pl
);
221 pool_destroy(&pf_frent_pl
);
226 pf_normalize_isempty(void)
228 return (TAILQ_EMPTY(&pf_fragqueue
) && TAILQ_EMPTY(&pf_cachequeue
));
232 pf_frag_compare(struct pf_fragment
*a
, struct pf_fragment
*b
)
236 if ((diff
= a
->fr_af
- b
->fr_af
))
238 else if ((diff
= a
->fr_p
- b
->fr_p
))
241 struct pf_addr
*sa
= &a
->fr_srcx
;
242 struct pf_addr
*sb
= &b
->fr_srcx
;
243 struct pf_addr
*da
= &a
->fr_dstx
;
244 struct pf_addr
*db
= &b
->fr_dstx
;
249 if ((diff
= a
->fr_id
- b
->fr_id
))
251 else if (sa
->v4addr
.s_addr
< sb
->v4addr
.s_addr
)
253 else if (sa
->v4addr
.s_addr
> sb
->v4addr
.s_addr
)
255 else if (da
->v4addr
.s_addr
< db
->v4addr
.s_addr
)
257 else if (da
->v4addr
.s_addr
> db
->v4addr
.s_addr
)
263 if ((diff
= a
->fr_id6
- b
->fr_id6
))
265 else if (sa
->addr32
[3] < sb
->addr32
[3])
267 else if (sa
->addr32
[3] > sb
->addr32
[3])
269 else if (sa
->addr32
[2] < sb
->addr32
[2])
271 else if (sa
->addr32
[2] > sb
->addr32
[2])
273 else if (sa
->addr32
[1] < sb
->addr32
[1])
275 else if (sa
->addr32
[1] > sb
->addr32
[1])
277 else if (sa
->addr32
[0] < sb
->addr32
[0])
279 else if (sa
->addr32
[0] > sb
->addr32
[0])
281 else if (da
->addr32
[3] < db
->addr32
[3])
283 else if (da
->addr32
[3] > db
->addr32
[3])
285 else if (da
->addr32
[2] < db
->addr32
[2])
287 else if (da
->addr32
[2] > db
->addr32
[2])
289 else if (da
->addr32
[1] < db
->addr32
[1])
291 else if (da
->addr32
[1] > db
->addr32
[1])
293 else if (da
->addr32
[0] < db
->addr32
[0])
295 else if (da
->addr32
[0] > db
->addr32
[0])
300 VERIFY(!0 && "only IPv4 and IPv6 supported!");
308 pf_purge_expired_fragments(void)
310 struct pf_fragment
*frag
;
311 u_int32_t expire
= pf_time_second() -
312 pf_default_rule
.timeout
[PFTM_FRAG
];
314 while ((frag
= TAILQ_LAST(&pf_fragqueue
, pf_fragqueue
)) != NULL
) {
315 VERIFY(BUFFER_FRAGMENTS(frag
));
316 if (frag
->fr_timeout
> expire
)
319 switch (frag
->fr_af
) {
321 DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n",
323 (uint64_t)VM_KERNEL_ADDRPERM(frag
)));
326 DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n",
328 (uint64_t)VM_KERNEL_ADDRPERM(frag
)));
331 VERIFY(0 && "only IPv4 and IPv6 supported");
334 pf_free_fragment(frag
);
337 while ((frag
= TAILQ_LAST(&pf_cachequeue
, pf_cachequeue
)) != NULL
) {
338 VERIFY(!BUFFER_FRAGMENTS(frag
));
339 if (frag
->fr_timeout
> expire
)
342 switch (frag
->fr_af
) {
344 DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n",
346 (uint64_t)VM_KERNEL_ADDRPERM(frag
)));
349 DPFPRINTF(("expiring IPv6 %d(0x%llx) from cache.\n",
351 (uint64_t)VM_KERNEL_ADDRPERM(frag
)));
354 VERIFY(0 && "only IPv4 and IPv6 supported");
357 pf_free_fragment(frag
);
358 VERIFY(TAILQ_EMPTY(&pf_cachequeue
) ||
359 TAILQ_LAST(&pf_cachequeue
, pf_cachequeue
) != frag
);
364 * Try to flush old fragments to make space for new ones
368 pf_flush_fragments(void)
370 struct pf_fragment
*frag
;
373 goal
= pf_nfrents
* 9 / 10;
374 DPFPRINTF(("trying to free > %d frents\n",
376 while (goal
< pf_nfrents
) {
377 frag
= TAILQ_LAST(&pf_fragqueue
, pf_fragqueue
);
380 pf_free_fragment(frag
);
384 goal
= pf_ncache
* 9 / 10;
385 DPFPRINTF(("trying to free > %d cache entries\n",
387 while (goal
< pf_ncache
) {
388 frag
= TAILQ_LAST(&pf_cachequeue
, pf_cachequeue
);
391 pf_free_fragment(frag
);
395 /* Frees the fragments and all associated entries */
398 pf_free_fragment(struct pf_fragment
*frag
)
400 struct pf_frent
*frent
;
401 struct pf_frcache
*frcache
;
403 /* Free all fragments */
404 if (BUFFER_FRAGMENTS(frag
)) {
405 for (frent
= LIST_FIRST(&frag
->fr_queue
); frent
;
406 frent
= LIST_FIRST(&frag
->fr_queue
)) {
407 LIST_REMOVE(frent
, fr_next
);
409 m_freem(frent
->fr_m
);
410 pool_put(&pf_frent_pl
, frent
);
414 for (frcache
= LIST_FIRST(&frag
->fr_cache
); frcache
;
415 frcache
= LIST_FIRST(&frag
->fr_cache
)) {
416 LIST_REMOVE(frcache
, fr_next
);
418 VERIFY(LIST_EMPTY(&frag
->fr_cache
) ||
419 LIST_FIRST(&frag
->fr_cache
)->fr_off
>
422 pool_put(&pf_cent_pl
, frcache
);
427 pf_remove_fragment(frag
);
431 pf_ip6hdr2key(struct pf_fragment
*key
, struct ip6_hdr
*ip6
,
434 key
->fr_p
= fh
->ip6f_nxt
;
435 key
->fr_id6
= fh
->ip6f_ident
;
436 key
->fr_af
= AF_INET6
;
437 key
->fr_srcx
.v6addr
= ip6
->ip6_src
;
438 key
->fr_dstx
.v6addr
= ip6
->ip6_dst
;
442 pf_ip2key(struct pf_fragment
*key
, struct ip
*ip
)
444 key
->fr_p
= ip
->ip_p
;
445 key
->fr_id
= ip
->ip_id
;
446 key
->fr_af
= AF_INET
;
447 key
->fr_srcx
.v4addr
.s_addr
= ip
->ip_src
.s_addr
;
448 key
->fr_dstx
.v4addr
.s_addr
= ip
->ip_dst
.s_addr
;
451 static struct pf_fragment
*
452 pf_find_fragment_by_key(struct pf_fragment
*key
, struct pf_frag_tree
*tree
)
454 struct pf_fragment
*frag
;
456 frag
= RB_FIND(pf_frag_tree
, tree
, key
);
458 /* XXX Are we sure we want to update the timeout? */
459 frag
->fr_timeout
= pf_time_second();
460 if (BUFFER_FRAGMENTS(frag
)) {
461 TAILQ_REMOVE(&pf_fragqueue
, frag
, frag_next
);
462 TAILQ_INSERT_HEAD(&pf_fragqueue
, frag
, frag_next
);
464 TAILQ_REMOVE(&pf_cachequeue
, frag
, frag_next
);
465 TAILQ_INSERT_HEAD(&pf_cachequeue
, frag
, frag_next
);
472 static __inline
struct pf_fragment
*
473 pf_find_fragment_by_ipv4_header(struct ip
*ip
, struct pf_frag_tree
*tree
)
475 struct pf_fragment key
;
477 return pf_find_fragment_by_key(&key
, tree
);
480 static __inline
struct pf_fragment
*
481 pf_find_fragment_by_ipv6_header(struct ip6_hdr
*ip6
, struct ip6_frag
*fh
,
482 struct pf_frag_tree
*tree
)
484 struct pf_fragment key
;
485 pf_ip6hdr2key(&key
, ip6
, fh
);
486 return pf_find_fragment_by_key(&key
, tree
);
489 /* Removes a fragment from the fragment queue and frees the fragment */
492 pf_remove_fragment(struct pf_fragment
*frag
)
494 if (BUFFER_FRAGMENTS(frag
)) {
495 RB_REMOVE(pf_frag_tree
, &pf_frag_tree
, frag
);
496 TAILQ_REMOVE(&pf_fragqueue
, frag
, frag_next
);
497 pool_put(&pf_frag_pl
, frag
);
499 RB_REMOVE(pf_frag_tree
, &pf_cache_tree
, frag
);
500 TAILQ_REMOVE(&pf_cachequeue
, frag
, frag_next
);
501 pool_put(&pf_cache_pl
, frag
);
505 #define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
507 pf_reassemble(struct mbuf
*m0
, struct pf_fragment
**frag
,
508 struct pf_frent
*frent
, int mff
)
510 struct mbuf
*m
= m0
, *m2
;
511 struct pf_frent
*frea
, *next
;
512 struct pf_frent
*frep
= NULL
;
513 struct ip
*ip
= frent
->fr_ip
;
514 uint32_t hlen
= ip
->ip_hl
<< 2;
515 u_int16_t off
= (ntohs(ip
->ip_off
) & IP_OFFMASK
) << 3;
516 u_int16_t ip_len
= ntohs(ip
->ip_len
) - ip
->ip_hl
* 4;
517 u_int16_t fr_max
= ip_len
+ off
;
518 uint32_t csum
, csum_flags
;
520 VERIFY(*frag
== NULL
|| BUFFER_FRAGMENTS(*frag
));
523 * Leverage partial checksum offload for IP fragments. Narrow down
524 * the scope to cover only UDP without IP options, as that is the
527 * Perform 1's complement adjustment of octets that got included/
528 * excluded in the hardware-calculated checksum value. Ignore cases
529 * where the value includes the entire IPv4 header span, as the sum
530 * for those octets would already be 0 by the time we get here; IP
531 * has already performed its header checksum validation. Also take
532 * care of any trailing bytes and subtract out their partial sum.
534 if (ip
->ip_p
== IPPROTO_UDP
&& hlen
== sizeof (struct ip
) &&
535 (m
->m_pkthdr
.csum_flags
&
536 (CSUM_DATA_VALID
| CSUM_PARTIAL
| CSUM_PSEUDO_HDR
)) ==
537 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
538 uint32_t start
= m
->m_pkthdr
.csum_rx_start
;
539 int32_t trailer
= (m_pktlen(m
) - ntohs(ip
->ip_len
));
540 uint32_t swbytes
= (uint32_t)trailer
;
542 csum
= m
->m_pkthdr
.csum_rx_val
;
544 ASSERT(trailer
>= 0);
545 if ((start
!= 0 && start
!= hlen
) || trailer
!= 0) {
546 #if BYTE_ORDER != BIG_ENDIAN
551 #endif /* BYTE_ORDER != BIG_ENDIAN */
552 /* callee folds in sum */
553 csum
= m_adj_sum16(m
, start
, hlen
,
554 (ip
->ip_len
- hlen
), csum
);
556 swbytes
+= (hlen
- start
);
558 swbytes
+= (start
- hlen
);
559 #if BYTE_ORDER != BIG_ENDIAN
564 #endif /* BYTE_ORDER != BIG_ENDIAN */
566 csum_flags
= m
->m_pkthdr
.csum_flags
;
569 udp_in_cksum_stats(swbytes
);
577 /* Invalidate checksum */
578 m
->m_pkthdr
.csum_flags
&= ~CSUM_DATA_VALID
;
580 /* Strip off ip header */
584 /* Create a new reassembly queue for this packet */
586 *frag
= pool_get(&pf_frag_pl
, PR_NOWAIT
);
588 pf_flush_fragments();
589 *frag
= pool_get(&pf_frag_pl
, PR_NOWAIT
);
594 (*frag
)->fr_flags
= 0;
596 (*frag
)->fr_af
= AF_INET
;
597 (*frag
)->fr_srcx
.v4addr
= frent
->fr_ip
->ip_src
;
598 (*frag
)->fr_dstx
.v4addr
= frent
->fr_ip
->ip_dst
;
599 (*frag
)->fr_p
= frent
->fr_ip
->ip_p
;
600 (*frag
)->fr_id
= frent
->fr_ip
->ip_id
;
601 (*frag
)->fr_timeout
= pf_time_second();
602 if (csum_flags
!= 0) {
603 (*frag
)->fr_csum_flags
= csum_flags
;
604 (*frag
)->fr_csum
= csum
;
606 LIST_INIT(&(*frag
)->fr_queue
);
608 RB_INSERT(pf_frag_tree
, &pf_frag_tree
, *frag
);
609 TAILQ_INSERT_HEAD(&pf_fragqueue
, *frag
, frag_next
);
611 /* We do not have a previous fragment */
617 * If this fragment contains similar checksum offload info
618 * as that of the existing ones, accumulate checksum. Otherwise,
619 * invalidate checksum offload info for the entire datagram.
621 if (csum_flags
!= 0 && csum_flags
== (*frag
)->fr_csum_flags
)
622 (*frag
)->fr_csum
+= csum
;
623 else if ((*frag
)->fr_csum_flags
!= 0)
624 (*frag
)->fr_csum_flags
= 0;
627 * Find a fragment after the current one:
628 * - off contains the real shifted offset.
630 LIST_FOREACH(frea
, &(*frag
)->fr_queue
, fr_next
) {
631 if (FR_IP_OFF(frea
) > off
)
636 VERIFY(frep
!= NULL
|| frea
!= NULL
);
639 FR_IP_OFF(frep
) + ntohs(frep
->fr_ip
->ip_len
) - frep
->fr_ip
->ip_hl
*
643 precut
= FR_IP_OFF(frep
) + ntohs(frep
->fr_ip
->ip_len
) -
644 frep
->fr_ip
->ip_hl
* 4 - off
;
645 if (precut
>= ip_len
)
647 m_adj(frent
->fr_m
, precut
);
648 DPFPRINTF(("overlap -%d\n", precut
));
649 /* Enforce 8 byte boundaries */
650 ip
->ip_off
= htons(ntohs(ip
->ip_off
) + (precut
>> 3));
651 off
= (ntohs(ip
->ip_off
) & IP_OFFMASK
) << 3;
653 ip
->ip_len
= htons(ip_len
);
656 for (; frea
!= NULL
&& ip_len
+ off
> FR_IP_OFF(frea
);
660 aftercut
= ip_len
+ off
- FR_IP_OFF(frea
);
661 DPFPRINTF(("adjust overlap %d\n", aftercut
));
662 if (aftercut
< ntohs(frea
->fr_ip
->ip_len
) - frea
->fr_ip
->ip_hl
664 frea
->fr_ip
->ip_len
=
665 htons(ntohs(frea
->fr_ip
->ip_len
) - aftercut
);
666 frea
->fr_ip
->ip_off
= htons(ntohs(frea
->fr_ip
->ip_off
) +
668 m_adj(frea
->fr_m
, aftercut
);
672 /* This fragment is completely overlapped, lose it */
673 next
= LIST_NEXT(frea
, fr_next
);
675 LIST_REMOVE(frea
, fr_next
);
676 pool_put(&pf_frent_pl
, frea
);
681 /* Update maximum data size */
682 if ((*frag
)->fr_max
< fr_max
)
683 (*frag
)->fr_max
= fr_max
;
684 /* This is the last segment */
686 (*frag
)->fr_flags
|= PFFRAG_SEENLAST
;
689 LIST_INSERT_HEAD(&(*frag
)->fr_queue
, frent
, fr_next
);
691 LIST_INSERT_AFTER(frep
, frent
, fr_next
);
693 /* Check if we are completely reassembled */
694 if (!((*frag
)->fr_flags
& PFFRAG_SEENLAST
))
697 /* Check if we have all the data */
699 for (frep
= LIST_FIRST(&(*frag
)->fr_queue
); frep
; frep
= next
) {
700 next
= LIST_NEXT(frep
, fr_next
);
702 off
+= ntohs(frep
->fr_ip
->ip_len
) - frep
->fr_ip
->ip_hl
* 4;
703 if (off
< (*frag
)->fr_max
&&
704 (next
== NULL
|| FR_IP_OFF(next
) != off
)) {
705 DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
706 off
, next
== NULL
? -1 : FR_IP_OFF(next
),
711 DPFPRINTF(("%d < %d?\n", off
, (*frag
)->fr_max
));
712 if (off
< (*frag
)->fr_max
)
715 /* We have all the data */
716 frent
= LIST_FIRST(&(*frag
)->fr_queue
);
717 VERIFY(frent
!= NULL
);
718 if ((frent
->fr_ip
->ip_hl
<< 2) + off
> IP_MAXPACKET
) {
719 DPFPRINTF(("drop: too big: %d\n", off
));
720 pf_free_fragment(*frag
);
724 next
= LIST_NEXT(frent
, fr_next
);
726 /* Magic from ip_input */
732 pool_put(&pf_frent_pl
, frent
);
734 for (frent
= next
; frent
!= NULL
; frent
= next
) {
735 next
= LIST_NEXT(frent
, fr_next
);
738 pool_put(&pf_frent_pl
, frent
);
743 ip
->ip_src
= (*frag
)->fr_srcx
.v4addr
;
744 ip
->ip_dst
= (*frag
)->fr_dstx
.v4addr
;
746 if ((*frag
)->fr_csum_flags
!= 0) {
747 csum
= (*frag
)->fr_csum
;
751 m
->m_pkthdr
.csum_rx_val
= csum
;
752 m
->m_pkthdr
.csum_rx_start
= sizeof (struct ip
);
753 m
->m_pkthdr
.csum_flags
= (*frag
)->fr_csum_flags
;
754 } else if ((m
->m_pkthdr
.rcvif
->if_flags
& IFF_LOOPBACK
) ||
755 (m
->m_pkthdr
.pkt_flags
& PKTF_LOOP
)) {
756 /* loopback checksums are always OK */
757 m
->m_pkthdr
.csum_data
= 0xffff;
758 m
->m_pkthdr
.csum_flags
&= ~CSUM_PARTIAL
;
759 m
->m_pkthdr
.csum_flags
=
760 CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
|
761 CSUM_IP_CHECKED
| CSUM_IP_VALID
;
764 /* Remove from fragment queue */
765 pf_remove_fragment(*frag
);
768 hlen
= ip
->ip_hl
<< 2;
769 ip
->ip_len
= htons(off
+ hlen
);
773 /* some debugging cruft by sklower, below, will go away soon */
774 /* XXX this should be done elsewhere */
775 if (m
->m_flags
& M_PKTHDR
) {
777 for (m2
= m
; m2
; m2
= m2
->m_next
)
779 m
->m_pkthdr
.len
= plen
;
782 DPFPRINTF(("complete: 0x%llx(%d)\n",
783 (uint64_t)VM_KERNEL_ADDRPERM(m
), ntohs(ip
->ip_len
)));
787 /* Oops - fail safe - drop packet */
788 pool_put(&pf_frent_pl
, frent
);
795 pf_fragcache(struct mbuf
**m0
, struct ip
*h
, struct pf_fragment
**frag
, int mff
,
796 int drop
, int *nomem
)
798 struct mbuf
*m
= *m0
;
799 struct pf_frcache
*frp
, *fra
, *cur
= NULL
;
800 int ip_len
= ntohs(h
->ip_len
) - (h
->ip_hl
<< 2);
801 u_int16_t off
= ntohs(h
->ip_off
) << 3;
802 u_int16_t fr_max
= ip_len
+ off
;
805 VERIFY(*frag
== NULL
|| !BUFFER_FRAGMENTS(*frag
));
807 /* Create a new range queue for this packet */
809 *frag
= pool_get(&pf_cache_pl
, PR_NOWAIT
);
811 pf_flush_fragments();
812 *frag
= pool_get(&pf_cache_pl
, PR_NOWAIT
);
817 /* Get an entry for the queue */
818 cur
= pool_get(&pf_cent_pl
, PR_NOWAIT
);
820 pool_put(&pf_cache_pl
, *frag
);
826 (*frag
)->fr_flags
= PFFRAG_NOBUFFER
;
828 (*frag
)->fr_af
= AF_INET
;
829 (*frag
)->fr_srcx
.v4addr
= h
->ip_src
;
830 (*frag
)->fr_dstx
.v4addr
= h
->ip_dst
;
831 (*frag
)->fr_p
= h
->ip_p
;
832 (*frag
)->fr_id
= h
->ip_id
;
833 (*frag
)->fr_timeout
= pf_time_second();
836 cur
->fr_end
= fr_max
;
837 LIST_INIT(&(*frag
)->fr_cache
);
838 LIST_INSERT_HEAD(&(*frag
)->fr_cache
, cur
, fr_next
);
840 RB_INSERT(pf_frag_tree
, &pf_cache_tree
, *frag
);
841 TAILQ_INSERT_HEAD(&pf_cachequeue
, *frag
, frag_next
);
843 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h
->ip_id
, off
,
850 * Find a fragment after the current one:
851 * - off contains the real shifted offset.
854 LIST_FOREACH(fra
, &(*frag
)->fr_cache
, fr_next
) {
855 if (fra
->fr_off
> off
)
860 VERIFY(frp
!= NULL
|| fra
!= NULL
);
865 precut
= frp
->fr_end
- off
;
866 if (precut
>= ip_len
) {
867 /* Fragment is entirely a duplicate */
868 DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
869 h
->ip_id
, frp
->fr_off
, frp
->fr_end
, off
, fr_max
));
873 /* They are adjacent. Fixup cache entry */
874 DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
875 h
->ip_id
, frp
->fr_off
, frp
->fr_end
, off
, fr_max
));
876 frp
->fr_end
= fr_max
;
877 } else if (precut
> 0) {
879 * The first part of this payload overlaps with a
880 * fragment that has already been passed.
881 * Need to trim off the first part of the payload.
882 * But to do so easily, we need to create another
883 * mbuf to throw the original header into.
886 DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
887 h
->ip_id
, precut
, frp
->fr_off
, frp
->fr_end
, off
,
892 /* Update the previous frag to encompass this one */
893 frp
->fr_end
= fr_max
;
897 * XXX Optimization opportunity
898 * This is a very heavy way to trim the payload.
899 * we could do it much faster by diddling mbuf
900 * internals but that would be even less legible
901 * than this mbuf magic. For my next trick,
902 * I'll pull a rabbit out of my laptop.
904 *m0
= m_copym(m
, 0, h
->ip_hl
<< 2, M_NOWAIT
);
907 VERIFY((*m0
)->m_next
== NULL
);
908 m_adj(m
, precut
+ (h
->ip_hl
<< 2));
911 if (m
->m_flags
& M_PKTHDR
) {
914 for (t
= m
; t
; t
= t
->m_next
)
916 m
->m_pkthdr
.len
= plen
;
920 h
= mtod(m
, struct ip
*);
923 VERIFY((int)m
->m_len
==
924 ntohs(h
->ip_len
) - precut
);
925 h
->ip_off
= htons(ntohs(h
->ip_off
) +
927 h
->ip_len
= htons(ntohs(h
->ip_len
) - precut
);
932 /* There is a gap between fragments */
934 DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
935 h
->ip_id
, -precut
, frp
->fr_off
, frp
->fr_end
, off
,
938 cur
= pool_get(&pf_cent_pl
, PR_NOWAIT
);
944 cur
->fr_end
= fr_max
;
945 LIST_INSERT_AFTER(frp
, cur
, fr_next
);
953 aftercut
= fr_max
- fra
->fr_off
;
955 /* Adjacent fragments */
956 DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
957 h
->ip_id
, off
, fr_max
, fra
->fr_off
, fra
->fr_end
));
960 } else if (aftercut
> 0) {
961 /* Need to chop off the tail of this fragment */
962 DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
963 h
->ip_id
, aftercut
, off
, fr_max
, fra
->fr_off
,
972 if (m
->m_flags
& M_PKTHDR
) {
975 for (t
= m
; t
; t
= t
->m_next
)
977 m
->m_pkthdr
.len
= plen
;
979 h
= mtod(m
, struct ip
*);
980 VERIFY((int)m
->m_len
==
981 ntohs(h
->ip_len
) - aftercut
);
982 h
->ip_len
= htons(ntohs(h
->ip_len
) - aftercut
);
986 } else if (frp
== NULL
) {
987 /* There is a gap between fragments */
988 DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
989 h
->ip_id
, -aftercut
, off
, fr_max
, fra
->fr_off
,
992 cur
= pool_get(&pf_cent_pl
, PR_NOWAIT
);
998 cur
->fr_end
= fr_max
;
999 LIST_INSERT_BEFORE(fra
, cur
, fr_next
);
1003 /* Need to glue together two separate fragment descriptors */
1005 if (cur
&& fra
->fr_off
<= cur
->fr_end
) {
1006 /* Need to merge in a previous 'cur' */
1007 DPFPRINTF(("fragcache[%d]: adjacent(merge "
1008 "%d-%d) %d-%d (%d-%d)\n",
1009 h
->ip_id
, cur
->fr_off
, cur
->fr_end
, off
,
1010 fr_max
, fra
->fr_off
, fra
->fr_end
));
1011 fra
->fr_off
= cur
->fr_off
;
1012 LIST_REMOVE(cur
, fr_next
);
1013 pool_put(&pf_cent_pl
, cur
);
1017 } else if (frp
&& fra
->fr_off
<= frp
->fr_end
) {
1018 /* Need to merge in a modified 'frp' */
1019 VERIFY(cur
== NULL
);
1020 DPFPRINTF(("fragcache[%d]: adjacent(merge "
1021 "%d-%d) %d-%d (%d-%d)\n",
1022 h
->ip_id
, frp
->fr_off
, frp
->fr_end
, off
,
1023 fr_max
, fra
->fr_off
, fra
->fr_end
));
1024 fra
->fr_off
= frp
->fr_off
;
1025 LIST_REMOVE(frp
, fr_next
);
1026 pool_put(&pf_cent_pl
, frp
);
1036 * We must keep tracking the overall fragment even when
1037 * we're going to drop it anyway so that we know when to
1038 * free the overall descriptor. Thus we drop the frag late.
1045 /* Update maximum data size */
1046 if ((*frag
)->fr_max
< fr_max
)
1047 (*frag
)->fr_max
= fr_max
;
1049 /* This is the last segment */
1051 (*frag
)->fr_flags
|= PFFRAG_SEENLAST
;
1053 /* Check if we are completely reassembled */
1054 if (((*frag
)->fr_flags
& PFFRAG_SEENLAST
) &&
1055 LIST_FIRST(&(*frag
)->fr_cache
)->fr_off
== 0 &&
1056 LIST_FIRST(&(*frag
)->fr_cache
)->fr_end
== (*frag
)->fr_max
) {
1057 /* Remove from fragment queue */
1058 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h
->ip_id
,
1060 pf_free_fragment(*frag
);
1069 /* Still need to pay attention to !IP_MF */
1070 if (!mff
&& *frag
!= NULL
)
1071 (*frag
)->fr_flags
|= PFFRAG_SEENLAST
;
1078 /* Still need to pay attention to !IP_MF */
1079 if (!mff
&& *frag
!= NULL
)
1080 (*frag
)->fr_flags
|= PFFRAG_SEENLAST
;
1083 /* This fragment has been deemed bad. Don't reass */
1084 if (((*frag
)->fr_flags
& PFFRAG_DROP
) == 0)
1085 DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
1087 (*frag
)->fr_flags
|= PFFRAG_DROP
;
1094 #define FR_IP6_OFF(fr) \
1095 (ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
1096 #define FR_IP6_PLEN(fr) (ntohs((fr)->fr_ip6->ip6_plen))
1098 pf_reassemble6(struct mbuf
**m0
, struct pf_fragment
**frag
,
1099 struct pf_frent
*frent
, int mff
)
1101 struct mbuf
*m
, *m2
;
1102 struct pf_frent
*frea
, *frep
, *next
;
1103 struct ip6_hdr
*ip6
;
1104 struct ip6_frag
*ip6f
;
1105 int plen
, off
, fr_max
;
1106 uint32_t uoff
, csum
, csum_flags
;
1108 VERIFY(*frag
== NULL
|| BUFFER_FRAGMENTS(*frag
));
1111 ip6
= frent
->fr_ip6
;
1112 ip6f
= &frent
->fr_ip6f_opt
;
1113 off
= FR_IP6_OFF(frent
);
1114 uoff
= frent
->fr_ip6f_hlen
;
1115 plen
= FR_IP6_PLEN(frent
);
1116 fr_max
= off
+ plen
- (frent
->fr_ip6f_hlen
- sizeof *ip6
);
1118 DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u "
1119 "fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m
), plen
, off
,
1120 frent
->fr_ip6f_hlen
, fr_max
, m
->m_len
));
1123 * Leverage partial checksum offload for simple UDP/IP fragments,
1124 * as that is the most common case.
1126 * Perform 1's complement adjustment of octets that got included/
1127 * excluded in the hardware-calculated checksum value. Also take
1128 * care of any trailing bytes and subtract out their partial sum.
1130 if (ip6f
->ip6f_nxt
== IPPROTO_UDP
&&
1131 uoff
== (sizeof (*ip6
) + sizeof (*ip6f
)) &&
1132 (m
->m_pkthdr
.csum_flags
&
1133 (CSUM_DATA_VALID
| CSUM_PARTIAL
| CSUM_PSEUDO_HDR
)) ==
1134 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
1135 uint32_t start
= m
->m_pkthdr
.csum_rx_start
;
1136 uint32_t ip_len
= (sizeof (*ip6
) + ntohs(ip6
->ip6_plen
));
1137 int32_t trailer
= (m_pktlen(m
) - ip_len
);
1138 uint32_t swbytes
= (uint32_t)trailer
;
1140 csum
= m
->m_pkthdr
.csum_rx_val
;
1142 ASSERT(trailer
>= 0);
1143 if (start
!= uoff
|| trailer
!= 0) {
1144 uint16_t s
= 0, d
= 0;
1146 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_src
)) {
1147 s
= ip6
->ip6_src
.s6_addr16
[1];
1148 ip6
->ip6_src
.s6_addr16
[1] = 0 ;
1150 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_dst
)) {
1151 d
= ip6
->ip6_dst
.s6_addr16
[1];
1152 ip6
->ip6_dst
.s6_addr16
[1] = 0;
1155 /* callee folds in sum */
1156 csum
= m_adj_sum16(m
, start
, uoff
,
1157 (ip_len
- uoff
), csum
);
1159 swbytes
+= (uoff
- start
);
1161 swbytes
+= (start
- uoff
);
1163 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_src
))
1164 ip6
->ip6_src
.s6_addr16
[1] = s
;
1165 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_dst
))
1166 ip6
->ip6_dst
.s6_addr16
[1] = d
;
1169 csum_flags
= m
->m_pkthdr
.csum_flags
;
1172 udp_in6_cksum_stats(swbytes
);
1180 /* Invalidate checksum */
1181 m
->m_pkthdr
.csum_flags
&= ~CSUM_DATA_VALID
;
1183 /* strip off headers up to the fragment payload */
1184 m
->m_data
+= frent
->fr_ip6f_hlen
;
1185 m
->m_len
-= frent
->fr_ip6f_hlen
;
1187 /* Create a new reassembly queue for this packet */
1188 if (*frag
== NULL
) {
1189 *frag
= pool_get(&pf_frag_pl
, PR_NOWAIT
);
1190 if (*frag
== NULL
) {
1191 pf_flush_fragments();
1192 *frag
= pool_get(&pf_frag_pl
, PR_NOWAIT
);
1197 (*frag
)->fr_flags
= 0;
1198 (*frag
)->fr_max
= 0;
1199 (*frag
)->fr_af
= AF_INET6
;
1200 (*frag
)->fr_srcx
.v6addr
= frent
->fr_ip6
->ip6_src
;
1201 (*frag
)->fr_dstx
.v6addr
= frent
->fr_ip6
->ip6_dst
;
1202 (*frag
)->fr_p
= frent
->fr_ip6f_opt
.ip6f_nxt
;
1203 (*frag
)->fr_id6
= frent
->fr_ip6f_opt
.ip6f_ident
;
1204 (*frag
)->fr_timeout
= pf_time_second();
1205 if (csum_flags
!= 0) {
1206 (*frag
)->fr_csum_flags
= csum_flags
;
1207 (*frag
)->fr_csum
= csum
;
1209 LIST_INIT(&(*frag
)->fr_queue
);
1211 RB_INSERT(pf_frag_tree
, &pf_frag_tree
, *frag
);
1212 TAILQ_INSERT_HEAD(&pf_fragqueue
, *frag
, frag_next
);
1214 /* We do not have a previous fragment */
1220 * If this fragment contains similar checksum offload info
1221 * as that of the existing ones, accumulate checksum. Otherwise,
1222 * invalidate checksum offload info for the entire datagram.
1224 if (csum_flags
!= 0 && csum_flags
== (*frag
)->fr_csum_flags
)
1225 (*frag
)->fr_csum
+= csum
;
1226 else if ((*frag
)->fr_csum_flags
!= 0)
1227 (*frag
)->fr_csum_flags
= 0;
1230 * Find a fragment after the current one:
1231 * - off contains the real shifted offset.
1233 LIST_FOREACH(frea
, &(*frag
)->fr_queue
, fr_next
) {
1234 if (FR_IP6_OFF(frea
) > off
)
1239 VERIFY(frep
!= NULL
|| frea
!= NULL
);
1242 FR_IP6_OFF(frep
) + FR_IP6_PLEN(frep
) - frep
->fr_ip6f_hlen
> off
)
1246 precut
= FR_IP6_OFF(frep
) + FR_IP6_PLEN(frep
) -
1247 frep
->fr_ip6f_hlen
- off
;
1250 m_adj(frent
->fr_m
, precut
);
1251 DPFPRINTF(("overlap -%d\n", precut
));
1252 /* Enforce 8 byte boundaries */
1253 frent
->fr_ip6f_opt
.ip6f_offlg
=
1254 htons(ntohs(frent
->fr_ip6f_opt
.ip6f_offlg
) +
1256 off
= FR_IP6_OFF(frent
);
1258 ip6
->ip6_plen
= htons(plen
);
1261 for (; frea
!= NULL
&& plen
+ off
> FR_IP6_OFF(frea
); frea
= next
) {
1264 aftercut
= plen
+ off
- FR_IP6_OFF(frea
);
1265 DPFPRINTF(("adjust overlap %d\n", aftercut
));
1266 if (aftercut
< FR_IP6_PLEN(frea
) - frea
->fr_ip6f_hlen
) {
1267 frea
->fr_ip6
->ip6_plen
= htons(FR_IP6_PLEN(frea
) -
1269 frea
->fr_ip6f_opt
.ip6f_offlg
=
1270 htons(ntohs(frea
->fr_ip6f_opt
.ip6f_offlg
) +
1272 m_adj(frea
->fr_m
, aftercut
);
1276 /* This fragment is completely overlapped, lose it */
1277 next
= LIST_NEXT(frea
, fr_next
);
1278 m_freem(frea
->fr_m
);
1279 LIST_REMOVE(frea
, fr_next
);
1280 pool_put(&pf_frent_pl
, frea
);
1285 /* Update maximum data size */
1286 if ((*frag
)->fr_max
< fr_max
)
1287 (*frag
)->fr_max
= fr_max
;
1288 /* This is the last segment */
1290 (*frag
)->fr_flags
|= PFFRAG_SEENLAST
;
1293 LIST_INSERT_HEAD(&(*frag
)->fr_queue
, frent
, fr_next
);
1295 LIST_INSERT_AFTER(frep
, frent
, fr_next
);
1297 /* Check if we are completely reassembled */
1298 if (!((*frag
)->fr_flags
& PFFRAG_SEENLAST
))
1301 /* Check if we have all the data */
1303 for (frep
= LIST_FIRST(&(*frag
)->fr_queue
); frep
; frep
= next
) {
1304 next
= LIST_NEXT(frep
, fr_next
);
1305 off
+= FR_IP6_PLEN(frep
) - (frent
->fr_ip6f_hlen
- sizeof *ip6
);
1306 DPFPRINTF(("frep at %d, next %d, max %d\n",
1307 off
, next
== NULL
? -1 : FR_IP6_OFF(next
),
1309 if (off
< (*frag
)->fr_max
&&
1310 (next
== NULL
|| FR_IP6_OFF(next
) != off
)) {
1311 DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
1312 off
, next
== NULL
? -1 : FR_IP6_OFF(next
),
1317 DPFPRINTF(("%d < %d?\n", off
, (*frag
)->fr_max
));
1318 if (off
< (*frag
)->fr_max
)
1321 /* We have all the data */
1322 frent
= LIST_FIRST(&(*frag
)->fr_queue
);
1323 VERIFY(frent
!= NULL
);
1324 if (frent
->fr_ip6f_hlen
+ off
> IP_MAXPACKET
) {
1325 DPFPRINTF(("drop: too big: %d\n", off
));
1326 pf_free_fragment(*frag
);
1331 ip6
= frent
->fr_ip6
;
1332 ip6
->ip6_nxt
= (*frag
)->fr_p
;
1333 ip6
->ip6_plen
= htons(off
);
1334 ip6
->ip6_src
= (*frag
)->fr_srcx
.v6addr
;
1335 ip6
->ip6_dst
= (*frag
)->fr_dstx
.v6addr
;
1337 if ((*frag
)->fr_csum_flags
!= 0) {
1338 csum
= (*frag
)->fr_csum
;
1342 m
->m_pkthdr
.csum_rx_val
= csum
;
1343 m
->m_pkthdr
.csum_rx_start
= sizeof (struct ip6_hdr
);
1344 m
->m_pkthdr
.csum_flags
= (*frag
)->fr_csum_flags
;
1345 } else if ((m
->m_pkthdr
.rcvif
->if_flags
& IFF_LOOPBACK
) ||
1346 (m
->m_pkthdr
.pkt_flags
& PKTF_LOOP
)) {
1347 /* loopback checksums are always OK */
1348 m
->m_pkthdr
.csum_data
= 0xffff;
1349 m
->m_pkthdr
.csum_flags
&= ~CSUM_PARTIAL
;
1350 m
->m_pkthdr
.csum_flags
= CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
;
1353 /* Remove from fragment queue */
1354 pf_remove_fragment(*frag
);
1358 m
->m_len
+= sizeof(struct ip6_hdr
);
1359 m
->m_data
-= sizeof(struct ip6_hdr
);
1360 memmove(m
->m_data
, ip6
, sizeof(struct ip6_hdr
));
1362 next
= LIST_NEXT(frent
, fr_next
);
1363 pool_put(&pf_frent_pl
, frent
);
1365 for (frent
= next
; next
!= NULL
; frent
= next
) {
1369 next
= LIST_NEXT(frent
, fr_next
);
1370 pool_put(&pf_frent_pl
, frent
);
1374 /* XXX this should be done elsewhere */
1375 if (m
->m_flags
& M_PKTHDR
) {
1377 for (m2
= m
; m2
; m2
= m2
->m_next
)
1378 pktlen
+= m2
->m_len
;
1379 m
->m_pkthdr
.len
= pktlen
;
1382 DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n",
1383 (uint64_t)VM_KERNEL_ADDRPERM(m
), ntohs(ip6
->ip6_plen
),
1389 /* Oops - fail safe - drop packet */
1390 pool_put(&pf_frent_pl
, frent
);
1396 static struct mbuf
*
1397 pf_frag6cache(struct mbuf
**m0
, struct ip6_hdr
*h
, struct ip6_frag
*fh
,
1398 struct pf_fragment
**frag
, int hlen
, int mff
, int drop
, int *nomem
)
1400 struct mbuf
*m
= *m0
;
1401 u_int16_t plen
, off
, fr_max
;
1402 struct pf_frcache
*frp
, *fra
, *cur
= NULL
;
1405 VERIFY(*frag
== NULL
|| !BUFFER_FRAGMENTS(*frag
));
1407 off
= ntohs(fh
->ip6f_offlg
& IP6F_OFF_MASK
);
1408 plen
= ntohs(h
->ip6_plen
) - (hlen
- sizeof *h
);
1411 * Apple Modification: dimambro@apple.com. The hlen, being passed
1412 * into this function Includes all the headers associated with
1413 * the packet, and may include routing headers, so to get to
1414 * the data payload as stored in the original IPv6 header we need
1415 * to subtract al those headers and the IP header.
1417 * The 'max' local variable should also contain the offset from the start
1418 * of the reassembled packet to the octet just past the end of the octets
1419 * in the current fragment where:
1420 * - 'off' is the offset from the start of the reassembled packet to the
1421 * first octet in the fragment,
1422 * - 'plen' is the length of the "payload data length" Excluding all the
1423 * IPv6 headers of the fragment.
1424 * - 'hlen' is computed in pf_normalize_ip6() as the offset from the start
1425 * of the IPv6 packet to the beginning of the data.
1427 fr_max
= off
+ plen
;
1429 DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n",
1430 (uint64_t)VM_KERNEL_ADDRPERM(m
), plen
, off
, fr_max
));
1432 /* Create a new range queue for this packet */
1433 if (*frag
== NULL
) {
1434 *frag
= pool_get(&pf_cache_pl
, PR_NOWAIT
);
1435 if (*frag
== NULL
) {
1436 pf_flush_fragments();
1437 *frag
= pool_get(&pf_cache_pl
, PR_NOWAIT
);
1442 /* Get an entry for the queue */
1443 cur
= pool_get(&pf_cent_pl
, PR_NOWAIT
);
1445 pool_put(&pf_cache_pl
, *frag
);
1451 (*frag
)->fr_flags
= PFFRAG_NOBUFFER
;
1452 (*frag
)->fr_max
= 0;
1453 (*frag
)->fr_af
= AF_INET6
;
1454 (*frag
)->fr_srcx
.v6addr
= h
->ip6_src
;
1455 (*frag
)->fr_dstx
.v6addr
= h
->ip6_dst
;
1456 (*frag
)->fr_p
= fh
->ip6f_nxt
;
1457 (*frag
)->fr_id6
= fh
->ip6f_ident
;
1458 (*frag
)->fr_timeout
= pf_time_second();
1461 cur
->fr_end
= fr_max
;
1462 LIST_INIT(&(*frag
)->fr_cache
);
1463 LIST_INSERT_HEAD(&(*frag
)->fr_cache
, cur
, fr_next
);
1465 RB_INSERT(pf_frag_tree
, &pf_cache_tree
, *frag
);
1466 TAILQ_INSERT_HEAD(&pf_cachequeue
, *frag
, frag_next
);
1468 DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh
->ip6f_ident
),
1475 * Find a fragment after the current one:
1476 * - off contains the real shifted offset.
1479 LIST_FOREACH(fra
, &(*frag
)->fr_cache
, fr_next
) {
1480 if (fra
->fr_off
> off
)
1485 VERIFY(frp
!= NULL
|| fra
!= NULL
);
1490 precut
= frp
->fr_end
- off
;
1491 if (precut
>= plen
) {
1492 /* Fragment is entirely a duplicate */
1493 DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
1494 ntohl(fh
->ip6f_ident
), frp
->fr_off
, frp
->fr_end
,
1499 /* They are adjacent. Fixup cache entry */
1500 DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
1501 ntohl(fh
->ip6f_ident
), frp
->fr_off
, frp
->fr_end
,
1503 frp
->fr_end
= fr_max
;
1504 } else if (precut
> 0) {
1505 /* The first part of this payload overlaps with a
1506 * fragment that has already been passed.
1507 * Need to trim off the first part of the payload.
1508 * But to do so easily, we need to create another
1509 * mbuf to throw the original header into.
1512 DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
1513 ntohl(fh
->ip6f_ident
), precut
, frp
->fr_off
,
1514 frp
->fr_end
, off
, fr_max
));
1518 /* Update the previous frag to encompass this one */
1519 frp
->fr_end
= fr_max
;
1522 /* XXX Optimization opportunity
1523 * This is a very heavy way to trim the payload.
1524 * we could do it much faster by diddling mbuf
1525 * internals but that would be even less legible
1526 * than this mbuf magic. For my next trick,
1527 * I'll pull a rabbit out of my laptop.
1529 *m0
= m_copym(m
, 0, hlen
, M_NOWAIT
);
1532 VERIFY((*m0
)->m_next
== NULL
);
1533 m_adj(m
, precut
+ hlen
);
1536 if (m
->m_flags
& M_PKTHDR
) {
1539 for (t
= m
; t
; t
= t
->m_next
)
1541 m
->m_pkthdr
.len
= pktlen
;
1544 h
= mtod(m
, struct ip6_hdr
*);
1546 VERIFY((int)m
->m_len
==
1547 ntohs(h
->ip6_plen
) - precut
);
1548 fh
->ip6f_offlg
&= ~IP6F_OFF_MASK
;
1550 htons(ntohs(fh
->ip6f_offlg
& IP6F_OFF_MASK
)
1552 h
->ip6_plen
= htons(ntohs(h
->ip6_plen
) -
1558 /* There is a gap between fragments */
1560 DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
1561 ntohl(fh
->ip6f_ident
), -precut
, frp
->fr_off
,
1562 frp
->fr_end
, off
, fr_max
));
1564 cur
= pool_get(&pf_cent_pl
, PR_NOWAIT
);
1570 cur
->fr_end
= fr_max
;
1571 LIST_INSERT_AFTER(frp
, cur
, fr_next
);
1579 aftercut
= fr_max
- fra
->fr_off
;
1580 if (aftercut
== 0) {
1581 /* Adjacent fragments */
1582 DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
1583 ntohl(fh
->ip6f_ident
), off
, fr_max
, fra
->fr_off
,
1587 } else if (aftercut
> 0) {
1588 /* Need to chop off the tail of this fragment */
1589 DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
1590 ntohl(fh
->ip6f_ident
), aftercut
, off
, fr_max
,
1591 fra
->fr_off
, fra
->fr_end
));
1598 m_adj(m
, -aftercut
);
1599 if (m
->m_flags
& M_PKTHDR
) {
1602 for (t
= m
; t
; t
= t
->m_next
)
1604 m
->m_pkthdr
.len
= pktlen
;
1606 h
= mtod(m
, struct ip6_hdr
*);
1607 VERIFY((int)m
->m_len
==
1608 ntohs(h
->ip6_plen
) - aftercut
);
1610 htons(ntohs(h
->ip6_plen
) - aftercut
);
1614 } else if (frp
== NULL
) {
1615 /* There is a gap between fragments */
1616 DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
1617 ntohl(fh
->ip6f_ident
), -aftercut
, off
, fr_max
,
1618 fra
->fr_off
, fra
->fr_end
));
1620 cur
= pool_get(&pf_cent_pl
, PR_NOWAIT
);
1626 cur
->fr_end
= fr_max
;
1627 LIST_INSERT_BEFORE(fra
, cur
, fr_next
);
1630 /* Need to glue together two separate fragment descriptors */
1632 if (cur
&& fra
->fr_off
<= cur
->fr_end
) {
1633 /* Need to merge in a previous 'cur' */
1634 DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1635 "%d-%d) %d-%d (%d-%d)\n",
1636 ntohl(fh
->ip6f_ident
), cur
->fr_off
,
1637 cur
->fr_end
, off
, fr_max
, fra
->fr_off
,
1639 fra
->fr_off
= cur
->fr_off
;
1640 LIST_REMOVE(cur
, fr_next
);
1641 pool_put(&pf_cent_pl
, cur
);
1644 } else if (frp
&& fra
->fr_off
<= frp
->fr_end
) {
1645 /* Need to merge in a modified 'frp' */
1646 VERIFY(cur
== NULL
);
1647 DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1648 "%d-%d) %d-%d (%d-%d)\n",
1649 ntohl(fh
->ip6f_ident
), frp
->fr_off
,
1650 frp
->fr_end
, off
, fr_max
, fra
->fr_off
,
1652 fra
->fr_off
= frp
->fr_off
;
1653 LIST_REMOVE(frp
, fr_next
);
1654 pool_put(&pf_cent_pl
, frp
);
1663 * We must keep tracking the overall fragment even when
1664 * we're going to drop it anyway so that we know when to
1665 * free the overall descriptor. Thus we drop the frag late.
1671 /* Update maximum data size */
1672 if ((*frag
)->fr_max
< fr_max
)
1673 (*frag
)->fr_max
= fr_max
;
1675 /* This is the last segment */
1677 (*frag
)->fr_flags
|= PFFRAG_SEENLAST
;
1679 /* Check if we are completely reassembled */
1680 if (((*frag
)->fr_flags
& PFFRAG_SEENLAST
) &&
1681 LIST_FIRST(&(*frag
)->fr_cache
)->fr_off
== 0 &&
1682 LIST_FIRST(&(*frag
)->fr_cache
)->fr_end
== (*frag
)->fr_max
) {
1683 /* Remove from fragment queue */
1684 DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
1685 ntohl(fh
->ip6f_ident
), (*frag
)->fr_max
));
1686 pf_free_fragment(*frag
);
1695 /* Still need to pay attention to !IP_MF */
1696 if (!mff
&& *frag
!= NULL
)
1697 (*frag
)->fr_flags
|= PFFRAG_SEENLAST
;
1704 /* Still need to pay attention to !IP_MF */
1705 if (!mff
&& *frag
!= NULL
)
1706 (*frag
)->fr_flags
|= PFFRAG_SEENLAST
;
1709 /* This fragment has been deemed bad. Don't reass */
1710 if (((*frag
)->fr_flags
& PFFRAG_DROP
) == 0)
1711 DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n",
1712 ntohl(fh
->ip6f_ident
)));
1713 (*frag
)->fr_flags
|= PFFRAG_DROP
;
1721 pf_normalize_ip(pbuf_t
*pbuf
, int dir
, struct pfi_kif
*kif
, u_short
*reason
,
1722 struct pf_pdesc
*pd
)
1726 struct pf_frent
*frent
;
1727 struct pf_fragment
*frag
= NULL
;
1728 struct ip
*h
= pbuf
->pb_data
;
1729 int mff
= (ntohs(h
->ip_off
) & IP_MF
);
1730 int hlen
= h
->ip_hl
<< 2;
1731 u_int16_t fragoff
= (ntohs(h
->ip_off
) & IP_OFFMASK
) << 3;
1736 struct pf_ruleset
*ruleset
= NULL
;
1737 struct ifnet
*ifp
= pbuf
->pb_ifp
;
1739 r
= TAILQ_FIRST(pf_main_ruleset
.rules
[PF_RULESET_SCRUB
].active
.ptr
);
1742 if (pfi_kif_match(r
->kif
, kif
) == r
->ifnot
)
1743 r
= r
->skip
[PF_SKIP_IFP
].ptr
;
1744 else if (r
->direction
&& r
->direction
!= dir
)
1745 r
= r
->skip
[PF_SKIP_DIR
].ptr
;
1746 else if (r
->af
&& r
->af
!= AF_INET
)
1747 r
= r
->skip
[PF_SKIP_AF
].ptr
;
1748 else if (r
->proto
&& r
->proto
!= h
->ip_p
)
1749 r
= r
->skip
[PF_SKIP_PROTO
].ptr
;
1750 else if (PF_MISMATCHAW(&r
->src
.addr
,
1751 (struct pf_addr
*)&h
->ip_src
.s_addr
, AF_INET
,
1753 r
= r
->skip
[PF_SKIP_SRC_ADDR
].ptr
;
1754 else if (PF_MISMATCHAW(&r
->dst
.addr
,
1755 (struct pf_addr
*)&h
->ip_dst
.s_addr
, AF_INET
,
1757 r
= r
->skip
[PF_SKIP_DST_ADDR
].ptr
;
1759 if (r
->anchor
== NULL
)
1762 pf_step_into_anchor(&asd
, &ruleset
,
1763 PF_RULESET_SCRUB
, &r
, NULL
, NULL
);
1765 if (r
== NULL
&& pf_step_out_of_anchor(&asd
, &ruleset
,
1766 PF_RULESET_SCRUB
, &r
, NULL
, NULL
))
1770 if (r
== NULL
|| r
->action
== PF_NOSCRUB
)
1773 r
->packets
[dir
== PF_OUT
]++;
1774 r
->bytes
[dir
== PF_OUT
] += pd
->tot_len
;
1777 /* Check for illegal packets */
1778 if (hlen
< (int)sizeof (struct ip
))
1781 if (hlen
> ntohs(h
->ip_len
))
1784 /* Clear IP_DF if the rule uses the no-df option */
1785 if (r
->rule_flag
& PFRULE_NODF
&& h
->ip_off
& htons(IP_DF
)) {
1786 u_int16_t ipoff
= h
->ip_off
;
1788 h
->ip_off
&= htons(~IP_DF
);
1789 h
->ip_sum
= pf_cksum_fixup(h
->ip_sum
, ipoff
, h
->ip_off
, 0);
1792 /* We will need other tests here */
1793 if (!fragoff
&& !mff
)
1797 * We're dealing with a fragment now. Don't allow fragments
1798 * with IP_DF to enter the cache. If the flag was cleared by
1799 * no-df above, fine. Otherwise drop it.
1801 if (h
->ip_off
& htons(IP_DF
)) {
1802 DPFPRINTF(("IP_DF\n"));
1806 ip_len
= ntohs(h
->ip_len
) - hlen
;
1807 ip_off
= (ntohs(h
->ip_off
) & IP_OFFMASK
) << 3;
1809 /* All fragments are 8 byte aligned */
1810 if (mff
&& (ip_len
& 0x7)) {
1811 DPFPRINTF(("mff and %d\n", ip_len
));
1815 /* Respect maximum length */
1816 if (fragoff
+ ip_len
> IP_MAXPACKET
) {
1817 DPFPRINTF(("max packet %d\n", fragoff
+ ip_len
));
1820 fr_max
= fragoff
+ ip_len
;
1822 if ((r
->rule_flag
& (PFRULE_FRAGCROP
|PFRULE_FRAGDROP
)) == 0) {
1823 /* Fully buffer all of the fragments */
1825 frag
= pf_find_fragment_by_ipv4_header(h
, &pf_frag_tree
);
1826 /* Check if we saw the last fragment already */
1827 if (frag
!= NULL
&& (frag
->fr_flags
& PFFRAG_SEENLAST
) &&
1828 fr_max
> frag
->fr_max
)
1831 if ((m
= pbuf_to_mbuf(pbuf
, TRUE
)) == NULL
) {
1832 REASON_SET(reason
, PFRES_MEMORY
);
1836 VERIFY(!pbuf_is_valid(pbuf
));
1838 /* Restore iph pointer after pbuf_to_mbuf() */
1839 h
= mtod(m
, struct ip
*);
1841 /* Get an entry for the fragment queue */
1842 frent
= pool_get(&pf_frent_pl
, PR_NOWAIT
);
1843 if (frent
== NULL
) {
1844 REASON_SET(reason
, PFRES_MEMORY
);
1852 /* Might return a completely reassembled mbuf, or NULL */
1853 DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h
->ip_id
),
1855 m
= pf_reassemble(m
, &frag
, frent
, mff
);
1860 VERIFY(m
->m_flags
& M_PKTHDR
);
1861 pbuf_init_mbuf(pbuf
, m
, ifp
);
1863 /* use mtag from concatenated mbuf chain */
1864 pd
->pf_mtag
= pf_find_mtag_pbuf(pbuf
);
1866 // SCW: This check is superfluous
1868 if (pd
->pf_mtag
== NULL
) {
1869 printf("%s: pf_find_mtag returned NULL(1)\n", __func__
);
1870 if ((pd
->pf_mtag
= pf_get_mtag(m
)) == NULL
) {
1879 h
= mtod(m
, struct ip
*);
1881 if (frag
!= NULL
&& (frag
->fr_flags
& PFFRAG_DROP
))
1884 /* non-buffering fragment cache (drops or masks overlaps) */
1887 if (dir
== PF_OUT
&& (pd
->pf_mtag
->pftag_flags
& PF_TAG_FRAGCACHE
)) {
1889 * Already passed the fragment cache in the
1890 * input direction. If we continued, it would
1891 * appear to be a dup and would be dropped.
1896 frag
= pf_find_fragment_by_ipv4_header(h
, &pf_cache_tree
);
1898 /* Check if we saw the last fragment already */
1899 if (frag
!= NULL
&& (frag
->fr_flags
& PFFRAG_SEENLAST
) &&
1900 fr_max
> frag
->fr_max
) {
1901 if (r
->rule_flag
& PFRULE_FRAGDROP
)
1902 frag
->fr_flags
|= PFFRAG_DROP
;
1906 if ((m
= pbuf_to_mbuf(pbuf
, TRUE
)) == NULL
) {
1907 REASON_SET(reason
, PFRES_MEMORY
);
1911 VERIFY(!pbuf_is_valid(pbuf
));
1913 /* Restore iph pointer after pbuf_to_mbuf() */
1914 h
= mtod(m
, struct ip
*);
1916 m
= pf_fragcache(&m
, h
, &frag
, mff
,
1917 (r
->rule_flag
& PFRULE_FRAGDROP
) ? 1 : 0, &nomem
);
1919 // Note: pf_fragcache() has already m_freem'd the mbuf
1925 VERIFY(m
->m_flags
& M_PKTHDR
);
1926 pbuf_init_mbuf(pbuf
, m
, ifp
);
1928 /* use mtag from copied and trimmed mbuf chain */
1929 pd
->pf_mtag
= pf_find_mtag_pbuf(pbuf
);
1931 // SCW: This check is superfluous
1933 if (pd
->pf_mtag
== NULL
) {
1934 printf("%s: pf_find_mtag returned NULL(2)\n", __func__
);
1935 if ((pd
->pf_mtag
= pf_get_mtag(m
)) == NULL
) {
1944 pd
->pf_mtag
->pftag_flags
|= PF_TAG_FRAGCACHE
;
1946 if (frag
!= NULL
&& (frag
->fr_flags
& PFFRAG_DROP
))
1953 /* At this point, only IP_DF is allowed in ip_off */
1954 if (h
->ip_off
& ~htons(IP_DF
)) {
1955 u_int16_t ipoff
= h
->ip_off
;
1957 h
->ip_off
&= htons(IP_DF
);
1958 h
->ip_sum
= pf_cksum_fixup(h
->ip_sum
, ipoff
, h
->ip_off
, 0);
1961 /* Enforce a minimum ttl, may cause endless packet loops */
1962 if (r
->min_ttl
&& h
->ip_ttl
< r
->min_ttl
) {
1963 u_int16_t ip_ttl
= h
->ip_ttl
;
1965 h
->ip_ttl
= r
->min_ttl
;
1966 h
->ip_sum
= pf_cksum_fixup(h
->ip_sum
, ip_ttl
, h
->ip_ttl
, 0);
1968 if (r
->rule_flag
& PFRULE_RANDOMID
) {
1969 u_int16_t oip_id
= h
->ip_id
;
1971 if (rfc6864
&& IP_OFF_IS_ATOMIC(ntohs(h
->ip_off
))) {
1974 h
->ip_id
= ip_randomid();
1976 h
->ip_sum
= pf_cksum_fixup(h
->ip_sum
, oip_id
, h
->ip_id
, 0);
1978 if ((r
->rule_flag
& (PFRULE_FRAGCROP
|PFRULE_FRAGDROP
)) == 0)
1979 pd
->flags
|= PFDESC_IP_REAS
;
1984 /* Enforce a minimum ttl, may cause endless packet loops */
1985 if (r
->min_ttl
&& h
->ip_ttl
< r
->min_ttl
) {
1986 u_int16_t ip_ttl
= h
->ip_ttl
;
1988 h
->ip_ttl
= r
->min_ttl
;
1989 h
->ip_sum
= pf_cksum_fixup(h
->ip_sum
, ip_ttl
, h
->ip_ttl
, 0);
1991 if ((r
->rule_flag
& (PFRULE_FRAGCROP
|PFRULE_FRAGDROP
)) == 0)
1992 pd
->flags
|= PFDESC_IP_REAS
;
1996 REASON_SET(reason
, PFRES_MEMORY
);
1997 if (r
!= NULL
&& r
->log
&& pbuf_is_valid(pbuf
))
1998 PFLOG_PACKET(kif
, h
, pbuf
, AF_INET
, dir
, *reason
, r
,
2003 REASON_SET(reason
, PFRES_NORM
);
2004 if (r
!= NULL
&& r
->log
&& pbuf_is_valid(pbuf
))
2005 PFLOG_PACKET(kif
, h
, pbuf
, AF_INET
, dir
, *reason
, r
,
2010 DPFPRINTF(("dropping bad IPv4 fragment\n"));
2012 /* Free associated fragments */
2014 pf_free_fragment(frag
);
2016 REASON_SET(reason
, PFRES_FRAG
);
2017 if (r
!= NULL
&& r
->log
&& pbuf_is_valid(pbuf
))
2018 PFLOG_PACKET(kif
, h
, pbuf
, AF_INET
, dir
, *reason
, r
, NULL
, NULL
, pd
);
2025 pf_normalize_ip6(pbuf_t
*pbuf
, int dir
, struct pfi_kif
*kif
,
2026 u_short
*reason
, struct pf_pdesc
*pd
)
2030 struct ip6_hdr
*h
= pbuf
->pb_data
;
2036 struct ip6_opt_jumbo jumbo
;
2040 struct ip6_frag frag
;
2041 u_int32_t jumbolen
= 0, plen
;
2042 u_int16_t fragoff
= 0;
2045 struct pf_frent
*frent
;
2046 struct pf_fragment
*pff
= NULL
;
2047 int mff
= 0, rh_cnt
= 0;
2050 struct pf_ruleset
*ruleset
= NULL
;
2051 struct ifnet
*ifp
= pbuf
->pb_ifp
;
2053 r
= TAILQ_FIRST(pf_main_ruleset
.rules
[PF_RULESET_SCRUB
].active
.ptr
);
2056 if (pfi_kif_match(r
->kif
, kif
) == r
->ifnot
)
2057 r
= r
->skip
[PF_SKIP_IFP
].ptr
;
2058 else if (r
->direction
&& r
->direction
!= dir
)
2059 r
= r
->skip
[PF_SKIP_DIR
].ptr
;
2060 else if (r
->af
&& r
->af
!= AF_INET6
)
2061 r
= r
->skip
[PF_SKIP_AF
].ptr
;
2062 #if 0 /* header chain! */
2063 else if (r
->proto
&& r
->proto
!= h
->ip6_nxt
)
2064 r
= r
->skip
[PF_SKIP_PROTO
].ptr
;
2066 else if (PF_MISMATCHAW(&r
->src
.addr
,
2067 (struct pf_addr
*)(uintptr_t)&h
->ip6_src
, AF_INET6
,
2069 r
= r
->skip
[PF_SKIP_SRC_ADDR
].ptr
;
2070 else if (PF_MISMATCHAW(&r
->dst
.addr
,
2071 (struct pf_addr
*)(uintptr_t)&h
->ip6_dst
, AF_INET6
,
2073 r
= r
->skip
[PF_SKIP_DST_ADDR
].ptr
;
2075 if (r
->anchor
== NULL
)
2078 pf_step_into_anchor(&asd
, &ruleset
,
2079 PF_RULESET_SCRUB
, &r
, NULL
, NULL
);
2081 if (r
== NULL
&& pf_step_out_of_anchor(&asd
, &ruleset
,
2082 PF_RULESET_SCRUB
, &r
, NULL
, NULL
))
2086 if (r
== NULL
|| r
->action
== PF_NOSCRUB
)
2089 r
->packets
[dir
== PF_OUT
]++;
2090 r
->bytes
[dir
== PF_OUT
] += pd
->tot_len
;
2093 /* Check for illegal packets */
2094 if ((uint32_t)(sizeof (struct ip6_hdr
) + IPV6_MAXPACKET
) <
2095 pbuf
->pb_packet_len
)
2098 off
= sizeof (struct ip6_hdr
);
2104 case IPPROTO_FRAGMENT
:
2107 case IPPROTO_ROUTING
:
2108 case IPPROTO_DSTOPTS
:
2109 if (!pf_pull_hdr(pbuf
, off
, &ext
, sizeof (ext
), NULL
,
2114 * Multiple routing headers not allowed.
2115 * Routing header type zero considered harmful.
2117 if (proto
== IPPROTO_ROUTING
) {
2118 const struct ip6_rthdr
*rh
=
2119 (const struct ip6_rthdr
*)&ext
;
2122 if (rh
->ip6r_type
== IPV6_RTHDR_TYPE_0
)
2126 if (proto
== IPPROTO_AH
)
2127 off
+= (ext
.ip6e_len
+ 2) * 4;
2129 off
+= (ext
.ip6e_len
+ 1) * 8;
2130 proto
= ext
.ip6e_nxt
;
2132 case IPPROTO_HOPOPTS
:
2135 if (!pf_pull_hdr(m
, off
, &ext
, sizeof (ext
), NULL
,
2138 optend
= off
+ (ext
.ip6e_len
+ 1) * 8;
2139 ooff
= off
+ sizeof (ext
);
2141 if (!pf_pull_hdr(m
, ooff
, &opt
.ip6o_type
,
2142 sizeof (opt
.ip6o_type
), NULL
, NULL
,
2145 if (opt
.ip6o_type
== IP6OPT_PAD1
) {
2149 if (!pf_pull_hdr(m
, ooff
, &opt
, sizeof (opt
),
2150 NULL
, NULL
, AF_INET6
))
2152 if (ooff
+ sizeof (opt
) + opt
.ip6o_len
> optend
)
2154 switch (opt
.ip6o_type
) {
2156 if (h
->ip6_plen
!= 0)
2158 if (!pf_pull_hdr(m
, ooff
, &jumbo
,
2159 sizeof (jumbo
), NULL
, NULL
,
2162 memcpy(&jumbolen
, jumbo
.ip6oj_jumbo_len
,
2164 jumbolen
= ntohl(jumbolen
);
2165 if (jumbolen
<= IPV6_MAXPACKET
)
2167 if (sizeof (struct ip6_hdr
) +
2168 jumbolen
!= m
->m_pkthdr
.len
)
2174 ooff
+= sizeof (opt
) + opt
.ip6o_len
;
2175 } while (ooff
< optend
);
2178 proto
= ext
.ip6e_nxt
;
2185 } while (!terminal
);
2187 /* jumbo payload option must be present, or plen > 0 */
2188 if (ntohs(h
->ip6_plen
) == 0)
2191 plen
= ntohs(h
->ip6_plen
);
2194 if ((uint32_t)(sizeof (struct ip6_hdr
) + plen
) > pbuf
->pb_packet_len
)
2197 /* Enforce a minimum ttl, may cause endless packet loops */
2198 if (r
->min_ttl
&& h
->ip6_hlim
< r
->min_ttl
)
2199 h
->ip6_hlim
= r
->min_ttl
;
2204 if (ntohs(h
->ip6_plen
) == 0 || jumbolen
)
2206 plen
= ntohs(h
->ip6_plen
);
2208 if (!pf_pull_hdr(pbuf
, off
, &frag
, sizeof (frag
), NULL
, NULL
, AF_INET6
))
2210 fragoff
= ntohs(frag
.ip6f_offlg
& IP6F_OFF_MASK
);
2211 pd
->proto
= frag
.ip6f_nxt
;
2212 mff
= ntohs(frag
.ip6f_offlg
& IP6F_MORE_FRAG
);
2214 if (fragoff
+ (plen
- off
) > IPV6_MAXPACKET
)
2217 fr_max
= fragoff
+ plen
- (off
- sizeof(struct ip6_hdr
));
2218 // XXX SCW: mbuf-specific
2219 // DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u "
2220 // "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off,
2221 // fragoff, fr_max));
2223 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
2224 /* Fully buffer all of the fragments */
2225 pd->flags |= PFDESC_IP_REAS;
2227 pff = pf_find_fragment_by_ipv6_header(h, &frag,
2230 /* Check if we saw the last fragment already */
2231 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2232 fr_max > pff->fr_max)
2235 if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2236 REASON_SET(reason, PFRES_MEMORY);
2240 /* Restore iph pointer after pbuf_to_mbuf() */
2241 h = mtod(m, struct ip6_hdr *);
2243 /* Get an entry for the fragment queue */
2244 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
2245 if (frent == NULL) {
2246 REASON_SET(reason, PFRES_MEMORY);
2253 frent->fr_ip6f_opt = frag;
2254 frent->fr_ip6f_hlen = off;
2256 /* Might return a completely reassembled mbuf, or NULL */
2257 DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
2258 ntohl(frag.ip6f_ident), fragoff, fr_max));
2259 m = pf_reassemble6(&m, &pff, frent, mff);
2264 pbuf_init_mbuf(pbuf, m, ifp);
2267 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2270 else if (dir == PF_IN || !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
2271 /* non-buffering fragment cache (overlaps: see RFC 5722) */
2274 pff = pf_find_fragment_by_ipv6_header(h, &frag,
2277 /* Check if we saw the last fragment already */
2278 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2279 fr_max > pff->fr_max) {
2280 if (r->rule_flag & PFRULE_FRAGDROP)
2281 pff->fr_flags |= PFFRAG_DROP;
2285 if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2289 /* Restore iph pointer after pbuf_to_mbuf() */
2290 h = mtod(m, struct ip6_hdr *);
2292 m = pf_frag6cache(&m, h, &frag, &pff, off, mff,
2293 (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
2295 // Note: pf_frag6cache() has already m_freem'd the mbuf
2301 pbuf_init_mbuf(pbuf, m, ifp);
2302 pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
2306 pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
2308 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2312 /* Enforce a minimum ttl, may cause endless packet loops */
2313 if (r->min_ttl && h->ip6_hlim < r->min_ttl)
2314 h->ip6_hlim = r->min_ttl;
2318 REASON_SET(reason, PFRES_MEMORY);
2322 REASON_SET(reason, PFRES_SHORT);
2326 REASON_SET(reason, PFRES_NORM);
2330 DPFPRINTF(("dropping bad IPv6 fragment\n"));
2331 REASON_SET(reason, PFRES_FRAG);
2336 pf_free_fragment(pff);
2337 if (r != NULL && r->log && pbuf_is_valid(pbuf))
2338 PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, *reason, r, NULL, NULL, pd);
2344 pf_normalize_tcp(int dir, struct pfi_kif *kif, pbuf_t *pbuf, int ipoff,
2345 int off, void *h, struct pf_pdesc *pd)
2347 #pragma unused(ipoff, h)
2348 struct pf_rule *r, *rm = NULL;
2349 struct tcphdr *th = pd->hdr.tcp;
2354 sa_family_t af = pd->af;
2355 struct pf_ruleset *ruleset = NULL;
2356 union pf_state_xport sxport, dxport;
2358 sxport.port = th->th_sport;
2359 dxport.port = th->th_dport;
2361 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2364 if (pfi_kif_match(r->kif, kif) == r->ifnot)
2365 r = r->skip[PF_SKIP_IFP].ptr;
2366 else if (r->direction && r->direction != dir)
2367 r = r->skip[PF_SKIP_DIR].ptr;
2368 else if (r->af && r->af != af)
2369 r = r->skip[PF_SKIP_AF].ptr;
2370 else if (r->proto && r->proto != pd->proto)
2371 r = r->skip[PF_SKIP_PROTO].ptr;
2372 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
2374 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2375 else if (r->src.xport.range.op &&
2376 !pf_match_xport(r->src.xport.range.op, r->proto_variant,
2377 &r->src.xport, &sxport))
2378 r = r->skip[PF_SKIP_SRC_PORT].ptr;
2379 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
2381 r = r->skip[PF_SKIP_DST_ADDR].ptr;
2382 else if (r->dst.xport.range.op &&
2383 !pf_match_xport(r->dst.xport.range.op, r->proto_variant,
2384 &r->dst.xport, &dxport))
2385 r = r->skip[PF_SKIP_DST_PORT].ptr;
2386 else if (r->os_fingerprint != PF_OSFP_ANY &&
2387 !pf_osfp_match(pf_osfp_fingerprint(pd, pbuf, off, th),
2389 r = TAILQ_NEXT(r, entries);
2391 if (r->anchor == NULL) {
2395 pf_step_into_anchor(&asd, &ruleset,
2396 PF_RULESET_SCRUB, &r, NULL, NULL);
2399 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2400 PF_RULESET_SCRUB, &r, NULL, NULL))
2404 if (rm == NULL || rm->action == PF_NOSCRUB)
2407 r->packets[dir == PF_OUT]++;
2408 r->bytes[dir == PF_OUT] += pd->tot_len;
2411 if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
2412 pd->flags |= PFDESC_TCP_NORM;
2414 flags = th->th_flags;
2415 if (flags & TH_SYN) {
2416 /* Illegal packet */
2423 /* Illegal packet */
2424 if (!(flags & (TH_ACK|TH_RST)))
2428 if (!(flags & TH_ACK)) {
2429 /* These flags are only valid if ACK is set */
2430 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
2434 /* Check for illegal header length */
2435 if (th->th_off < (sizeof (struct tcphdr) >> 2))
2438 /* If flags changed, or reserved data set, then adjust */
2439 if (flags != th->th_flags || th->th_x2 != 0) {
2442 ov = *(u_int16_t *)(&th->th_ack + 1);
2443 th->th_flags = flags;
2445 nv = *(u_int16_t *)(&th->th_ack + 1);
2447 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
2451 /* Remove urgent pointer, if TH_URG is not set */
2452 if (!(flags & TH_URG) && th->th_urp) {
2453 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
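/*
 * Illustrative sketch (editor's addition, not part of pf_norm.c): the style
 * of incremental Internet-checksum update (RFC 1624) that pf_cksum_fixup()
 * performs when a 16-bit word of the TCP header is rewritten, as in the
 * flag and urgent-pointer sanitizing above. cksum_fixup_16() is a
 * hypothetical user-space equivalent; values are in network byte order.
 */
#include <stdint.h>

static uint16_t
cksum_fixup_16(uint16_t cksum, uint16_t old, uint16_t new)
{
	uint32_t l;

	/* remove the old word, add the new one, fold back to 16 bits */
	l = cksum + old - new;
	l = (l >> 16) + (l & 0xffff);
	l = l & 0xffff;
	return ((uint16_t)l);
}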
2458 /* copy back packet headers if we sanitized */
2459 /* Process options */
2461 int rv = pf_normalize_tcpopt(r, dir, kif, pd, pbuf, th, off,
2469 if (pf_lazy_makewritable(pd, pbuf,
2470 off + sizeof (*th)) == NULL) {
2471 REASON_SET(&reason, PFRES_MEMORY);
2473 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
2478 pbuf_copy_back(pbuf, off, sizeof (*th), th);
2484 REASON_SET(&reason, PFRES_NORM);
2485 if (rm != NULL && r->log)
2486 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, r, NULL, NULL, pd);
2491 pf_normalize_tcp_init(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
2492 struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
2495 u_int32_t tsval, tsecr;
2499 VERIFY(src->scrub == NULL);
2501 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
2502 if (src->scrub == NULL)
2504 bzero(src->scrub, sizeof (*src->scrub));
2509 struct ip *h = pbuf->pb_data;
2510 src->scrub->pfss_ttl = h->ip_ttl;
2516 struct ip6_hdr *h = pbuf->pb_data;
2517 src->scrub->pfss_ttl = h->ip6_hlim;
2525 * All normalizations below are only begun if we see the start of
2526 * the connections. They must all set an enabled bit in pfss_flags
2528 if ((th->th_flags & TH_SYN) == 0)
2532 if (th->th_off > (sizeof (struct tcphdr) >> 2) && src->scrub &&
2533 pf_pull_hdr(pbuf, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
2534 /* Diddle with TCP options */
2536 opt = hdr + sizeof (struct tcphdr);
2537 hlen = (th->th_off << 2) - sizeof (struct tcphdr);
2538 while (hlen >= TCPOLEN_TIMESTAMP) {
2540 case TCPOPT_EOL: /* FALLTHROUGH */
2545 case TCPOPT_TIMESTAMP:
2546 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2547 src->scrub->pfss_flags |=
2549 src->scrub->pfss_ts_mod =
2552 /* note PFSS_PAWS not set yet */
2553 memcpy(&tsval, &opt[2],
2554 sizeof (u_int32_t));
2555 memcpy(&tsecr, &opt[6],
2556 sizeof (u_int32_t));
2557 src->scrub->pfss_tsval0 = ntohl(tsval);
2558 src->scrub->pfss_tsval = ntohl(tsval);
2559 src->scrub->pfss_tsecr = ntohl(tsecr);
2560 getmicrouptime(&src->scrub->pfss_last);
2564 hlen -= MAX(opt[1], 2);
2565 opt += MAX(opt[1], 2);
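/*
 * Illustrative sketch (editor's addition, not part of pf_norm.c): the
 * option-walking pattern used above to locate a TCP timestamp option. The
 * option area is a sequence of (kind, length, data) entries, with EOL and
 * NOP occupying a single byte. tcp_find_timestamp() is a hypothetical
 * user-space helper; the option constants match <netinet/tcp.h>.
 */
#include <stdint.h>
#include <string.h>

#define TCPOPT_EOL		0
#define TCPOPT_NOP		1
#define TCPOPT_TIMESTAMP	8
#define TCPOLEN_TIMESTAMP	10

static int
tcp_find_timestamp(const uint8_t *opt, int hlen, uint32_t *tsval,
    uint32_t *tsecr)
{
	while (hlen >= TCPOLEN_TIMESTAMP) {
		switch (opt[0]) {
		case TCPOPT_EOL:	/* end of options */
			return (0);
		case TCPOPT_NOP:	/* one-byte padding */
			opt++;
			hlen--;
			break;
		case TCPOPT_TIMESTAMP:
			if (opt[1] >= TCPOLEN_TIMESTAMP) {
				/* values are carried in network byte order */
				memcpy(tsval, &opt[2], sizeof (uint32_t));
				memcpy(tsecr, &opt[6], sizeof (uint32_t));
				return (1);
			}
			/* FALLTHROUGH */
		default:
			/* skip any other option; minimum advance is 2 bytes */
			hlen -= (opt[1] > 2) ? opt[1] : 2;
			opt += (opt[1] > 2) ? opt[1] : 2;
			break;
		}
	}
	return (0);
}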
2575 pf_normalize_tcp_cleanup(struct pf_state *state)
2577 if (state->src.scrub)
2578 pool_put(&pf_state_scrub_pl, state->src.scrub);
2579 if (state->dst.scrub)
2580 pool_put(&pf_state_scrub_pl, state->dst.scrub);
2582 /* Someday... flush the TCP segment reassembly descriptors. */
2586 pf_normalize_tcp_stateful(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
2587 u_short *reason, struct tcphdr *th, struct pf_state *state,
2588 struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
2590 struct timeval uptime;
2591 u_int32_t tsval = 0, tsecr = 0;
2592 u_int tsval_from_last;
2598 VERIFY(src->scrub || dst->scrub);
2601 * Enforce the minimum TTL seen for this connection. Negate a common
2602 * technique to evade an intrusion detection system and confuse
2603 * firewall state code.
2609 struct ip *h = pbuf->pb_data;
2610 if (h->ip_ttl > src->scrub->pfss_ttl)
2611 src->scrub->pfss_ttl = h->ip_ttl;
2612 h->ip_ttl = src->scrub->pfss_ttl;
2620 struct ip6_hdr *h = pbuf->pb_data;
2621 if (h->ip6_hlim > src->scrub->pfss_ttl)
2622 src->scrub->pfss_ttl = h->ip6_hlim;
2623 h->ip6_hlim = src->scrub->pfss_ttl;
2630 if (th->th_off > (sizeof (struct tcphdr) >> 2) &&
2631 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
2632 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
2633 pf_pull_hdr(pbuf, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
2634 /* Diddle with TCP options */
2636 opt = hdr + sizeof (struct tcphdr);
2637 hlen = (th->th_off << 2) - sizeof (struct tcphdr);
2638 while (hlen >= TCPOLEN_TIMESTAMP) {
2640 case TCPOPT_EOL: /* FALLTHROUGH */
2645 case TCPOPT_TIMESTAMP:
2647 * Modulate the timestamps. Can be used for
2648 * NAT detection, OS uptime determination or
2653 /* Huh? Multiple timestamps!? */
2654 if (pf_status.debug >= PF_DEBUG_MISC) {
2655 DPFPRINTF(("multiple TS??"));
2656 pf_print_state(state);
2659 REASON_SET(reason, PFRES_TS);
2662 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2663 memcpy(&tsval, &opt[2],
2664 sizeof (u_int32_t));
2665 if (tsval && src->scrub &&
2666 (src->scrub->pfss_flags &
2668 tsval = ntohl(tsval);
2669 pf_change_a(&opt[2],
2672 src->scrub->pfss_ts_mod),
2677 /* Modulate TS reply iff valid (!0) */
2678 memcpy(&tsecr, &opt[6],
2679 sizeof (u_int32_t));
2680 if (tsecr && dst->scrub &&
2681 (dst->scrub->pfss_flags &
2683 tsecr = ntohl(tsecr)
2684 - dst->scrub->pfss_ts_mod;
2685 pf_change_a(&opt[6],
2686 &th->th_sum, htonl(tsecr),
2694 hlen -= MAX(opt[1], 2);
2695 opt += MAX(opt[1], 2);
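/*
 * Illustrative sketch (editor's addition, not part of pf_norm.c): the
 * timestamp modulation performed above. A per-connection random offset
 * (pfss_ts_mod, chosen at SYN time in pf_normalize_tcp_init()) is added to
 * every TSval leaving the host and subtracted from every echoed TSecr
 * coming back, hiding the host's real timestamp clock from the peer. The
 * helper names below are hypothetical; arithmetic is modulo 2^32 on
 * host-order values.
 */
#include <stdint.h>

static uint32_t
modulate_tsval(uint32_t tsval_host_order, uint32_t ts_mod)
{
	/* outbound: TSval' = TSval + ts_mod (mod 2^32) */
	return (tsval_host_order + ts_mod);
}

static uint32_t
demodulate_tsecr(uint32_t tsecr_host_order, uint32_t ts_mod)
{
	/* inbound: the peer echoes TSval', so undo the offset */
	return (tsecr_host_order - ts_mod);
}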
2700 /* Copyback the options, caller copies back header */
2701 int optoff = off + sizeof (*th);
2702 int optlen = (th->th_off << 2) - sizeof (*th);
2703 if (pf_lazy_makewritable(pd, pbuf, optoff + optlen) ==
2705 REASON_SET(reason, PFRES_MEMORY);
2708 *writeback = optoff + optlen;
2709 pbuf_copy_back(pbuf, optoff, optlen, hdr + sizeof(*th));
2715 * Must invalidate PAWS checks on connections idle for too long.
2716 * The fastest allowed timestamp clock is 1ms. That turns out to
2717 * be about 24 days before it wraps. XXX Right now our lowerbound
2718 * TS echo check only works for the first 12 days of a connection
2719 * when the TS has exhausted half its 32bit space
2721 #define TS_MAX_IDLE (24*24*60*60)
2722 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */
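/*
 * Illustrative arithmetic (editor's addition, not part of pf_norm.c): at the
 * fastest RFC 1323 timestamp clock of 1 tick/ms, half of the 32-bit
 * timestamp space (the range that can be compared with signed sequence
 * arithmetic) is 2^31 ms ~= 24.86 days, which is where the 24-day
 * TS_MAX_IDLE figure comes from; TS_MAX_CONN halves it again because the
 * current TS-echo lower bound is only trusted for the first half of that
 * window. A quick user-space restatement of the numbers:
 */
#include <assert.h>

int
main(void)
{
	const double ms_per_day = 1000.0 * 60.0 * 60.0 * 24.0;
	const double half_ts_space_days = 2147483648.0 / ms_per_day;

	/* half of the 32-bit timestamp space at 1 tick/ms is ~24.86 days */
	assert(half_ts_space_days > 24.0 && half_ts_space_days < 25.0);
	return (0);
}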
2724 getmicrouptime(&uptime);
2725 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
2726 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
2727 pf_time_second() - state->creation > TS_MAX_CONN)) {
2728 if (pf_status.debug >= PF_DEBUG_MISC) {
2729 DPFPRINTF(("src idled out of PAWS\n"));
2730 pf_print_state(state);
2733 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
2736 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
2737 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
2738 if (pf_status.debug >= PF_DEBUG_MISC) {
2739 DPFPRINTF(("dst idled out of PAWS\n"));
2740 pf_print_state(state);
2743 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
2747 if (got_ts && src->scrub && dst->scrub &&
2748 (src->scrub->pfss_flags & PFSS_PAWS) &&
2749 (dst->scrub->pfss_flags & PFSS_PAWS)) {
2751 * Validate that the timestamps are "in-window".
2752 * RFC1323 describes TCP Timestamp options that allow
2753 * measurement of RTT (round trip time) and PAWS
2754 * (protection against wrapped sequence numbers). PAWS
2755 * gives us a set of rules for rejecting packets on
2756 * long fat pipes (packets that were somehow delayed
2757 * in transit longer than the time it took to send the
2758 * full TCP sequence space of 4Gb). We can use these
2759 * rules and infer a few others that will let us treat
2760 * the 32bit timestamp and the 32bit echoed timestamp
2761 * as sequence numbers to prevent a blind attacker from
2762 * inserting packets into a connection.
2765 * - The timestamp on this packet must be greater than
2766 * or equal to the last value echoed by the other
2767 * endpoint. The RFC says those will be discarded
2768 * since it is a dup that has already been acked.
2769 * This gives us a lowerbound on the timestamp.
2770 * timestamp >= other last echoed timestamp
2771 * - The timestamp will be less than or equal to
2772 * the last timestamp plus the time between the
2773 * last packet and now. The RFC defines the max
2774 * clock rate as 1ms. We will allow clocks to be
2775 * up to 10% fast and will allow a total difference
2776 * or 30 seconds due to a route change. And this
2777 * gives us an upperbound on the timestamp.
2778 * timestamp <= last timestamp + max ticks
2779 * We have to be careful here. Windows will send an
2780 * initial timestamp of zero and then initialize it
2781 * to a random value after the 3whs; presumably to
2782 * avoid a DoS by having to call an expensive RNG
2783 * during a SYN flood. Proof MS has at least one
2784 * good security geek.
2786 * - The TCP timestamp option must also echo the other
2787 * endpoints timestamp. The timestamp echoed is the
2788 * one carried on the earliest unacknowledged segment
2789 * on the left edge of the sequence window. The RFC
2790 * states that the host will reject any echoed
2791 * timestamps that were larger than any ever sent.
2792 * This gives us an upperbound on the TS echo.
2793 * tsecr <= largest_tsval
2794 * - The lowerbound on the TS echo is a little more
2795 * tricky to determine. The other endpoint's echoed
2796 * values will not decrease. But there may be
2797 * network conditions that re-order packets and
2798 * cause our view of them to decrease. For now the
2799 * only lowerbound we can safely determine is that
2800 * the TS echo will never be less than the original
2801 * TS. XXX There is probably a better lowerbound.
2802 * Remove TS_MAX_CONN with better lowerbound check.
2803 * tsecr >= other original TS
2805 * It is also important to note that the fastest
2806 * timestamp clock of 1ms will wrap its 32bit space in
2807 * 24 days. So we just disable TS checking after 24
2808 * days of idle time. We actually must use a 12d
2809 * connection limit until we can come up with a better
2810 * lowerbound to the TS echo check.
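/*
 * Illustrative sketch (editor's addition, not part of pf_norm.c): the four
 * "in-window" tests the comment above describes, written as a standalone
 * predicate. SEQ_LT/SEQ_GT use the usual modulo-2^32 signed comparison from
 * <netinet/tcp_seq.h>; paws_in_window() and its parameter names are
 * hypothetical, mirroring pfss_tsval/pfss_tsecr/pfss_tsval0 above.
 */
#include <stdint.h>

#define SEQ_LT(a, b)	((int32_t)((a) - (b)) < 0)
#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)

static int
paws_in_window(uint32_t tsval, uint32_t tsecr, uint32_t max_ticks,
    uint32_t src_tsval,		/* highest TSval seen from the sender */
    uint32_t dst_tsecr,		/* last TSecr the other end sent back */
    uint32_t dst_tsval,		/* highest TSval the other end has sent */
    uint32_t dst_tsval0)	/* first TSval the other end ever sent */
{
	if (SEQ_LT(tsval, dst_tsecr))			/* below the lower bound */
		return (0);
	if (SEQ_GT(tsval, src_tsval + max_ticks))	/* above the upper bound */
		return (0);
	if (tsecr != 0 && SEQ_GT(tsecr, dst_tsval))	/* echoes a TS never sent */
		return (0);
	if (tsecr != 0 && SEQ_LT(tsecr, dst_tsval0))	/* echo older than first TS */
		return (0);
	return (1);
}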
2812 struct timeval delta_ts;
2817 * PFTM_TS_DIFF is how many seconds of leeway to allow
2818 * a host's timestamp. This can happen if the previous
2819 * packet got delayed in transit for much longer than
2822 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
2823 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
2826 /* Calculate max ticks since the last timestamp */
2827 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */
2828 #define TS_MICROSECS 1000000 /* microseconds per second */
2829 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
2830 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
2831 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
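/*
 * Illustrative worked example (editor's addition, not part of pf_norm.c):
 * with TS_MAXFREQ = 1100 ticks/s and a ts_fudge of, say, 30 seconds, a
 * packet arriving 2.5 seconds after the previous one is allowed at most
 * (2 + 30) * 1100 + 500000 / (1000000 / 1100) = 35200 + 550 = 35750 ticks
 * of timestamp advance. The snippet below only restates that arithmetic.
 */
#include <assert.h>

int
main(void)
{
	const unsigned int ts_maxfreq = 1100;	/* 1 kHz + 10% skew */
	const unsigned int ts_microsecs = 1000000;
	const unsigned int ts_fudge = 30;	/* seconds of leeway (assumed) */
	unsigned int delta_sec = 2, delta_usec = 500000;
	unsigned int ticks;

	ticks = (delta_sec + ts_fudge) * ts_maxfreq;
	ticks += delta_usec / (ts_microsecs / ts_maxfreq);
	assert(ticks == 35750);
	return (0);
}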
2834 if ((src->state >= TCPS_ESTABLISHED &&
2835 dst->state >= TCPS_ESTABLISHED) &&
2836 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
2837 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
2838 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
2839 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
2841 * Bad RFC1323 implementation or an insertion attack.
2843 * - Solaris 2.6 and 2.7 are known to send another ACK
2844 * after the FIN,FIN|ACK,ACK closing that carries
2848 DPFPRINTF(("Timestamp failed %c%c%c%c\n",
2849 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
2850 SEQ_GT(tsval, src->scrub->pfss_tsval +
2851 tsval_from_last) ? '1' : ' ',
2852 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
2853 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
2854 DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u "
2855 "idle: %lus %ums\n",
2856 tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
2857 delta_ts.tv_usec / 1000));
2858 DPFPRINTF((" src->tsval: %u tsecr: %u\n",
2859 src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2860 DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u\n",
2861 dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
2862 dst->scrub->pfss_tsval0));
2863 if (pf_status.debug >= PF_DEBUG_MISC) {
2864 pf_print_state(state);
2865 pf_print_flags(th->th_flags);
2868 REASON_SET(reason, PFRES_TS);
2872 /* XXX I'd really like to require tsecr but it's optional */
2874 } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
2875 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
2876 || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
2877 src->scrub && dst->scrub &&
2878 (src->scrub->pfss_flags & PFSS_PAWS) &&
2879 (dst->scrub->pfss_flags & PFSS_PAWS)) {
2881 * Didn't send a timestamp. Timestamps aren't really useful
2883 * - connection opening or closing (often not even sent).
2884 * but we must not let an attacker put a FIN on a
2885 * data packet to sneak it through our ESTABLISHED check.
2886 * - on a TCP reset. RFC suggests not even looking at TS.
2887 * - on an empty ACK. The TS will not be echoed so it will
2888 * probably not help keep the RTT calculation in sync and
2889 * there isn't as much danger when the sequence numbers
2890 * got wrapped. So some stacks don't include TS on empty
2893 * To minimize the disruption to mostly RFC1323 conformant
2894 * stacks, we will only require timestamps on data packets.
2896 * And what do ya know, we cannot require timestamps on data
2897 * packets. There appear to be devices that do legitimate
2898 * TCP connection hijacking. There are HTTP devices that allow
2899 * a 3whs (with timestamps) and then buffer the HTTP request.
2900 * If the intermediate device has the HTTP response cache, it
2901 * will spoof the response but not bother timestamping its
2902 * packets. So we can look for the presence of a timestamp in
2903 * the first data packet and if there, require it in all future
2907 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
2909 * Hey! Someone tried to sneak a packet in. Or the
2910 * stack changed its RFC1323 behavior?!?!
2912 if (pf_status.debug >= PF_DEBUG_MISC) {
2913 DPFPRINTF(("Did not receive expected RFC1323 "
2915 pf_print_state(state);
2916 pf_print_flags(th->th_flags);
2919 REASON_SET(reason, PFRES_TS);
2926 * We will note if a host sends his data packets with or without
2927 * timestamps. And require all data packets to contain a timestamp
2928 * if the first does. PAWS implicitly requires that all data packets be
2929 * timestamped. But I think there are middle-man devices that hijack
2930 * TCP streams immediately after the 3whs and don't timestamp their
2931 * packets (seen in a WWW accelerator or cache).
2933 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
2934 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
2936 src->scrub->pfss_flags |= PFSS_DATA_TS;
2938 src->scrub->pfss_flags |= PFSS_DATA_NOTS;
2939 if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2940 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
2941 /* Don't warn if other host rejected RFC1323 */
2942 DPFPRINTF(("Broken RFC1323 stack did not "
2943 "timestamp data packet. Disabled PAWS "
2945 pf_print_state(state);
2946 pf_print_flags(th->th_flags);
2954 * Update PAWS values
2956 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
2957 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
2958 getmicrouptime(&src->scrub->pfss_last);
2959 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
2960 (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2961 src->scrub->pfss_tsval = tsval;
2964 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
2965 (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2966 src->scrub->pfss_tsecr = tsecr;
2968 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
2969 (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
2970 src->scrub->pfss_tsval0 == 0)) {
2971 /* tsval0 MUST be the lowest timestamp */
2972 src->scrub->pfss_tsval0 = tsval;
2975 /* Only fully initialized after a TS gets echoed */
2976 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
2977 src->scrub->pfss_flags |= PFSS_PAWS;
2981 /* I have a dream.... TCP segment reassembly.... */
2986 pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
2987 struct pf_pdesc *pd, pbuf_t *pbuf, struct tcphdr *th, int off,
2990 #pragma unused(dir, kif)
2991 sa_family_t af = pd->af;
2994 int opt, cnt, optlen = 0;
2996 u_char opts[MAX_TCPOPTLEN];
2997 u_char *optp = opts;
2999 thoff = th->th_off << 2;
3000 cnt = thoff - sizeof (struct tcphdr);
3002 if (cnt > 0 && !pf_pull_hdr(pbuf, off + sizeof (*th), opts, cnt,
3006 for (; cnt > 0; cnt -= optlen, optp += optlen) {
3008 if (opt == TCPOPT_EOL)
3010 if (opt == TCPOPT_NOP)
3016 if (optlen < 2 || optlen > cnt)
3021 mss = (u_int16_t *)(void *)(optp + 2);
3022 if ((ntohs(*mss)) > r->max_mss) {
3025 * Only do the TCP checksum fixup if delayed
3026 * checksum calculation will not be performed.
3029 !(*pbuf->pb_csum_flags & CSUM_TCP))
3030 th->th_sum = pf_cksum_fixup(th->th_sum,
3031 *mss, htons(r->max_mss), 0);
3032 *mss = htons(r->max_mss);
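/*
 * Illustrative sketch (editor's addition, not part of pf_norm.c): the
 * max-mss clamp applied above as a standalone helper. The MSS value sits
 * two bytes into the option in network byte order; when it exceeds the
 * rule's max-mss it is rewritten in place and the TCP checksum is patched
 * with the same one's-complement folding pf_cksum_fixup() uses (skipped in
 * the code above when the checksum will be computed later, per the
 * CSUM_TCP test). clamp_mss() is a hypothetical user-space name.
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* htons(), ntohs() */

static int
clamp_mss(uint8_t *mss_opt_val, uint16_t max_mss, uint16_t *th_sum)
{
	uint16_t mss;
	uint32_t sum;

	memcpy(&mss, mss_opt_val, sizeof (mss));	/* network byte order */
	if (ntohs(mss) <= max_mss)
		return (0);				/* already small enough */

	/* incremental checksum update: remove old MSS, add the clamped one */
	sum = *th_sum + mss - htons(max_mss);
	sum = (sum >> 16) + (sum & 0xffff);
	*th_sum = (uint16_t)(sum & 0xffff);

	mss = htons(max_mss);
	memcpy(mss_opt_val, &mss, sizeof (mss));	/* rewrite in place */
	return (1);
}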
3044 VERIFY(pbuf == pd->mp);
3046 if (pf_lazy_makewritable(pd, pd->mp,
3047 off + sizeof (*th) + thoff) == NULL) {
3048 REASON_SET(&reason, PFRES_MEMORY);
3050 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
3056 pbuf_copy_back(pd->mp, off + sizeof (*th), thoff - sizeof (*th), opts);