/*
 * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* $apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/random.h>
#include <sys/mcache.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>

#include <net/pfvar.h>
struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;
	struct mbuf *fr_m;
#define fr_ip           fr_u.fru_ipv4
#define fr_ip6          fr_u.fru_ipv6
	union {
		struct ip *fru_ipv4;
		struct ip6_hdr *fru_ipv6;
	} fr_u;
	struct ip6_frag fr_ip6f_opt;
	uint16_t fr_ip6f_hlen;          /* total header length */
	uint16_t fr_ip6f_extoff;        /* last extension header offset or 0 */
};

struct pf_frcache {
	LIST_ENTRY(pf_frcache) fr_next;
	uint16_t        fr_off;
	uint16_t        fr_end;
};

#define PFFRAG_SEENLAST 0x0001          /* Seen the last fragment for this */
#define PFFRAG_NOBUFFER 0x0002          /* Non-buffering fragment cache */
#define PFFRAG_DROP     0x0004          /* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)    (!((fr)->fr_flags & PFFRAG_NOBUFFER))
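/*
 * A pf_fragment below tracks one datagram in one of two modes.  With
 * PFFRAG_NOBUFFER clear, the fragment mbufs themselves are held on
 * fr_queue until the datagram can be fully reassembled; with it set,
 * only the byte ranges already seen are remembered on fr_cache while
 * the fragments themselves are passed through (or dropped).  E.g.:
 *
 *	if (BUFFER_FRAGMENTS(frag))	-> reassembly path, uses fr_queue
 *	else				-> cache path, uses fr_cache
 */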
struct pf_fragment {
	RB_ENTRY(pf_fragment) fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	struct pf_addr  fr_srcx;
	struct pf_addr  fr_dstx;
	u_int8_t        fr_p;           /* protocol of this fragment */
	u_int8_t        fr_flags;       /* status flags */
	u_int16_t       fr_max;         /* fragment data max */
#define fr_id           fr_uid.fru_id4
#define fr_id6          fr_uid.fru_id6
	union {
		u_int16_t       fru_id4;
		u_int32_t       fru_id6;
	} fr_uid;
	int             fr_af;
	u_int32_t       fr_timeout;
#define fr_queue        fr_u.fru_queue
#define fr_cache        fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;        /* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;     /* non-buf */
	} fr_u;
	uint32_t        fr_csum_flags;  /* checksum flags */
	uint32_t        fr_csum;        /* partial checksum value */
	uint16_t        fr_ip6_maxlen;  /* maximum length of a single fragment in IPv6 */
};
static TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue;
static TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue;
static __inline int     pf_frag_compare(struct pf_fragment *,
    struct pf_fragment *);
static RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
    pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
/* Private prototypes */
static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *,
    struct ip6_frag *);
static void pf_ip2key(struct pf_fragment *, struct ip *);
static void pf_remove_fragment(struct pf_fragment *);
static void pf_flush_fragments(void);
static void pf_free_fragment(struct pf_fragment *);
static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *,
    struct pf_frag_tree *);
static __inline struct pf_fragment *
pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *);
static struct mbuf *pf_reassemble(struct mbuf *, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_fragcache(struct mbuf **, struct ip *,
    struct pf_fragment **, int, int, int *);
static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *,
    struct pf_pdesc *, pbuf_t *, struct tcphdr *, int, int *);
static __inline struct pf_fragment *
pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *,
    struct pf_frag_tree *);
static struct mbuf *pf_reassemble6(struct mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr *,
    struct ip6_frag *, struct pf_fragment **, int, int, int, int *);
#define DPFPRINTF(x) do {                                               \
	if (pf_status.debug >= PF_DEBUG_MISC) {                         \
		printf("%s: ", __func__);                               \
		printf x;                                               \
	}                                                               \
} while (0)
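/*
 * Note on usage: the argument list is wrapped in a second set of
 * parentheses so a single macro parameter can carry a variable number
 * of printf() arguments, e.g.:
 *
 *	DPFPRINTF(("overlap -%d\n", precut));
 *
 * which expands to printf("%s: ", __func__); printf("overlap -%d\n",
 * precut); when pf_status.debug >= PF_DEBUG_MISC, and prints nothing
 * otherwise.
 */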
/* Globals */
struct pool pf_frent_pl, pf_frag_pl;
static struct pool pf_cache_pl, pf_cent_pl;
struct pool pf_state_scrub_pl;

static int pf_nfrents, pf_ncache;
void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);

	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}
void
pf_normalize_destroy(void)
{
	pool_destroy(&pf_state_scrub_pl);
	pool_destroy(&pf_cent_pl);
	pool_destroy(&pf_cache_pl);
	pool_destroy(&pf_frag_pl);
	pool_destroy(&pf_frent_pl);
}
int
pf_normalize_isempty(void)
{
	return TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue);
}
static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int diff;

	if ((diff = a->fr_af - b->fr_af)) {
		return diff;
	} else if ((diff = a->fr_p - b->fr_p)) {
		return diff;
	} else {
		struct pf_addr *sa = &a->fr_srcx;
		struct pf_addr *sb = &b->fr_srcx;
		struct pf_addr *da = &a->fr_dstx;
		struct pf_addr *db = &b->fr_dstx;

		switch (a->fr_af) {
		case AF_INET:
			if ((diff = a->fr_id - b->fr_id)) {
				return diff;
			} else if (sa->v4addr.s_addr < sb->v4addr.s_addr) {
				return -1;
			} else if (sa->v4addr.s_addr > sb->v4addr.s_addr) {
				return 1;
			} else if (da->v4addr.s_addr < db->v4addr.s_addr) {
				return -1;
			} else if (da->v4addr.s_addr > db->v4addr.s_addr) {
				return 1;
			}
			break;
		case AF_INET6:
			if ((diff = a->fr_id6 - b->fr_id6)) {
				return diff;
			} else if (sa->addr32[3] < sb->addr32[3]) {
				return -1;
			} else if (sa->addr32[3] > sb->addr32[3]) {
				return 1;
			} else if (sa->addr32[2] < sb->addr32[2]) {
				return -1;
			} else if (sa->addr32[2] > sb->addr32[2]) {
				return 1;
			} else if (sa->addr32[1] < sb->addr32[1]) {
				return -1;
			} else if (sa->addr32[1] > sb->addr32[1]) {
				return 1;
			} else if (sa->addr32[0] < sb->addr32[0]) {
				return -1;
			} else if (sa->addr32[0] > sb->addr32[0]) {
				return 1;
			} else if (da->addr32[3] < db->addr32[3]) {
				return -1;
			} else if (da->addr32[3] > db->addr32[3]) {
				return 1;
			} else if (da->addr32[2] < db->addr32[2]) {
				return -1;
			} else if (da->addr32[2] > db->addr32[2]) {
				return 1;
			} else if (da->addr32[1] < db->addr32[1]) {
				return -1;
			} else if (da->addr32[1] > db->addr32[1]) {
				return 1;
			} else if (da->addr32[0] < db->addr32[0]) {
				return -1;
			} else if (da->addr32[0] > db->addr32[0]) {
				return 1;
			}
			break;
		default:
			VERIFY(!0 && "only IPv4 and IPv6 supported!");
			break;
		}
	}
	return 0;
}
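/*
 * The comparator above supplies the total order for the red-black trees
 * generated earlier: fragments are keyed by (af, proto, id, src, dst),
 * so RB_INSERT() files each datagram's state under that tuple and
 * RB_FIND() can locate it again from a stack-allocated struct
 * pf_fragment whose key fields alone have been filled in (see
 * pf_ip2key() and pf_ip6hdr2key() below).
 */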
void
pf_purge_expired_fragments(void)
{
	struct pf_fragment *frag;
	u_int32_t expire = pf_time_second() -
	    pf_default_rule.timeout[PFTM_FRAG];

	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		VERIFY(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire) {
			break;
		}

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n",
			    ntohs(frag->fr_id),
			    (uint64_t)VM_KERNEL_ADDRPERM(frag)));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n",
			    ntohl(frag->fr_id6),
			    (uint64_t)VM_KERNEL_ADDRPERM(frag)));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		VERIFY(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire) {
			break;
		}

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n",
			    ntohs(frag->fr_id),
			    (uint64_t)VM_KERNEL_ADDRPERM(frag)));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(0x%llx) from cache.\n",
			    ntohl(frag->fr_id6),
			    (uint64_t)VM_KERNEL_ADDRPERM(frag)));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
		VERIFY(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}
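/*
 * Expiry arithmetic, by example: with a fragment timeout of 30 seconds
 * (the usual PFTM_FRAG default), a purge at pf_time_second() == 1025
 * computes expire == 995, so an entry stamped fr_timeout == 1000 is kept
 * (1000 > 995) while one stamped 990 is freed.  Both queues are ordered
 * newest-first, which is why scanning from TAILQ_LAST() may stop at the
 * first entry that has not yet expired.
 */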
/*
 * Try to flush old fragments to make space for new ones
 */
static void
pf_flush_fragments(void)
{
	struct pf_fragment *frag;
	int goal;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(("trying to free > %d frents\n",
	    pf_nfrents - goal));
	while (goal < pf_nfrents) {
		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
		if (frag == NULL) {
			break;
		}
		pf_free_fragment(frag);
	}

	goal = pf_ncache * 9 / 10;
	DPFPRINTF(("trying to free > %d cache entries\n",
	    pf_ncache - goal));
	while (goal < pf_ncache) {
		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
		if (frag == NULL) {
			break;
		}
		pf_free_fragment(frag);
	}
}
/* Frees the fragments and all associated entries */
static void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent *frent;
	struct pf_frcache *frcache;

	/* Free all fragments */
	if (BUFFER_FRAGMENTS(frag)) {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			m_freem(frent->fr_m);
			pool_put(&pf_frent_pl, frent);
			pf_nfrents--;
		}
	} else {
		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
		    frcache = LIST_FIRST(&frag->fr_cache)) {
			LIST_REMOVE(frcache, fr_next);

			VERIFY(LIST_EMPTY(&frag->fr_cache) ||
			    LIST_FIRST(&frag->fr_cache)->fr_off >
			    frcache->fr_end);

			pool_put(&pf_cent_pl, frcache);
			pf_ncache--;
		}
	}

	pf_remove_fragment(frag);
}
static void
pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6,
    struct ip6_frag *fh)
{
	key->fr_p = fh->ip6f_nxt;
	key->fr_id6 = fh->ip6f_ident;
	key->fr_af = AF_INET6;
	key->fr_srcx.v6addr = ip6->ip6_src;
	key->fr_dstx.v6addr = ip6->ip6_dst;
}
static void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
	key->fr_p = ip->ip_p;
	key->fr_id = ip->ip_id;
	key->fr_af = AF_INET;
	key->fr_srcx.v4addr.s_addr = ip->ip_src.s_addr;
	key->fr_dstx.v4addr.s_addr = ip->ip_dst.s_addr;
}
static struct pf_fragment *
pf_find_fragment_by_key(struct pf_fragment *key, struct pf_frag_tree *tree)
{
	struct pf_fragment *frag;

	frag = RB_FIND(pf_frag_tree, tree, key);
	if (frag != NULL) {
		/* XXX Are we sure we want to update the timeout? */
		frag->fr_timeout = pf_time_second();
		if (BUFFER_FRAGMENTS(frag)) {
			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
		} else {
			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
		}
	}

	return frag;
}
static __inline struct pf_fragment *
pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree)
{
	struct pf_fragment key;
	pf_ip2key(&key, ip);
	return pf_find_fragment_by_key(&key, tree);
}
/* Removes a fragment from the fragment queue and frees the fragment */
static void
pf_remove_fragment(struct pf_fragment *frag)
{
	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
		pool_put(&pf_frag_pl, frag);
	} else {
		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
		pool_put(&pf_cache_pl, frag);
	}
}
#define FR_IP_OFF(fr)   ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
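/*
 * FR_IP_OFF() recovers a queued fragment's byte offset: ip_off is kept
 * in network order, IP_OFFMASK (0x1fff) strips the DF/MF flag bits, and
 * the remaining 13-bit field counts 8-byte units.  For example, with
 * ntohs(ip_off) == 0x2064, MF is set and the fragment data begins at
 * (0x2064 & 0x1fff) << 3 == 100 * 8 == 800 bytes into the datagram.
 */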
static struct mbuf *
pf_reassemble(struct mbuf *m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *m = m0, *m2;
	struct pf_frent *frea, *next;
	struct pf_frent *frep = NULL;
	struct ip *ip = frent->fr_ip;
	uint32_t hlen = ip->ip_hl << 2;
	u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t fr_max = ip_len + off;
	uint32_t csum, csum_flags;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/*
	 * Leverage partial checksum offload for IP fragments.  Narrow down
	 * the scope to cover only UDP without IP options, as that is the
	 * most common case.
	 *
	 * Perform 1's complement adjustment of octets that got included/
	 * excluded in the hardware-calculated checksum value.  Ignore cases
	 * where the value includes the entire IPv4 header span, as the sum
	 * for those octets would already be 0 by the time we get here; IP
	 * has already performed its header checksum validation.  Also take
	 * care of any trailing bytes and subtract out their partial sum.
	 */
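	/*
	 * By example: if the NIC began summing at csum_rx_start == 0 (the
	 * start of the IPv4 header) while the UDP payload begins at
	 * hlen == 20, and the frame carried 4 octets of link-layer padding
	 * (trailer == 4), then m_adj_sum16() below is asked for the sum of
	 * exactly the (ip_len - hlen) payload octets starting at hlen,
	 * which removes both the 20 leading header octets and the trailing
	 * pad from the hardware value; m_adj() then trims the pad bytes
	 * off the mbuf itself.
	 */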
	if (ip->ip_p == IPPROTO_UDP && hlen == sizeof(struct ip) &&
	    (m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t start = m->m_pkthdr.csum_rx_start;
		int32_t trailer = (m_pktlen(m) - ntohs(ip->ip_len));
		uint32_t swbytes = (uint32_t)trailer;

		csum = m->m_pkthdr.csum_rx_val;

		ASSERT(trailer >= 0);
		if ((start != 0 && start != hlen) || trailer != 0) {
#if BYTE_ORDER != BIG_ENDIAN
			if (start < hlen) {
				HTONS(ip->ip_len);
				HTONS(ip->ip_off);
			}
#endif /* BYTE_ORDER != BIG_ENDIAN */
			/* callee folds in sum */
			csum = m_adj_sum16(m, start, hlen,
			    (ip->ip_len - hlen), csum);
			if (hlen > start) {
				swbytes += (hlen - start);
			} else {
				swbytes += (start - hlen);
			}
#if BYTE_ORDER != BIG_ENDIAN
			if (start < hlen) {
				NTOHS(ip->ip_off);
				NTOHS(ip->ip_len);
			}
#endif /* BYTE_ORDER != BIG_ENDIAN */
		}
		csum_flags = m->m_pkthdr.csum_flags;

		if (swbytes != 0) {
			udp_in_cksum_stats(swbytes);
		}
		if (trailer != 0) {
			m_adj(m, -trailer);
		}
	} else {
		csum = 0;
		csum_flags = 0;
	}

	/* Invalidate checksum */
	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL) {
				goto drop_fragment;
			}
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4addr = frent->fr_ip->ip_src;
		(*frag)->fr_dstx.v4addr = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = pf_time_second();
		if (csum_flags != 0) {
			(*frag)->fr_csum_flags = csum_flags;
			(*frag)->fr_csum = csum;
		}
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * If this fragment contains similar checksum offload info
	 * as that of the existing ones, accumulate checksum.  Otherwise,
	 * invalidate checksum offload info for the entire datagram.
	 */
	if (csum_flags != 0 && csum_flags == (*frag)->fr_csum_flags) {
		(*frag)->fr_csum += csum;
	} else if ((*frag)->fr_csum_flags != 0) {
		(*frag)->fr_csum_flags = 0;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off) {
			break;
		}
		frep = frea;
	}

	VERIFY(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off) {
		u_int16_t precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len) {
			goto drop_fragment;
		}
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next) {
		u_int16_t aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4) {
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max) {
		(*frag)->fr_max = fr_max;
	}
	/* This is the last segment */
	if (!mff) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	if (frep == NULL) {
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	} else {
		LIST_INSERT_AFTER(frep, frent, fr_next);
	}

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) {
		return NULL;
	}

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off)) {
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return NULL;
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max) {
		return NULL;
	}

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return NULL;
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_srcx.v4addr;
	ip->ip_dst = (*frag)->fr_dstx.v4addr;

	if ((*frag)->fr_csum_flags != 0) {
		csum = (*frag)->fr_csum;

		ADDCARRY(csum);

		m->m_pkthdr.csum_rx_val = csum;
		m->m_pkthdr.csum_rx_start = sizeof(struct ip);
		m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
	} else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		/* loopback checksums are always OK */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_data -= hlen;
	m->m_len += hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next) {
			plen += m2->m_len;
		}
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: 0x%llx(%d)\n",
	    (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len)));
	return m;

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return NULL;
}
static struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
	struct mbuf *m = *m0;
	struct pf_frcache *frp, *fra, *cur = NULL;
	int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	u_int16_t off = ntohs(h->ip_off) << 3;
	u_int16_t fr_max = ip_len + off;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL) {
				goto no_mem;
			}
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4addr = h->ip_src;
		(*frag)->fr_dstx.v4addr = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = pf_time_second();

		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
		    fr_max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off) {
			break;
		}
		frp = fra;
	}

	VERIFY(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/*
			 * The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/*
				 * XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT);
				if (*m0 == NULL) {
					goto no_mem;
				}
				VERIFY((*m0)->m_next == NULL);
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next) {
						plen += t->m_len;
					}
					m->m_pkthdr.len = plen;
				}

				h = mtod(m, struct ip *);

				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - precut);
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL) {
				goto no_mem;
			}
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next) {
						plen += t->m_len;
					}
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut);
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL) {
				goto no_mem;
			}
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				cur = NULL;
				pf_ncache--;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				frp = NULL;
				pf_ncache--;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max) {
		(*frag)->fr_max = fr_max;
	}

	/* This is the last segment */
	if (!mff) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return m;

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}
	m_freem(m);
	return NULL;

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0) {
			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
			    h->ip_id));
			(*frag)->fr_flags |= PFFRAG_DROP;
		}
	}

	m_freem(m);
	return NULL;
}
#define FR_IP6_OFF(fr) \
	(ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
#define FR_IP6_PLEN(fr) (ntohs((fr)->fr_ip6->ip6_plen))
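/*
 * FR_IP6_OFF() yields the fragment's byte offset directly: the 13-bit
 * offset field occupies the top bits of ip6f_offlg, so masking with
 * IP6F_OFF_MASK and byte-swapping leaves a value already scaled by 8.
 * For example, ntohs(ip6f_offlg) == 0x0321 means the M (more fragments)
 * bit is set and the data begins at 0x0321 & 0xfff8 == 0x0320 == 800
 * bytes.  FR_IP6_PLEN() is simply the host-order payload length.
 */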
static struct mbuf *
pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *m, *m2;
	struct pf_frent *frea, *frep, *next;
	struct ip6_hdr *ip6;
	struct ip6_frag *ip6f;
	int plen, off, fr_max, pktlen;
	uint32_t uoff, csum, csum_flags;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
	m = *m0;
	frep = NULL;
	ip6 = frent->fr_ip6;
	ip6f = &frent->fr_ip6f_opt;
	off = FR_IP6_OFF(frent);
	uoff = frent->fr_ip6f_hlen;
	plen = FR_IP6_PLEN(frent);
	fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof(*ip6));
	pktlen = plen + sizeof(*ip6);

	DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u "
	    "fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off,
	    frent->fr_ip6f_hlen, fr_max, m->m_len));

	/*
	 * Leverage partial checksum offload for simple UDP/IP fragments,
	 * as that is the most common case.
	 *
	 * Perform 1's complement adjustment of octets that got included/
	 * excluded in the hardware-calculated checksum value.  Also take
	 * care of any trailing bytes and subtract out their partial sum.
	 */
	if (ip6f->ip6f_nxt == IPPROTO_UDP &&
	    uoff == (sizeof(*ip6) + sizeof(*ip6f)) &&
	    (m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t start = m->m_pkthdr.csum_rx_start;
		uint32_t ip_len = (sizeof(*ip6) + ntohs(ip6->ip6_plen));
		int32_t trailer = (m_pktlen(m) - ip_len);
		uint32_t swbytes = (uint32_t)trailer;

		csum = m->m_pkthdr.csum_rx_val;

		ASSERT(trailer >= 0);
		if (start != uoff || trailer != 0) {
			uint16_t s = 0, d = 0;

			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
				s = ip6->ip6_src.s6_addr16[1];
				ip6->ip6_src.s6_addr16[1] = 0;
			}
			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
				d = ip6->ip6_dst.s6_addr16[1];
				ip6->ip6_dst.s6_addr16[1] = 0;
			}

			/* callee folds in sum */
			csum = m_adj_sum16(m, start, uoff,
			    (ip_len - uoff), csum);
			if (uoff > start) {
				swbytes += (uoff - start);
			} else {
				swbytes += (start - uoff);
			}

			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
				ip6->ip6_src.s6_addr16[1] = s;
			}
			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
				ip6->ip6_dst.s6_addr16[1] = d;
			}
		}
		csum_flags = m->m_pkthdr.csum_flags;

		if (swbytes != 0) {
			udp_in6_cksum_stats(swbytes);
		}
		if (trailer != 0) {
			m_adj(m, -trailer);
		}
	} else {
		csum = 0;
		csum_flags = 0;
	}

	/* Invalidate checksum */
	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;

	/* strip off headers up to the fragment payload */
	m->m_data += frent->fr_ip6f_hlen;
	m->m_len -= frent->fr_ip6f_hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL) {
				goto drop_fragment;
			}
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_ip6_maxlen = pktlen;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6addr = frent->fr_ip6->ip6_src;
		(*frag)->fr_dstx.v6addr = frent->fr_ip6->ip6_dst;
		(*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
		(*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();
		if (csum_flags != 0) {
			(*frag)->fr_csum_flags = csum_flags;
			(*frag)->fr_csum = csum;
		}
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/* Remember maximum fragment len for refragmentation */
	if (pktlen > (*frag)->fr_ip6_maxlen) {
		(*frag)->fr_ip6_maxlen = pktlen;
	}
	/*
	 * If this fragment contains similar checksum offload info
	 * as that of the existing ones, accumulate checksum.  Otherwise,
	 * invalidate checksum offload info for the entire datagram.
	 */
	if (csum_flags != 0 && csum_flags == (*frag)->fr_csum_flags) {
		(*frag)->fr_csum += csum;
	} else if ((*frag)->fr_csum_flags != 0) {
		(*frag)->fr_csum_flags = 0;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP6_OFF(frea) > off) {
			break;
		}
		frep = frea;
	}

	VERIFY(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off) {
		u_int16_t precut;

		precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
		    frep->fr_ip6f_hlen - off;
		if (precut >= plen) {
			goto drop_fragment;
		}
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		frent->fr_ip6f_opt.ip6f_offlg =
		    htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
		    (precut >> 3));
		off = FR_IP6_OFF(frent);
		plen -= precut;
		ip6->ip6_plen = htons(plen);
	}

	for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
		u_int16_t aftercut;

		aftercut = plen + off - FR_IP6_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
			frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
			    aftercut);
			frea->fr_ip6f_opt.ip6f_offlg =
			    htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max) {
		(*frag)->fr_max = fr_max;
	}
	/* This is the last segment */
	if (!mff) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	if (frep == NULL) {
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	} else {
		LIST_INSERT_AFTER(frep, frent, fr_next);
	}

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) {
		return NULL;
	}

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);
		off += FR_IP6_PLEN(frep) - (frent->fr_ip6f_hlen - sizeof *ip6);
		DPFPRINTF(("frep at %d, next %d, max %d\n",
		    off, next == NULL ? -1 : FR_IP6_OFF(next),
		    (*frag)->fr_max));
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP6_OFF(next) != off)) {
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP6_OFF(next),
			    (*frag)->fr_max));
			return NULL;
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max) {
		return NULL;
	}

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return NULL;
	}

	ASSERT(*frag != NULL);
	ASSERT(frent != NULL);
	next = LIST_NEXT(frent, fr_next);
	if (next == NULL) {
		DPFPRINTF(("drop: atomic fragment\n"));
		pf_free_fragment(*frag);
		*frag = NULL;
		return NULL;
	}

	/* retrieve the values to be filled in to reassembled tag */
	uint16_t hdrlen, unfragpartlen, extoff, maxlen;
	uint32_t id;

	/* Get total extension header length from the first fragment */
	hdrlen = frent->fr_ip6f_hlen - sizeof(struct ip6_frag);
	/*
	 * Get total extension header length of per-fragment headers from the
	 * subsequent fragment.
	 */
	unfragpartlen = next->fr_ip6f_hlen - sizeof(struct ip6_frag);
	extoff = frent->fr_ip6f_extoff;
	maxlen = (*frag)->fr_ip6_maxlen;
	id = (*frag)->fr_id6;

	ip6 = frent->fr_ip6;
	ip6->ip6_nxt = (*frag)->fr_p;
	ip6->ip6_plen = htons(off);
	ip6->ip6_src = (*frag)->fr_srcx.v6addr;
	ip6->ip6_dst = (*frag)->fr_dstx.v6addr;

	if ((*frag)->fr_csum_flags != 0) {
		csum = (*frag)->fr_csum;

		ADDCARRY(csum);

		m->m_pkthdr.csum_rx_val = csum;
		m->m_pkthdr.csum_rx_start = sizeof(struct ip6_hdr);
		m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
	} else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		/* loopback checksums are always OK */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	}

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	m = frent->fr_m;
	m->m_len += sizeof(struct ip6_hdr);
	m->m_data -= sizeof(struct ip6_hdr);
	memmove(m->m_data, ip6, sizeof(struct ip6_hdr));

	next = LIST_NEXT(frent, fr_next);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; next != NULL; frent = next) {
		m2 = frent->fr_m;

		m_cat(m, m2);
		next = LIST_NEXT(frent, fr_next);
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
	}

	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int len = 0;
		for (m2 = m; m2; m2 = m2->m_next) {
			len += m2->m_len;
		}
		m->m_pkthdr.len = len;
	}

	DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n",
	    (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip6->ip6_plen),
	    m->m_pkthdr.len));

	/* Add the reassembled tag */
	struct m_tag *mtag;
	struct pf_fragment_tag *ftag;
	mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
	    sizeof(*ftag), M_NOWAIT, m);
	if (mtag == NULL) {
		/* XXX: add stats */
		m_freem(m);
		*m0 = NULL;
		return NULL;
	}
	ftag = (struct pf_fragment_tag *)(mtag + 1);
	ftag->ft_hdrlen = hdrlen;
	ftag->ft_unfragpartlen = unfragpartlen;
	ftag->ft_extoff = extoff;
	ftag->ft_maxlen = maxlen;
	ftag->ft_id = id;
	m_tag_prepend(m, mtag);

	struct pf_mtag *pftag = pf_get_mtag(m);
	ASSERT(pftag != NULL);
	pftag->pftag_flags |= PF_TAG_REASSEMBLED;
	return m;

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	*m0 = NULL;
	return NULL;
}
static struct mbuf *
pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh,
    struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem)
{
	struct mbuf *m = *m0;
	u_int16_t plen, off, fr_max;
	struct pf_frcache *frp, *fra, *cur = NULL;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
	plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);

	/*
	 * Apple Modification: dimambro@apple.com. The hlen being passed
	 * into this function includes all the headers associated with
	 * the packet, and may include routing headers, so to get to
	 * the data payload as stored in the original IPv6 header we need
	 * to subtract all those headers and the IP header.
	 *
	 * The 'max' local variable should also contain the offset from the
	 * start of the reassembled packet to the octet just past the end of
	 * the octets in the current fragment where:
	 * - 'off' is the offset from the start of the reassembled packet to
	 *   the first octet in the fragment,
	 * - 'plen' is the length of the "payload data length" excluding all
	 *   the IPv6 headers of the fragment.
	 * - 'hlen' is computed in pf_normalize_ip6() as the offset from the
	 *   start of the IPv6 packet to the beginning of the data.
	 */
	fr_max = off + plen;
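	/*
	 * Numeric example of the above: a fragment with ip6_plen == 1240
	 * that carries a 24-octet routing header plus the 8-octet fragment
	 * header has hlen == 40 + 24 + 8 == 72, so
	 * plen == 1240 - (72 - 40) == 1208 octets of actual payload; with
	 * off == 800 the cached range becomes [800, 2008).
	 */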
	DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n",
	    (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off, fr_max));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL) {
				goto no_mem;
			}
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6addr = h->ip6_src;
		(*frag)->fr_dstx.v6addr = h->ip6_dst;
		(*frag)->fr_p = fh->ip6f_nxt;
		(*frag)->fr_id6 = fh->ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();

		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
		    off, fr_max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off) {
			break;
		}
		frp = fra;
	}

	VERIFY(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int precut;

		precut = frp->fr_end - off;
		if (precut >= plen) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/* The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/* XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym(m, 0, hlen, M_NOWAIT);
				if (*m0 == NULL) {
					goto no_mem;
				}
				VERIFY((*m0)->m_next == NULL);
				m_adj(m, precut + hlen);
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next) {
						pktlen += t->m_len;
					}
					m->m_pkthdr.len = pktlen;
				}

				h = mtod(m, struct ip6_hdr *);

				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - precut);
				fh->ip6f_offlg &= ~IP6F_OFF_MASK;
				fh->ip6f_offlg |=
				    htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
				    + (precut >> 3));
				h->ip6_plen = htons(ntohs(h->ip6_plen) -
				    precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), -precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL) {
				goto no_mem;
			}
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next) {
						pktlen += t->m_len;
					}
					m->m_pkthdr.len = pktlen;
				}
				h = mtod(m, struct ip6_hdr *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - aftercut);
				h->ip6_plen =
				    htons(ntohs(h->ip6_plen) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL) {
				goto no_mem;
			}
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), cur->fr_off,
				    cur->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				cur = NULL;
				pf_ncache--;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), frp->fr_off,
				    frp->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				frp = NULL;
				pf_ncache--;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max) {
		(*frag)->fr_max = fr_max;
	}

	/* This is the last segment */
	if (!mff) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
		    ntohl(fh->ip6f_ident), (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return m;

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}
	m_freem(m);
	return NULL;

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0) {
			DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n",
			    ntohl(fh->ip6f_ident)));
			(*frag)->fr_flags |= PFFRAG_DROP;
		}
	}

	m_freem(m);
	return NULL;
}
int
pf_refragment6(struct ifnet *ifp, pbuf_t **pbufp, struct pf_fragment_tag *ftag)
{
	struct mbuf *m;
	uint32_t frag_id;
	uint16_t hdrlen, extoff, maxlen, unfragpartlen;
	uint8_t proto;
	int error, action;
	uint8_t *lexthdrsp;
	struct route_in6 ip6route;
	struct route_in6 *ro;
	struct sockaddr_in6 *dst;
	struct ip6_hdr *hdr;
	struct pf_mtag *mtag;
	struct m_tag *tag;

	if (pbufp == NULL || !pbuf_is_valid(*pbufp) || ftag == NULL) {
		panic("pf_route6: invalid parameters");
		/* NOTREACHED */
	}

	m = pbuf_to_mbuf(*pbufp, FALSE);
	hdr = mtod(m, struct ip6_hdr *);
	mtag = pf_find_mtag(m);
	hdrlen = ftag->ft_hdrlen - sizeof(struct ip6_hdr);
	extoff = ftag->ft_extoff;
	maxlen = ftag->ft_maxlen;
	frag_id = ftag->ft_id;
	unfragpartlen = ftag->ft_unfragpartlen;
	tag = (struct m_tag *)(void *)ftag;
	tag = tag - 1;
	m_tag_delete(m, tag);
	ftag = NULL;
	mtag->pftag_flags &= ~PF_TAG_REASSEMBLED;
	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = hdr->ip6_dst;

	if (extoff) {
		int off;
		struct mbuf *mexthdr;

		/* Use protocol from next field of last extension header */
		mexthdr = m_getptr(m, extoff +
		    offsetof(struct ip6_ext, ip6e_nxt), &off);
		ASSERT(mexthdr != NULL);
		lexthdrsp = (mtod(mexthdr, uint8_t *) + off);
		proto = *lexthdrsp;
		if (proto == IPPROTO_DSTOPTS) {
			struct ip6_ext ext;
			if (!pf_pull_hdr(*pbufp, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6)) {
				DPFPRINTF(("pkt too short"));
				action = PF_DROP;
				goto done;
			}
			proto = ext.ip6e_nxt;
		}
	} else {
		lexthdrsp = NULL;
		proto = hdr->ip6_nxt;
	}

	/*
	 * The MTU must be a multiple of 8 bytes, or we risk doing the
	 * fragmentation wrong.
	 */
	maxlen = maxlen & ~7;
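	/*
	 * Rounding example: a largest-seen fragment of 1337 octets becomes
	 * maxlen == 1336 here, since every non-final IPv6 fragment must
	 * carry a multiple of 8 octets of payload (RFC 8200).
	 */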
	error = ip6_do_fragmentation(&m, hdrlen, NULL, unfragpartlen,
	    hdr, lexthdrsp, maxlen, proto, frag_id);

	if (error == 0) {
		/*
		 * PF_TAG_REFRAGMENTED flag set to indicate ip6_forward()
		 * and pf_route6() that the mbuf contains a chain of fragments.
		 */
		mtag->pftag_flags |= PF_TAG_REFRAGMENTED;
		action = PF_PASS;
		pbuf_init_mbuf(*pbufp, m, ifp);
	} else {
		DPFPRINTF(("refragment error %d", error));
		action = PF_DROP;
	}
done:
	return action;
}
int
pf_normalize_ip(pbuf_t *pbuf, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf *m;
	struct pf_rule *r;
	struct pf_frent *frent;
	struct pf_fragment *frag = NULL;
	struct ip *h = pbuf->pb_data;
	int mff = (ntohs(h->ip_off) & IP_MF);
	int hlen = h->ip_hl << 2;
	u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t fr_max;
	int ip_len;
	int ip_off;
	int asd = 0;
	struct pf_ruleset *ruleset = NULL;
	struct ifnet *ifp = pbuf->pb_ifp;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != dir) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != AF_INET) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != h->ip_p) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, kif)) {
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		} else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		} else {
			if (r->anchor == NULL) {
				break;
			} else {
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_SCRUB, &r, NULL, NULL);
			}
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_SCRUB, &r, NULL, NULL)) {
			break;
		}
	}

	if (r == NULL || r->action == PF_NOSCRUB) {
		return PF_PASS;
	} else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip)) {
		goto drop;
	}

	if (hlen > ntohs(h->ip_len)) {
		goto drop;
	}

	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ipoff = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff) {
		goto no_fragment;
	}

	/*
	 * We're dealing with a fragment now. Don't allow fragments
	 * with IP_DF to enter the cache. If the flag was cleared by
	 * no-df above, fine. Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	fr_max = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > frag->fr_max) {
			goto bad;
		}

		if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		VERIFY(!pbuf_is_valid(pbuf));

		/* Restore iph pointer after pbuf_to_mbuf() */
		h = mtod(m, struct ip *);

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			m_freem(m);
			return PF_DROP;
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
		    fragoff, fr_max));
		m = pf_reassemble(m, &frag, frent, mff);

		if (m == NULL) {
			return PF_DROP;
		}

		VERIFY(m->m_flags & M_PKTHDR);
		pbuf_init_mbuf(pbuf, m, ifp);

		/* use mtag from concatenated mbuf chain */
		pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
#if 0
// SCW: This check is superfluous
#if DIAGNOSTIC
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				m = NULL;
				goto no_mem;
			}
		}
#endif
#endif

		h = mtod(m, struct ip *);

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) {
			goto drop;
		}
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int nomem = 0;

		if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
			/*
			 * Already passed the fragment cache in the
			 * input direction.  If we continued, it would
			 * appear to be a dup and would be dropped.
			 */
			goto fragment_pass;
		}

		frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP) {
				frag->fr_flags |= PFFRAG_DROP;
			}
			goto bad;
		}

		if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			goto no_mem;
		}

		VERIFY(!pbuf_is_valid(pbuf));

		/* Restore iph pointer after pbuf_to_mbuf() */
		h = mtod(m, struct ip *);

		m = pf_fragcache(&m, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			// Note: pf_fragcache() has already m_freem'd the mbuf
			if (nomem) {
				goto no_mem;
			}
			goto drop;
		}

		VERIFY(m->m_flags & M_PKTHDR);
		pbuf_init_mbuf(pbuf, m, ifp);

		/* use mtag from copied and trimmed mbuf chain */
		pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
#if 0
// SCW: This check is superfluous
#if DIAGNOSTIC
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				m = NULL;
				goto no_mem;
			}
		}
#endif
#endif
		if (dir == PF_IN) {
			pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
		}

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) {
			goto drop;
		}

		goto fragment_pass;
	}

no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ipoff = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
	if (r->rule_flag & PFRULE_RANDOMID) {
		u_int16_t oip_id = h->ip_id;

		if (rfc6864 && IP_OFF_IS_ATOMIC(ntohs(h->ip_off))) {
			h->ip_id = 0;
		} else {
			h->ip_id = ip_randomid();
		}
		h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
		pd->flags |= PFDESC_IP_REAS;
	}

	return PF_PASS;

fragment_pass:
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}

	if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
		pd->flags |= PFDESC_IP_REAS;
	}
	return PF_PASS;

no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
		PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
		    NULL, NULL, pd);
	}
	return PF_DROP;

drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
		PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
		    NULL, NULL, pd);
	}
	return PF_DROP;

bad:
	DPFPRINTF(("dropping bad IPv4 fragment\n"));

	/* Free associated fragments */
	if (frag != NULL) {
		pf_free_fragment(frag);
	}

	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
		PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r, NULL, NULL, pd);
	}

	return PF_DROP;
}
static __inline struct pf_fragment *
pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh,
    struct pf_frag_tree *tree)
{
	struct pf_fragment key;
	pf_ip6hdr2key(&key, ip6, fh);
	return pf_find_fragment_by_key(&key, tree);
}
2236 pf_normalize_ip6(pbuf_t
*pbuf
, int dir
, struct pfi_kif
*kif
,
2237 u_short
*reason
, struct pf_pdesc
*pd
)
2239 struct mbuf
*m
= NULL
;
2241 struct ip6_hdr
*h
= pbuf
->pb_data
;
2246 struct ip6_opt_jumbo jumbo
;
2249 struct ip6_frag frag
;
2250 u_int32_t jumbolen
= 0, plen
;
2251 u_int16_t fragoff
= 0;
2254 struct pf_frent
*frent
;
2255 struct pf_fragment
*pff
= NULL
;
2256 int mff
= 0, rh_cnt
= 0;
2259 struct pf_ruleset
*ruleset
= NULL
;
2260 struct ifnet
*ifp
= pbuf
->pb_ifp
;
2262 r
= TAILQ_FIRST(pf_main_ruleset
.rules
[PF_RULESET_SCRUB
].active
.ptr
);
2265 if (pfi_kif_match(r
->kif
, kif
) == r
->ifnot
) {
2266 r
= r
->skip
[PF_SKIP_IFP
].ptr
;
2267 } else if (r
->direction
&& r
->direction
!= dir
) {
2268 r
= r
->skip
[PF_SKIP_DIR
].ptr
;
2269 } else if (r
->af
&& r
->af
!= AF_INET6
) {
2270 r
= r
->skip
[PF_SKIP_AF
].ptr
;
2272 #if 0 /* header chain! */
2273 else if (r
->proto
&& r
->proto
!= h
->ip6_nxt
) {
2274 r
= r
->skip
[PF_SKIP_PROTO
].ptr
;
2277 else if (PF_MISMATCHAW(&r
->src
.addr
,
2278 (struct pf_addr
*)(uintptr_t)&h
->ip6_src
, AF_INET6
,
2280 r
= r
->skip
[PF_SKIP_SRC_ADDR
].ptr
;
2281 } else if (PF_MISMATCHAW(&r
->dst
.addr
,
2282 (struct pf_addr
*)(uintptr_t)&h
->ip6_dst
, AF_INET6
,
2283 r
->dst
.neg
, NULL
)) {
2284 r
= r
->skip
[PF_SKIP_DST_ADDR
].ptr
;
2286 if (r
->anchor
== NULL
) {
2289 pf_step_into_anchor(&asd
, &ruleset
,
2290 PF_RULESET_SCRUB
, &r
, NULL
, NULL
);
2293 if (r
== NULL
&& pf_step_out_of_anchor(&asd
, &ruleset
,
2294 PF_RULESET_SCRUB
, &r
, NULL
, NULL
)) {
2299 if (r
== NULL
|| r
->action
== PF_NOSCRUB
) {
2302 r
->packets
[dir
== PF_OUT
]++;
2303 r
->bytes
[dir
== PF_OUT
] += pd
->tot_len
;
2306 /* Check for illegal packets */
2307 if ((uint32_t)(sizeof(struct ip6_hdr
) + IPV6_MAXPACKET
) <
2308 pbuf
->pb_packet_len
) {
2313 off
= sizeof(struct ip6_hdr
);
2319 case IPPROTO_FRAGMENT
:
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6)) {
				goto shortpkt;
			}
			extoff = off;
			/*
			 * Multiple routing headers not allowed.
			 * Routing header type zero considered harmful.
			 */
			if (proto == IPPROTO_ROUTING) {
				const struct ip6_rthdr *rh =
				    (const struct ip6_rthdr *)&ext;
				if (rh_cnt++) {
					goto drop;
				}
				if (rh->ip6r_type == IPV6_RTHDR_TYPE_0) {
					goto drop;
				}
			} else if (proto == IPPROTO_AH) {
				off += (ext.ip6e_len + 2) * 4;
			} else {
				off += (ext.ip6e_len + 1) * 8;
			}
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6)) {
				goto shortpkt;
			}
			extoff = off;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			do {
				if (!pf_pull_hdr(pbuf, ooff, &opt.ip6o_type,
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6)) {
					goto shortpkt;
				}
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(pbuf, ooff, &opt, sizeof(opt),
				    NULL, NULL, AF_INET6)) {
					goto shortpkt;
				}
				if ((ooff + (int) sizeof(opt) + opt.ip6o_len) >
				    optend) {
					goto drop;
				}
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					if (h->ip6_plen != 0) {
						goto drop;
					}
					if (!pf_pull_hdr(pbuf, ooff, &jumbo,
					    sizeof(jumbo), NULL, NULL,
					    AF_INET6)) {
						goto shortpkt;
					}
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					if (jumbolen <= IPV6_MAXPACKET) {
						goto drop;
					}
					if ((sizeof(struct ip6_hdr) +
					    jumbolen) != pbuf->pb_packet_len) {
						goto drop;
					}
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0) {
		plen = jumbolen;
	} else {
		plen = ntohs(h->ip6_plen);
	}
	if (plen == 0) {
		goto drop;
	}
	if ((uint32_t)(sizeof(struct ip6_hdr) + plen) > pbuf->pb_packet_len) {
		goto shortpkt;
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl) {
		h->ip6_hlim = r->min_ttl;
	}
	return PF_PASS;

fragment:
	plen = ntohs(h->ip6_plen);
	/* Jumbo payload packets cannot be fragmented */
	if (plen == 0 || jumbolen) {
		goto drop;
	}

	if (!pf_pull_hdr(pbuf, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) {
		goto shortpkt;
	}
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	pd->proto = frag.ip6f_nxt;
	mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
	off += sizeof(frag);
	if (fragoff + (plen - off) > IPV6_MAXPACKET) {
		goto badfrag;
	}

	fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
	// XXX SCW: mbuf-specific
	// DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u "
	//    "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off,
	//    fragoff, fr_max));
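
	/*
	 * Two strategies follow: with neither PFRULE_FRAGCROP nor
	 * PFRULE_FRAGDROP set, fragments are fully buffered and the packet
	 * is reassembled before the filter proper sees it; otherwise a
	 * non-buffering cache merely tracks the ranges already seen so
	 * overlapping data can be trimmed or dropped (see RFC 5722).
	 */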
	if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */
		pd->flags |= PFDESC_IP_REAS;

		pff = pf_find_fragment_by_ipv6_header(h, &frag,
		    &pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > pff->fr_max) {
			goto badfrag;
		}

		if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		/* Restore iph pointer after pbuf_to_mbuf() */
		h = mtod(m, struct ip6_hdr *);

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		pf_nfrents++;
		frent->fr_ip6 = h;
		frent->fr_m = m;
		frent->fr_ip6f_opt = frag;
		frent->fr_ip6f_extoff = extoff;
		frent->fr_ip6f_hlen = off;
		/* account for 2nd Destination Options header if present */
		if (pd->proto == IPPROTO_DSTOPTS) {
			if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6)) {
				goto shortpkt;
			}
			frent->fr_ip6f_hlen += (ext.ip6e_len + 1) * 8;
		}

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
		    ntohl(frag.ip6f_ident), fragoff, fr_max));
		m = pf_reassemble6(&m, &pff, frent, mff);
		if (m == NULL) {
			return PF_DROP;
		}

		pbuf_init_mbuf(pbuf, m, ifp);
		h = pbuf->pb_data;

		if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) {
			goto drop;
		}
	} else if (dir == PF_IN ||
	    !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
		/* non-buffering fragment cache (overlaps: see RFC 5722) */
		int nomem = 0;

		pff = pf_find_fragment_by_ipv6_header(h, &frag,
		    &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > pff->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP) {
				pff->fr_flags |= PFFRAG_DROP;
			}
			goto badfrag;
		}

		if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
			goto no_mem;
		}

		/* Restore iph pointer after pbuf_to_mbuf() */
		h = mtod(m, struct ip6_hdr *);

		m = pf_frag6cache(&m, h, &frag, &pff, off, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			// Note: pf_frag6cache() has already m_freem'd the mbuf
			if (nomem) {
				goto no_mem;
			}
			goto drop;
		}

		pbuf_init_mbuf(pbuf, m, ifp);
		pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
		h = pbuf->pb_data;

		if (dir == PF_IN) {
			pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
		}

		if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) {
			goto drop;
		}
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl) {
		h->ip6_hlim = r->min_ttl;
	}
	return PF_PASS;

no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	goto dropout;

shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	goto dropout;

drop:
	REASON_SET(reason, PFRES_NORM);
	goto dropout;

badfrag:
	DPFPRINTF(("dropping bad IPv6 fragment\n"));
	REASON_SET(reason, PFRES_FRAG);
	goto dropout;

dropout:
	if (pff != NULL) {
		pf_free_fragment(pff);
	}
	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
		PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	}
	return PF_DROP;
}
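
/*
 * Scrub a TCP segment: enforce sane flag combinations, clear the
 * reserved header bits, strip a stale urgent pointer, and normalize
 * options (MSS clamping) according to the matching scrub rule.
 */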
int
pf_normalize_tcp(int dir, struct pfi_kif *kif, pbuf_t *pbuf, int ipoff,
    int off, void *h, struct pf_pdesc *pd)
{
#pragma unused(ipoff, h)
	struct pf_rule *r, *rm = NULL;
	struct tcphdr *th = pd->hdr.tcp;
	int rewrite = 0;
	int asd = 0;
	u_short reason;
	u_int8_t flags;
	sa_family_t af = pd->af;
	struct pf_ruleset *ruleset = NULL;
	union pf_state_xport sxport, dxport;

	sxport.port = th->th_sport;
	dxport.port = th->th_dport;
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != dir) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif)) {
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		} else if (r->src.xport.range.op &&
		    !pf_match_xport(r->src.xport.range.op, r->proto_variant,
		    &r->src.xport, &sxport)) {
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		} else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		} else if (r->dst.xport.range.op &&
		    !pf_match_xport(r->dst.xport.range.op, r->proto_variant,
		    &r->dst.xport, &dxport)) {
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		} else if (r->os_fingerprint != PF_OSFP_ANY &&
		    !pf_osfp_match(pf_osfp_fingerprint(pd, pbuf, off, th),
		    r->os_fingerprint)) {
			r = TAILQ_NEXT(r, entries);
		} else {
			if (r->anchor == NULL) {
				rm = r;
				break;
			} else {
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_SCRUB, &r, NULL, NULL);
			}
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_SCRUB, &r, NULL, NULL)) {
			break;
		}
	}

	if (rm == NULL || rm->action == PF_NOSCRUB) {
		return PF_PASS;
	} else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) {
		pd->flags |= PFDESC_TCP_NORM;
	}
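
	/*
	 * Basic flag sanity before any rewriting: SYN must not be combined
	 * with RST, SYN|FIN is reduced to a plain SYN, a segment carrying
	 * neither ACK nor RST is dropped, and FIN, PUSH and URG are only
	 * honoured alongside ACK.
	 */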
	flags = th->th_flags;
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST) {
			goto tcp_drop;
		}

		if (flags & TH_FIN) {
			flags &= ~TH_FIN;
		}
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK | TH_RST))) {
			goto tcp_drop;
		}
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) {
			goto tcp_drop;
		}
	}

	/* Check for illegal header length */
	if (th->th_off < (sizeof(struct tcphdr) >> 2)) {
		goto tcp_drop;
	}

	/* If flags changed, or reserved data set, then adjust */
	if (flags != th->th_flags || th->th_x2 != 0) {
		u_int16_t ov, nv;

		ov = *(u_int16_t *)(&th->th_ack + 1);
		th->th_flags = flags;
		th->th_x2 = 0;
		nv = *(u_int16_t *)(&th->th_ack + 1);

		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
		rewrite = 1;
	}

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
		th->th_urp = 0;
		rewrite = 1;
	}

	/* copy back packet headers if we sanitized */
	/* Process options */
	if (r->max_mss) {
		int rv = pf_normalize_tcpopt(r, dir, kif, pd, pbuf, th, off,
		    &rewrite);
		if (rv == PF_DROP) {
			return rv;
		}
		pbuf = pd->mp;
	}

	if (rewrite) {
		if (pf_lazy_makewritable(pd, pbuf,
		    off + sizeof(*th)) == NULL) {
			REASON_SET(&reason, PFRES_MEMORY);
			if (r->log) {
				PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
				    r, 0, 0, pd);
			}
			return PF_DROP;
		}

		pbuf_copy_back(pbuf, off, sizeof(*th), th);
	}

	return PF_PASS;

tcp_drop:
	REASON_SET(&reason, PFRES_NORM);
	if (rm != NULL && r->log) {
		PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, r, NULL, NULL, pd);
	}
	return PF_DROP;
}
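
/*
 * Called when a state entry is created: allocate per-peer scrub state,
 * record the initial TTL and, if the SYN carries a timestamp option,
 * seed the PAWS bookkeeping (tsval0/tsval/tsecr and the timestamp
 * modulation value).
 */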
int
pf_normalize_tcp_init(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
#pragma unused(dst)
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];
	u_int8_t *opt;

	VERIFY(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL) {
		return 1;
	}
	bzero(src->scrub, sizeof(*src->scrub));

	switch (pd->af) {
#if INET
	case AF_INET: {
		struct ip *h = pbuf->pb_data;
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
	case AF_INET6: {
		struct ip6_hdr *h = pbuf->pb_data;
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connections. They must all set an enabled bit in pfss_flags
	 */
	if ((th->th_flags & TH_SYN) == 0) {
		return 0;
	}

	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(pbuf, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					src->scrub->pfss_ts_mod =
					    htonl(random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return 0;
}
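
/* Tear-down counterpart of pf_normalize_tcp_init(): release scrub state. */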
void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
	if (state->src.scrub) {
		pool_put(&pf_state_scrub_pl, state->src.scrub);
	}
	if (state->dst.scrub) {
		pool_put(&pf_state_scrub_pl, state->dst.scrub);
	}

	/* Someday... flush the TCP segment reassembly descriptors. */
}
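
/*
 * Per-packet companion to pf_normalize_tcp_init(): pin the TTL to the
 * maximum seen on the connection and validate/modulate RFC1323
 * timestamps, dropping segments that fail the PAWS-style checks below.
 */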
int
pf_normalize_tcp_stateful(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval uptime;
	u_int32_t tsval = 0, tsecr = 0;
	u_int tsval_from_last;
	u_int8_t hdr[60];
	u_int8_t *opt;
	int copyback = 0;
	int got_ts = 0;

	VERIFY(src->scrub || dst->scrub);

	/*
	 * Enforce the minimum TTL seen for this connection. Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#if INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = pbuf->pb_data;
			if (h->ip_ttl > src->scrub->pfss_ttl) {
				src->scrub->pfss_ttl = h->ip_ttl;
			}
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = pbuf->pb_data;
			if (h->ip6_hlim > src->scrub->pfss_ttl) {
				src->scrub->pfss_ttl = h->ip6_hlim;
			}
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
	}
	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(pbuf, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/*
				 * Modulate the timestamps. Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */
				if (got_ts) {
					/* Huh? Multiple timestamps!? */
					if (pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						printf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return PF_DROP;
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_a(&opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(&opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}

		if (copyback) {
			/* Copyback the options, caller copies back header */
			int optoff = off + sizeof(*th);
			int optlen = (th->th_off << 2) - sizeof(*th);
			if (pf_lazy_makewritable(pd, pbuf, optoff + optlen) ==
			    NULL) {
				REASON_SET(reason, PFRES_MEMORY);
				return PF_DROP;
			}
			*writeback = optoff + optlen;
			pbuf_copy_back(pbuf, optoff, optlen, hdr + sizeof(*th));
		}
	}

	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms. That turns out to
	 * be about 24 days before it wraps. XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    pf_time_second() - state->creation > TS_MAX_CONN)) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		src->scrub->pfss_flags =
		    (src->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags =
		    (dst->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED;
	}
	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/*
		 * Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers). PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4Gb). We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 * - The timestamp on this packet must be greater than
		 *   or equal to the last value echoed by the other
		 *   endpoint. The RFC says those will be discarded
		 *   since it is a dup that has already been acked.
		 *   This gives us a lowerbound on the timestamp.
		 *       timestamp >= other last echoed timestamp
		 * - The timestamp will be less than or equal to
		 *   the last timestamp plus the time between the
		 *   last packet and now. The RFC defines the max
		 *   clock rate as 1ms. We will allow clocks to be
		 *   up to 10% fast and will allow a total difference
		 *   of 30 seconds due to a route change. And this
		 *   gives us an upperbound on the timestamp.
		 *       timestamp <= last timestamp + max ticks
		 *
		 * We have to be careful here. Windows will send an
		 * initial timestamp of zero and then initialize it
		 * to a random value after the 3whs; presumably to
		 * avoid a DoS by having to call an expensive RNG
		 * during a SYN flood. Proof MS has at least one
		 * good security geek.
		 *
		 * - The TCP timestamp option must also echo the other
		 *   endpoint's timestamp. The timestamp echoed is the
		 *   one carried on the earliest unacknowledged segment
		 *   on the left edge of the sequence window. The RFC
		 *   states that the host will reject any echoed
		 *   timestamps that were larger than any ever sent.
		 *   This gives us an upperbound on the TS echo.
		 *       tsecr <= largest_tsval
		 * - The lowerbound on the TS echo is a little more
		 *   tricky to determine. The other endpoint's echoed
		 *   values will not decrease. But there may be
		 *   network conditions that re-order packets and
		 *   cause our view of them to decrease. For now the
		 *   only lowerbound we can safely determine is that
		 *   the TS echo will never be less than the original
		 *   TS. XXX There is probably a better lowerbound.
		 *   Remove TS_MAX_CONN with better lowerbound check.
		 *       tsecr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days. So we just disable TS checking after 24
		 * days of idle time. We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
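
		/*
		 * Condensed form of the checks coded below (all comparisons
		 * are modulo 2^32, via SEQ_LT/SEQ_GT), enforced once both
		 * endpoints have reached ESTABLISHED:
		 *
		 *	tsval >= dst->pfss_tsecr	(TS lowerbound)
		 *	tsval <= src->pfss_tsval + max_ticks	(TS upperbound)
		 *	tsecr <= dst->pfss_tsval	(echo upperbound)
		 *	tsecr >= dst->pfss_tsval0	(echo lowerbound)
		 *
		 * The echo checks apply only when tsecr is nonzero.
		 */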
		struct timeval delta_ts;
		u_int32_t ts_fudge = 0;

		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp. This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) {
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
		}

		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS / TS_MAXFREQ);
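
		/*
		 * Worked example: after 2s of idle time with a 30s ts_fudge,
		 * (2 + 30) * 1100 = 35200 ticks of timestamp advance are
		 * tolerated, plus the sub-second remainder at ~1.1 ticks
		 * per millisecond.
		 */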
		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/*
			 * Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */
			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0) ? '3' : ' '));
			DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u "
			    "idle: %lus %ums\n",
			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000));
			DPFPRINTF((" src->tsval: %u tsecr: %u\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u\n",
			    dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
			    dst->scrub->pfss_tsval0));
			if (pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return PF_DROP;
		}

		/* XXX I'd really like to require tsecr but it's optional */
	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/*
		 * Didn't send a timestamp. Timestamps aren't really useful
		 * when:
		 * - connection opening or closing (often not even sent),
		 *   but we must not let an attacker put a FIN on a
		 *   data packet to sneak it through our ESTABLISHED check.
		 * - on a TCP reset. RFC suggests not even looking at TS.
		 * - on an empty ACK. The TS will not be echoed so it will
		 *   probably not help keep the RTT calculation in sync and
		 *   there isn't as much danger when the sequence numbers
		 *   got wrapped. So some stacks don't include TS on empty
		 *   ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets. There appear to be devices that do legitimate
		 * TCP connection hijacking. There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets. So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */
		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey! Someone tried to sneak a packet in. Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return PF_DROP;
		}
	}

	/*
	 * We will note if a host sends his data packets with or without
	 * timestamps. And require all data packets to contain a timestamp
	 * if the first does. PAWS implicitly requires that all data packets be
	 * timestamped. But I think there are middle-man devices that hijack
	 * TCP streams immediately after the 3whs and don't timestamp their
	 * packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP | PFSS_DATA_TS | PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts) {
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		} else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
		}
	}

	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED | PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0) {
			src->scrub->pfss_tsval = tsval;
		}

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0) {
				src->scrub->pfss_tsecr = tsecr;
			}

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) {
				src->scrub->pfss_flags |= PFSS_PAWS;
			}
		}
	}

	/* I have a dream.... TCP segment reassembly.... */
	return 0;
}
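
/*
 * Walk the TCP options of a segment; the only normalization currently
 * performed is clamping the advertised MSS to the rule's max-mss, with
 * a checksum fixup unless checksum calculation is delayed.
 */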
static int
pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
    struct pf_pdesc *pd, pbuf_t *pbuf, struct tcphdr *th, int off,
    int *rewrptr)
{
#pragma unused(dir, kif)
	sa_family_t af = pd->af;
	u_int16_t *mss;
	int thoff;
	int opt, cnt, optlen = 0;
	int rewrite = 0;
	u_char opts[MAX_TCPOPTLEN];
	u_char *optp = opts;

	thoff = th->th_off << 2;
	cnt = thoff - sizeof(struct tcphdr);

	if (cnt > 0 && !pf_pull_hdr(pbuf, off + sizeof(*th), opts, cnt,
	    NULL, NULL, af)) {
		return PF_DROP;
	}

	for (; cnt > 0; cnt -= optlen, optp += optlen) {
		opt = optp[0];
		if (opt == TCPOPT_EOL) {
			break;
		}
		if (opt == TCPOPT_NOP) {
			optlen = 1;
		} else {
			if (cnt < 2) {
				break;
			}
			optlen = optp[1];
			if (optlen < 2 || optlen > cnt) {
				break;
			}
			switch (opt) {
			case TCPOPT_MAXSEG:
				mss = (u_int16_t *)(void *)(optp + 2);
				if ((ntohs(*mss)) > r->max_mss) {
					/*
					 * Only do the TCP checksum fixup if delayed
					 * checksum calculation will not be performed.
					 */
					if (pbuf->pb_ifp ||
					    !(*pbuf->pb_csum_flags & CSUM_TCP)) {
						th->th_sum = pf_cksum_fixup(th->th_sum,
						    *mss, htons(r->max_mss), 0);
					}
					*mss = htons(r->max_mss);
					rewrite = 1;
				}
				break;
			default:
				break;
			}
		}
	}

	if (rewrite) {
		u_short reason;

		VERIFY(pbuf == pd->mp);

		if (pf_lazy_makewritable(pd, pd->mp,
		    off + sizeof(*th) + thoff) == NULL) {
			REASON_SET(&reason, PFRES_MEMORY);
			if (r->log) {
				PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
				    r, 0, 0, pd);
			}
			return PF_DROP;
		}

		*rewrptr = 1;
		pbuf_copy_back(pd->mp, off + sizeof(*th), thoff - sizeof(*th), opts);