/*
 * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*	$apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
/*	$OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */

/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/random.h>
#include <sys/mcache.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#include <netinet/ip6.h>

#include <net/pfvar.h>
struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;
	struct mbuf	*fr_m;
#define fr_ip		fr_u.fru_ipv4
#define fr_ip6		fr_u.fru_ipv6
	union {
		struct ip	*fru_ipv4;
		struct ip6_hdr	*fru_ipv6;
	} fr_u;
	struct ip6_frag	fr_ip6f_opt;
	int		fr_ip6f_hlen;
};

struct pf_frcache {
	LIST_ENTRY(pf_frcache) fr_next;
	uint16_t	fr_off;
	uint16_t	fr_end;
};

#define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this */
#define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004		/* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))

struct pf_fragment {
	RB_ENTRY(pf_fragment) fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	struct pf_addr	fr_srcx;
	struct pf_addr	fr_dstx;
	u_int8_t	fr_p;		/* protocol of this fragment */
	u_int8_t	fr_flags;	/* status flags */
	u_int16_t	fr_max;		/* fragment data max */
#define fr_id		fr_uid.fru_id4
#define fr_id6		fr_uid.fru_id6
	union {
		u_int16_t	fru_id4;
		u_int32_t	fru_id6;
	} fr_uid;
	int		fr_af;
	u_int32_t	fr_timeout;
#define fr_queue	fr_u.fru_queue
#define fr_cache	fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;	/* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
	} fr_u;
};
static TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
static TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;

static __inline int	 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
static RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
    pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
/* Private prototypes */
static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *,
    struct ip6_frag *);
static void pf_ip2key(struct pf_fragment *, struct ip *);
static void pf_remove_fragment(struct pf_fragment *);
static void pf_flush_fragments(void);
static void pf_free_fragment(struct pf_fragment *);
static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *,
    struct pf_frag_tree *);
static __inline struct pf_fragment *
    pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *);
static __inline struct pf_fragment *
    pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *,
    struct pf_frag_tree *);
static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_fragcache(struct mbuf **, struct ip *,
    struct pf_fragment **, int, int, int *);
static struct mbuf *pf_reassemble6(struct mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr *,
    struct ip6_frag *, struct pf_fragment **, int, int, int, int *);
static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *,
    struct pf_pdesc *, struct mbuf *, struct tcphdr *, int, int *);
#define DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		printf("%s: ", __func__);		\
		printf x;				\
	}						\
} while (0)

struct pool		 pf_frent_pl, pf_frag_pl;
static struct pool	 pf_cache_pl, pf_cent_pl;
struct pool		 pf_state_scrub_pl;

static int		 pf_nfrents, pf_ncache;
void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof (struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof (struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof (struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof (struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof (struct pf_state_scrub), 0, 0, 0,
	    "pfstatescrub", NULL);

	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}
void
pf_normalize_destroy(void)
{
	pool_destroy(&pf_state_scrub_pl);
	pool_destroy(&pf_cent_pl);
	pool_destroy(&pf_cache_pl);
	pool_destroy(&pf_frag_pl);
	pool_destroy(&pf_frent_pl);
}

int
pf_normalize_isempty(void)
{
	return (TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue));
}
static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int diff;

	if ((diff = a->fr_af - b->fr_af))
		return (diff);
	else if ((diff = a->fr_p - b->fr_p))
		return (diff);
	else {
		struct pf_addr *sa = &a->fr_srcx;
		struct pf_addr *sb = &b->fr_srcx;
		struct pf_addr *da = &a->fr_dstx;
		struct pf_addr *db = &b->fr_dstx;

		switch (a->fr_af) {
		case AF_INET:
			if ((diff = a->fr_id - b->fr_id))
				return (diff);
			else if (sa->v4.s_addr < sb->v4.s_addr)
				return (-1);
			else if (sa->v4.s_addr > sb->v4.s_addr)
				return (1);
			else if (da->v4.s_addr < db->v4.s_addr)
				return (-1);
			else if (da->v4.s_addr > db->v4.s_addr)
				return (1);
			break;
		case AF_INET6:
			if ((diff = a->fr_id6 - b->fr_id6))
				return (diff);
			else if (sa->addr32[3] < sb->addr32[3])
				return (-1);
			else if (sa->addr32[3] > sb->addr32[3])
				return (1);
			else if (sa->addr32[2] < sb->addr32[2])
				return (-1);
			else if (sa->addr32[2] > sb->addr32[2])
				return (1);
			else if (sa->addr32[1] < sb->addr32[1])
				return (-1);
			else if (sa->addr32[1] > sb->addr32[1])
				return (1);
			else if (sa->addr32[0] < sb->addr32[0])
				return (-1);
			else if (sa->addr32[0] > sb->addr32[0])
				return (1);
			else if (da->addr32[3] < db->addr32[3])
				return (-1);
			else if (da->addr32[3] > db->addr32[3])
				return (1);
			else if (da->addr32[2] < db->addr32[2])
				return (-1);
			else if (da->addr32[2] > db->addr32[2])
				return (1);
			else if (da->addr32[1] < db->addr32[1])
				return (-1);
			else if (da->addr32[1] > db->addr32[1])
				return (1);
			else if (da->addr32[0] < db->addr32[0])
				return (-1);
			else if (da->addr32[0] > db->addr32[0])
				return (1);
			break;
		default:
			VERIFY(!0 && "only IPv4 and IPv6 supported!");
			break;
		}
	}
	return (0);
}
void
pf_purge_expired_fragments(void)
{
	struct pf_fragment *frag;
	u_int32_t expire = pf_time_second() -
	    pf_default_rule.timeout[PFTM_FRAG];

	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		VERIFY(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(%p) from queue.\n",
			    ntohs(frag->fr_id), frag));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(%p) from queue.\n",
			    ntohl(frag->fr_id6), frag));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		VERIFY(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(%p) from cache.\n",
			    ntohs(frag->fr_id), frag));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(%p) from cache.\n",
			    ntohl(frag->fr_id6), frag));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
		VERIFY(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}
/*
 * Try to flush old fragments to make space for new ones
 */
static void
pf_flush_fragments(void)
{
	struct pf_fragment *frag;
	int goal;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(("trying to free > %d frents\n",
	    pf_nfrents - goal));
	while (goal < pf_nfrents) {
		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}

	goal = pf_ncache * 9 / 10;
	DPFPRINTF(("trying to free > %d cache entries\n",
	    pf_ncache - goal));
	while (goal < pf_ncache) {
		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}
}
/* Frees the fragments and all associated entries */
static void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent *frent;
	struct pf_frcache *frcache;

	/* Free all fragments */
	if (BUFFER_FRAGMENTS(frag)) {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			m_freem(frent->fr_m);
			pool_put(&pf_frent_pl, frent);
			pf_nfrents--;
		}
	} else {
		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
		    frcache = LIST_FIRST(&frag->fr_cache)) {
			LIST_REMOVE(frcache, fr_next);

			VERIFY(LIST_EMPTY(&frag->fr_cache) ||
			    LIST_FIRST(&frag->fr_cache)->fr_off >
			    frcache->fr_end);

			pool_put(&pf_cent_pl, frcache);
			pf_ncache--;
		}
	}

	pf_remove_fragment(frag);
}
static void
pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6,
    struct ip6_frag *fh)
{
	key->fr_p = fh->ip6f_nxt;
	key->fr_id6 = fh->ip6f_ident;
	key->fr_af = AF_INET6;
	key->fr_srcx.v6 = ip6->ip6_src;
	key->fr_dstx.v6 = ip6->ip6_dst;
}

static void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
	key->fr_p = ip->ip_p;
	key->fr_id = ip->ip_id;
	key->fr_af = AF_INET;
	key->fr_srcx.v4.s_addr = ip->ip_src.s_addr;
	key->fr_dstx.v4.s_addr = ip->ip_dst.s_addr;
}
static struct pf_fragment *
pf_find_fragment_by_key(struct pf_fragment *key, struct pf_frag_tree *tree)
{
	struct pf_fragment *frag;

	frag = RB_FIND(pf_frag_tree, tree, key);
	if (frag != NULL) {
		/* XXX Are we sure we want to update the timeout? */
		frag->fr_timeout = pf_time_second();
		if (BUFFER_FRAGMENTS(frag)) {
			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
		} else {
			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
		}
	}

	return (frag);
}

static __inline struct pf_fragment *
pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree)
{
	struct pf_fragment key;
	pf_ip2key(&key, ip);
	return pf_find_fragment_by_key(&key, tree);
}

static __inline struct pf_fragment *
pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh,
    struct pf_frag_tree *tree)
{
	struct pf_fragment key;
	pf_ip6hdr2key(&key, ip6, fh);
	return pf_find_fragment_by_key(&key, tree);
}
/* Removes a fragment from the fragment queue and frees the fragment */
static void
pf_remove_fragment(struct pf_fragment *frag)
{
	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
		pool_put(&pf_frag_pl, frag);
	} else {
		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
		pool_put(&pf_cache_pl, frag);
	}
}

#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
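/*
 * pf_reassemble() below implements the fully-buffering mode: every IPv4
 * fragment is queued in offset order (FR_IP_OFF() converts the 13-bit
 * ip_off field, which counts 8-byte units, into a byte offset), overlaps
 * are trimmed, and a single reassembled mbuf chain is returned once all
 * data up to the last fragment has been seen.
 */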
static struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *m = *m0, *m2;
	struct pf_frent *frea, *next;
	struct pf_frent *frep = NULL;
	struct ip *ip = frent->fr_ip;
	int hlen = ip->ip_hl << 2;
	u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t fr_max = ip_len + off;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4 = frent->fr_ip->ip_src;
		(*frag)->fr_dstx.v4 = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = pf_time_second();
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;
	}

	VERIFY(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off) {
		u_int16_t precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next) {
		u_int16_t aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4) {
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off)) {
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_srcx.v4;
	ip->ip_dst = (*frag)->fr_dstx.v4;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_data -= hlen;
	m->m_len += hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	*m0 = NULL;
	return (NULL);
}
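/*
 * pf_fragcache() is the non-buffering counterpart used for the
 * 'fragment crop'/'fragment drop-ovl' scrub modes (PFRULE_FRAGCROP /
 * PFRULE_FRAGDROP): fragments are forwarded immediately, and only the byte
 * ranges already seen are remembered in a list of pf_frcache entries so
 * that overlapping data can be trimmed (crop) or the whole datagram flagged
 * for drop (drop-ovl).
 */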
static struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
	struct mbuf *m = *m0;
	struct pf_frcache *frp, *fra, *cur = NULL;
	int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	u_int16_t off = ntohs(h->ip_off) << 3;
	u_int16_t fr_max = ip_len + off;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4 = h->ip_src;
		(*frag)->fr_dstx.v4 = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = pf_time_second();

		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
		    fr_max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	VERIFY(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/*
			 * The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/*
				 * XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				VERIFY((*m0)->m_next == NULL);
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}

				h = mtod(m, struct ip *);

				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - precut);
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut);
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				cur = NULL;
				pf_ncache--;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				frp = NULL;
				pf_ncache--;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
			    h->ip_id));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}
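/*
 * IPv6 equivalents of the buffering path.  FR_IP6_OFF() masks the fragment
 * offset out of ip6f_offlg (the masked value is already a byte offset, since
 * the offset field occupies the high 13 bits of the 16-bit word) and
 * FR_IP6_PLEN() reads the payload length; fr_ip6f_hlen records how many
 * bytes of IPv6 and extension headers precede the fragment payload in each
 * queued mbuf.
 */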
#define FR_IP6_OFF(fr) \
	(ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
#define FR_IP6_PLEN(fr)	(ntohs((fr)->fr_ip6->ip6_plen))

static struct mbuf *
pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *m, *m2;
	struct pf_frent *frea, *frep, *next;
	struct ip6_hdr *ip6;
	int plen, off, fr_max;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	m = *m0;
	frep = NULL;
	ip6 = frent->fr_ip6;
	off = FR_IP6_OFF(frent);
	plen = FR_IP6_PLEN(frent);
	fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof *ip6);

	DPFPRINTF(("%p IPv6 frag plen %u off %u fr_ip6f_hlen %u fr_max %u m_len %u\n", m,
	    plen, off, frent->fr_ip6f_hlen, fr_max, m->m_len));

	/* strip off headers up to the fragment payload */
	m->m_data += frent->fr_ip6f_hlen;
	m->m_len -= frent->fr_ip6f_hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6 = frent->fr_ip6->ip6_src;
		(*frag)->fr_dstx.v6 = frent->fr_ip6->ip6_dst;
		(*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
		(*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP6_OFF(frea) > off)
			break;
		frep = frea;
	}

	VERIFY(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off) {
		u_int16_t precut;

		precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
		    frep->fr_ip6f_hlen - off;
		if (precut >= plen)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		frent->fr_ip6f_opt.ip6f_offlg =
		    htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
		    (precut >> 3));
		off = FR_IP6_OFF(frent);
		plen -= precut;
		ip6->ip6_plen = htons(plen);
	}

	for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
		u_int16_t aftercut;

		aftercut = plen + off - FR_IP6_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
			frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
			    aftercut);
			frea->fr_ip6f_opt.ip6f_offlg =
			    htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);
		off += FR_IP6_PLEN(frep) - (frent->fr_ip6f_hlen - sizeof *ip6);
		DPFPRINTF(("frep at %d, next %d, max %d\n",
		    off, next == NULL ? -1 : FR_IP6_OFF(next),
		    (*frag)->fr_max));
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP6_OFF(next) != off)) {
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP6_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}

	ip6 = frent->fr_ip6;
	ip6->ip6_nxt = (*frag)->fr_p;
	ip6->ip6_plen = htons(off);
	ip6->ip6_src = (*frag)->fr_srcx.v6;
	ip6->ip6_dst = (*frag)->fr_dstx.v6;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	m = frent->fr_m;
	m->m_len += sizeof(struct ip6_hdr);
	m->m_data -= sizeof(struct ip6_hdr);
	memmove(m->m_data, ip6, sizeof(struct ip6_hdr));

	next = LIST_NEXT(frent, fr_next);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; next != NULL; frent = next) {
		m2 = frent->fr_m;
		m_cat(m, m2);

		next = LIST_NEXT(frent, fr_next);
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
	}

	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int pktlen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			pktlen += m2->m_len;
		m->m_pkthdr.len = pktlen;
	}

	DPFPRINTF(("complete: %p ip6_plen %d m_pkthdr.len %d\n",
	    m, ntohs(ip6->ip6_plen), m->m_pkthdr.len));

	return (m);

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	*m0 = NULL;
	return (NULL);
}
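/*
 * Example of the offset arithmetic used in pf_frag6cache() (hypothetical
 * numbers): if the unfragmentable part is just the 40-byte IPv6 header plus
 * the 8-byte fragment header, pf_normalize_ip6() passes hlen = 48, so
 * plen = ip6_plen - (hlen - sizeof(struct ip6_hdr)) counts only payload
 * octets, off comes from the fragment header, and fr_max = off + plen is
 * the octet just past this fragment within the reassembled packet.
 */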
static struct mbuf *
pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh,
    struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem)
{
	struct mbuf *m = *m0;
	u_int16_t plen, off, fr_max;
	struct pf_frcache *frp, *fra, *cur = NULL;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
	plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);

	/*
	 * Apple Modification: dimambro@apple.com. The hlen, being passed
	 * into this function includes all the headers associated with
	 * the packet, and may include routing headers, so to get to
	 * the data payload as stored in the original IPv6 header we need
	 * to subtract all those headers and the IP header.
	 *
	 * The 'max' local variable should also contain the offset from the
	 * start of the reassembled packet to the octet just past the end of
	 * the octets in the current fragment where:
	 * - 'off' is the offset from the start of the reassembled packet to
	 *   the first octet in the fragment,
	 * - 'plen' is the length of the "payload data length" excluding all
	 *   the IPv6 headers of the fragment.
	 * - 'hlen' is computed in pf_normalize_ip6() as the offset from the
	 *   start of the IPv6 packet to the beginning of the data.
	 */
	fr_max = off + plen;

	DPFPRINTF(("%p plen %u off %u fr_max %u\n", m,
	    plen, off, fr_max));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6 = h->ip6_src;
		(*frag)->fr_dstx.v6 = h->ip6_dst;
		(*frag)->fr_p = fh->ip6f_nxt;
		(*frag)->fr_id6 = fh->ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();

		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
		    off, fr_max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	VERIFY(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int precut;

		precut = frp->fr_end - off;
		if (precut >= plen) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/* The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/* XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym(m, 0, hlen, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				VERIFY((*m0)->m_next == NULL);
				m_adj(m, precut + hlen);
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						pktlen += t->m_len;
					m->m_pkthdr.len = pktlen;
				}

				h = mtod(m, struct ip6_hdr *);

				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - precut);
				fh->ip6f_offlg &= ~IP6F_OFF_MASK;
				fh->ip6f_offlg |=
				    htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
				    + (precut >> 3));
				h->ip6_plen = htons(ntohs(h->ip6_plen) -
				    precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), -precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						pktlen += t->m_len;
					m->m_pkthdr.len = pktlen;
				}
				h = mtod(m, struct ip6_hdr *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - aftercut);
				h->ip6_plen =
				    htons(ntohs(h->ip6_plen) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), cur->fr_off,
				    cur->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				cur = NULL;
				pf_ncache--;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), frp->fr_off,
				    frp->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				frp = NULL;
				pf_ncache--;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
		    ntohl(fh->ip6f_ident), (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n",
			    ntohl(fh->ip6f_ident)));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}
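/*
 * pf_normalize_ip() walks the active scrub ruleset for the first matching
 * rule, then either fully reassembles IPv4 fragments via pf_reassemble()
 * (the default) or runs them through the non-buffering cache via
 * pf_fragcache() when the rule asks for 'fragment crop' or
 * 'fragment drop-ovl' (PFRULE_FRAGCROP/PFRULE_FRAGDROP).  Non-fragments
 * only get their header normalized (no-df, min-ttl, random-id).
 */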
int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf *m = *m0;
	struct pf_rule *r;
	struct pf_frent *frent;
	struct pf_fragment *frag = NULL;
	struct ip *h = mtod(m, struct ip *);
	int mff = (ntohs(h->ip_off) & IP_MF);
	int hlen = h->ip_hl << 2;
	u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t fr_max;
	int ip_len;
	int ip_off;
	int asd = 0;
	struct pf_ruleset *ruleset = NULL;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else {
			if (r->anchor == NULL)
				break;
			else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_SCRUB, &r, NULL, NULL);
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_SCRUB, &r, NULL, NULL))
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (hlen < (int)sizeof (struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ipoff = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/*
	 * We're dealing with a fragment now. Don't allow fragments
	 * with IP_DF to enter the cache. If the flag was cleared by
	 * no-df above, fine. Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	fr_max = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
		    fragoff, fr_max));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		/* use mtag from concatenated mbuf chain */
		pd->pf_mtag = pf_find_mtag(m);
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				m = *m0 = NULL;
				goto no_mem;
			}
		}

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int nomem = 0;

		if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
			/*
			 * Already passed the fragment cache in the
			 * input direction. If we continued, it would
			 * appear to be a dup and would be dropped.
			 */
			goto fragment_pass;
		}

		frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}

		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		/* use mtag from copied and trimmed mbuf chain */
		pd->pf_mtag = pf_find_mtag(m);
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				m = *m0 = NULL;
				goto no_mem;
			}
		}

		if (dir == PF_IN)
			pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		goto fragment_pass;
	}

no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ipoff = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
#if RANDOM_IP_ID
	if (r->rule_flag & PFRULE_RANDOMID) {
		u_int16_t ip_id = h->ip_id;

		h->ip_id = ip_randomid();
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
	}
#endif /* RANDOM_IP_ID */
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;

	return (PF_PASS);

fragment_pass:
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);

drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);

bad:
	DPFPRINTF(("dropping bad IPv4 fragment\n"));

	/* Free associated fragments */
	if (frag != NULL)
		pf_free_fragment(frag);

	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);

	return (PF_DROP);
}
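/*
 * pf_normalize_ip6() additionally walks the IPv6 extension-header chain
 * (rejecting multiple routing headers, routing header type 0 and bogus
 * jumbo-payload options) before handing fragments to pf_reassemble6() or
 * pf_frag6cache(); RFC 5722 is why overlapping IPv6 fragments are treated
 * as hostile here.
 */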
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf *m = *m0;
	struct pf_rule *r;
	struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
	int off;
	struct ip6_ext ext;
	struct ip6_opt opt;
	struct ip6_opt_jumbo jumbo;
	int optend;
	int ooff;
	struct ip6_frag frag;
	u_int32_t jumbolen = 0, plen;
	u_int16_t fragoff = 0;
	u_int8_t proto;
	int terminal;
	struct pf_frent *frent;
	struct pf_fragment *pff = NULL;
	int mff = 0, rh_cnt = 0;
	u_int16_t fr_max;
	int asd = 0;
	struct pf_ruleset *ruleset = NULL;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET6)
			r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
		else if (r->proto && r->proto != h->ip6_nxt)
			r = r->skip[PF_SKIP_PROTO].ptr;
#endif
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip6_src, AF_INET6,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else {
			if (r->anchor == NULL)
				break;
			else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_SCRUB, &r, NULL, NULL);
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_SCRUB, &r, NULL, NULL))
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if ((int)(sizeof (struct ip6_hdr) + IPV6_MAXPACKET) < m->m_pkthdr.len)
		goto drop;

	off = sizeof (struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			/*
			 * Multiple routing headers not allowed.
			 * Routing header type zero considered harmful.
			 */
			if (proto == IPPROTO_ROUTING) {
				const struct ip6_rthdr *rh =
				    (const struct ip6_rthdr *)&ext;
				if (rh_cnt++)
					goto drop;
				if (rh->ip6r_type == IPV6_RTHDR_TYPE_0)
					goto drop;
			}
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof (ext);
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof (opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof (opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				if (ooff + sizeof (opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof (jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof (jumbolen));
					jumbolen = ntohl(jumbolen);
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					if (sizeof (struct ip6_hdr) +
					    jumbolen != m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof (opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if ((int)(sizeof (struct ip6_hdr) + plen) > m->m_pkthdr.len)
		goto shortpkt;

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
		h->ip6_hlim = r->min_ttl;

	return (PF_PASS);

fragment:
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof (frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	pd->proto = frag.ip6f_nxt;
	mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
	off += sizeof (frag);
	if (fragoff + (plen - off) > IPV6_MAXPACKET)
		goto badfrag;

	fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
	DPFPRINTF(("%p IPv6 frag plen %u mff %d off %u fragoff %u fr_max %u\n", m,
	    plen, mff, off, fragoff, fr_max));

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */
		pd->flags |= PFDESC_IP_REAS;

		pff = pf_find_fragment_by_ipv6_header(h, &frag,
		    &pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > pff->fr_max)
			goto badfrag;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip6 = h;
		frent->fr_m = m;
		frent->fr_ip6f_opt = frag;
		frent->fr_ip6f_hlen = off;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
		    ntohl(frag.ip6f_ident), fragoff, fr_max));
		*m0 = m = pf_reassemble6(m0, &pff, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
			goto drop;

		h = mtod(m, struct ip6_hdr *);
	}
	else if (dir == PF_IN || !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
		/* non-buffering fragment cache (overlaps: see RFC 5722) */
		int nomem = 0;

		pff = pf_find_fragment_by_ipv6_header(h, &frag,
		    &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > pff->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				pff->fr_flags |= PFFRAG_DROP;
			goto badfrag;
		}

		*m0 = m = pf_frag6cache(m0, h, &frag, &pff, off, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		if (dir == PF_IN)
			pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;

		if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
			goto drop;
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
		h->ip6_hlim = r->min_ttl;
	return (PF_PASS);

no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	goto dropout;

shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	goto dropout;

drop:
	REASON_SET(reason, PFRES_NORM);
	goto dropout;

badfrag:
	DPFPRINTF(("dropping bad IPv6 fragment\n"));
	REASON_SET(reason, PFRES_FRAG);
	goto dropout;

dropout:
	if (pff != NULL)
		pf_free_fragment(pff);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);
}
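/*
 * pf_normalize_tcp() sanitizes TCP headers for matching scrub rules:
 * illegal flag combinations are dropped (for example FIN/PSH/URG without
 * ACK), reserved bits are cleared, a stray urgent pointer is removed, and
 * the checksum is patched with pf_cksum_fixup() for every field that
 * changes before the header is copied back into the mbuf.
 */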
int
pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
    int off, void *h, struct pf_pdesc *pd)
{
#pragma unused(ipoff, h)
	struct pf_rule *r, *rm = NULL;
	struct tcphdr *th = pd->hdr.tcp;
	int rewrite = 0;
	int asd = 0;
	u_short reason;
	u_int8_t flags;
	sa_family_t af = pd->af;
	struct pf_ruleset *ruleset = NULL;
	union pf_state_xport sxport, dxport;

	sxport.port = th->th_sport;
	dxport.port = th->th_dport;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (r->src.xport.range.op &&
		    !pf_match_xport(r->src.xport.range.op, r->proto_variant,
		    &r->src.xport, &sxport))
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->dst.xport.range.op &&
		    !pf_match_xport(r->dst.xport.range.op, r->proto_variant,
		    &r->dst.xport, &dxport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->os_fingerprint != PF_OSFP_ANY &&
		    !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, th),
		    r->os_fingerprint))
			r = TAILQ_NEXT(r, entries);
		else {
			if (r->anchor == NULL) {
				rm = r;
				break;
			} else {
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_SCRUB, &r, NULL, NULL);
			}
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_SCRUB, &r, NULL, NULL))
			break;
	}

	if (rm == NULL || rm->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
		pd->flags |= PFDESC_TCP_NORM;

	flags = th->th_flags;
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST)
			goto tcp_drop;

		if (flags & TH_FIN)
			flags &= ~TH_FIN;
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK|TH_RST)))
			goto tcp_drop;
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
			goto tcp_drop;
	}

	/* Check for illegal header length */
	if (th->th_off < (sizeof (struct tcphdr) >> 2))
		goto tcp_drop;

	/* If flags changed, or reserved data set, then adjust */
	if (flags != th->th_flags || th->th_x2 != 0) {
		u_int16_t ov, nv;

		ov = *(u_int16_t *)(&th->th_ack + 1);
		th->th_flags = flags;
		th->th_x2 = 0;
		nv = *(u_int16_t *)(&th->th_ack + 1);

		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
		rewrite = 1;
	}

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
		th->th_urp = 0;
		rewrite = 1;
	}

	/* copy back packet headers if we sanitized */
	/* Process options */
	if (r->max_mss) {
		int rv = pf_normalize_tcpopt(r, dir, kif, pd, m, th, off,
		    &rewrite);
		if (rv == PF_DROP)
			return rv;
		m = pd->mp;
	}

	if (rewrite) {
		struct mbuf *mw = pf_lazy_makewritable(pd, m,
		    off + sizeof (*th));
		if (!mw) {
			REASON_SET(&reason, PFRES_MEMORY);
			if (r->log)
				PFLOG_PACKET(kif, h, m, AF_INET, dir, reason,
				    r, NULL, NULL, pd);
			return PF_DROP;
		}

		m_copyback(mw, off, sizeof (*th), th);
	}

	return (PF_PASS);

tcp_drop:
	REASON_SET(&reason, PFRES_NORM);
	if (rm != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
	return (PF_DROP);
}
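/*
 * pf_normalize_tcp_init() is called when a new state is created under a
 * scrub rule: it records the peer's initial TTL/hop limit and, if the SYN
 * carries a timestamp option, the first timestamp values plus a random
 * per-connection modulator (pfss_ts_mod) that later packets are shifted by
 * in pf_normalize_tcp_stateful().
 */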
int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
#pragma unused(dst)
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];
	u_int8_t *opt;

	VERIFY(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	bzero(src->scrub, sizeof (*src->scrub));

	switch (pd->af) {
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connections.  They must all set an enabled bit in pfss_flags
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);

	if (th->th_off > (sizeof (struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof (struct tcphdr);
		hlen = (th->th_off << 2) - sizeof (struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					src->scrub->pfss_ts_mod =
					    htonl(random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof (u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof (u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}

void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
	if (state->src.scrub)
		pool_put(&pf_state_scrub_pl, state->src.scrub);
	if (state->dst.scrub)
		pool_put(&pf_state_scrub_pl, state->dst.scrub);

	/* Someday... flush the TCP segment reassembly descriptors. */
}
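/*
 * pf_normalize_tcp_stateful() runs on every packet of a scrubbed
 * connection: it clamps the TTL/hop limit to the minimum seen so far,
 * rewrites TCP timestamps by the per-peer modulator chosen in
 * pf_normalize_tcp_init(), and applies RFC 1323 PAWS-style bounds to the
 * timestamp and echoed timestamp (the checks are disabled again after
 * TS_MAX_IDLE seconds of silence or TS_MAX_CONN seconds of connection age).
 */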
2328 pf_normalize_tcp_stateful(struct mbuf
*m
, int off
, struct pf_pdesc
*pd
,
2329 u_short
*reason
, struct tcphdr
*th
, struct pf_state
*state
,
2330 struct pf_state_peer
*src
, struct pf_state_peer
*dst
, int *writeback
)
2332 struct timeval uptime
;
2333 u_int32_t tsval
, tsecr
;
2334 u_int tsval_from_last
;
2340 VERIFY(src
->scrub
|| dst
->scrub
);
2343 * Enforce the minimum TTL seen for this connection. Negate a common
2344 * technique to evade an intrusion detection system and confuse
2345 * firewall state code.
2351 struct ip
*h
= mtod(m
, struct ip
*);
2352 if (h
->ip_ttl
> src
->scrub
->pfss_ttl
)
2353 src
->scrub
->pfss_ttl
= h
->ip_ttl
;
2354 h
->ip_ttl
= src
->scrub
->pfss_ttl
;
2362 struct ip6_hdr
*h
= mtod(m
, struct ip6_hdr
*);
2363 if (h
->ip6_hlim
> src
->scrub
->pfss_ttl
)
2364 src
->scrub
->pfss_ttl
= h
->ip6_hlim
;
2365 h
->ip6_hlim
= src
->scrub
->pfss_ttl
;
2372 if (th->th_off > (sizeof (struct tcphdr) >> 2) &&
2373     ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
2374     (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
2375     pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
2376 /* Diddle with TCP options */
2378 opt = hdr + sizeof (struct tcphdr);
2379 hlen = (th->th_off << 2) - sizeof (struct tcphdr);
2380 while (hlen >= TCPOLEN_TIMESTAMP) {
2382 case TCPOPT_EOL: /* FALLTHROUGH */
2387 case TCPOPT_TIMESTAMP:
2389  * Modulate the timestamps. Can be used for
2390  * NAT detection, OS uptime determination or
2395 /* Huh? Multiple timestamps!? */
2396 if (pf_status.debug >= PF_DEBUG_MISC) {
2397 DPFPRINTF(("multiple TS??"));
2398 pf_print_state(state);
2401 REASON_SET(reason, PFRES_TS);
2404 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2405 memcpy(&tsval, &opt[2],
2406     sizeof (u_int32_t));
2407 if (tsval && src->scrub &&
2408     (src->scrub->pfss_flags &
2410 tsval = ntohl(tsval);
2411 pf_change_a(&opt[2],
2414     src->scrub->pfss_ts_mod),
2419 /* Modulate TS reply iff valid (!0) */
2420 memcpy(&tsecr, &opt[6],
2421     sizeof (u_int32_t));
2422 if (tsecr && dst->scrub &&
2423     (dst->scrub->pfss_flags &
2425 tsecr = ntohl(tsecr)
2426     - dst->scrub->pfss_ts_mod;
2427 pf_change_a(&opt[6],
2428     &th->th_sum, htonl(tsecr),
2436 hlen -= MAX(opt[1], 2);
2437 opt += MAX(opt[1], 2);
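/*
 * Illustrative sketch of the modulation above, not from the original
 * pf_norm.c: each peer's tsval is shifted by that peer's own
 * pfss_ts_mod on the way out, and the tsecr that echoes it is shifted
 * back by the same amount before it returns to the original sender:
 *
 *     on the wire out:  tsval' = tsval + ts_mod      (pf_change_a on opt[2])
 *     echoed back in:   tsecr  = tsecr' - ts_mod     (pf_change_a on opt[6])
 *
 * Both endpoints still see consistent RFC 1323 timestamps, but the raw
 * uptime-derived values never cross the firewall unmodified.
 */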
2442 /* Copyback the options, caller copies back header */
2443 int optoff = off + sizeof (*th);
2444 int optlen = (th->th_off << 2) - sizeof (*th);
2445 m = pf_lazy_makewritable(pd, m, optoff + optlen);
2447 REASON_SET(reason, PFRES_MEMORY);
2450 *writeback = optoff + optlen;
2451 m_copyback(m, optoff, optlen, hdr + sizeof (*th));
2457 * Must invalidate PAWS checks on connections idle for too long.
2458 * The fastest allowed timestamp clock is 1ms. That turns out to
2459 * be about 24 days before it wraps. XXX Right now our lowerbound
2460 * TS echo check only works for the first 12 days of a connection
2461 * when the TS has exhausted half its 32bit space
2463 #define TS_MAX_IDLE (24*24*60*60)
2464 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */
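/*
 * Illustrative arithmetic, not from the original pf_norm.c: the
 * fastest RFC 1323 timestamp clock ticks once per millisecond, so
 * half of the 32-bit timestamp space is 2^31 ms, roughly 24.8 days;
 * hence the 24-day TS_MAX_IDLE above (24*24*60*60 = 2,073,600 s) and
 * the stricter 12-day TS_MAX_CONN while the tsecr lower bound is
 * still tied to the very first timestamp of the connection.
 */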
2466 getmicrouptime(&uptime);
2467 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
2468     (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
2469     pf_time_second() - state->creation > TS_MAX_CONN)) {
2470 if (pf_status.debug >= PF_DEBUG_MISC) {
2471 DPFPRINTF(("src idled out of PAWS\n"));
2472 pf_print_state(state);
2475 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
2478 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
2479     uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
2480 if (pf_status.debug >= PF_DEBUG_MISC) {
2481 DPFPRINTF(("dst idled out of PAWS\n"));
2482 pf_print_state(state);
2485 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
2489 if (got_ts && src->scrub && dst->scrub &&
2490     (src->scrub->pfss_flags & PFSS_PAWS) &&
2491     (dst->scrub->pfss_flags & PFSS_PAWS)) {
2493 * Validate that the timestamps are "in-window".
2494 * RFC1323 describes TCP Timestamp options that allow
2495 * measurement of RTT (round trip time) and PAWS
2496 * (protection against wrapped sequence numbers). PAWS
2497 * gives us a set of rules for rejecting packets on
2498 * long fat pipes (packets that were somehow delayed
2499 * in transit longer than the time it took to send the
2500 * full TCP sequence space of 4Gb). We can use these
2501 * rules and infer a few others that will let us treat
2502 * the 32bit timestamp and the 32bit echoed timestamp
2503 * as sequence numbers to prevent a blind attacker from
2504 * inserting packets into a connection.
2507 * - The timestamp on this packet must be greater than
2508 * or equal to the last value echoed by the other
2509 * endpoint. The RFC says those will be discarded
2510 * since it is a dup that has already been acked.
2511 * This gives us a lowerbound on the timestamp.
2512 * timestamp >= other last echoed timestamp
2513 * - The timestamp will be less than or equal to
2514 * the last timestamp plus the time between the
2515 * last packet and now. The RFC defines the max
2516 * clock rate as 1ms. We will allow clocks to be
2517 * up to 10% fast and will allow a total difference
2518 * of 30 seconds due to a route change. And this
2519 * gives us an upperbound on the timestamp.
2520 * timestamp <= last timestamp + max ticks
2521 * We have to be careful here. Windows will send an
2522 * initial timestamp of zero and then initialize it
2523 * to a random value after the 3whs; presumably to
2524 * avoid a DoS by having to call an expensive RNG
2525 * during a SYN flood. Proof MS has at least one
2526 * good security geek.
2528 * - The TCP timestamp option must also echo the other
2529 * endpoint's timestamp. The timestamp echoed is the
2530 * one carried on the earliest unacknowledged segment
2531 * on the left edge of the sequence window. The RFC
2532 * states that the host will reject any echoed
2533 * timestamps that were larger than any ever sent.
2534 * This gives us an upperbound on the TS echo.
2535 * tsecr <= largest_tsval
2536 * - The lowerbound on the TS echo is a little more
2537 * tricky to determine. The other endpoint's echoed
2538 * values will not decrease. But there may be
2539 * network conditions that re-order packets and
2540 * cause our view of them to decrease. For now the
2541 * only lowerbound we can safely determine is that
2542 * the TS echo will never be less than the original
2543 * TS. XXX There is probably a better lowerbound.
2544 * Remove TS_MAX_CONN with better lowerbound check.
2545 * tsecr >= other original TS
2547 * It is also important to note that the fastest
2548 * timestamp clock of 1ms will wrap its 32bit space in
2549 * 24 days. So we just disable TS checking after 24
2550 * days of idle time. We actually must use a 12d
2551 * connection limit until we can come up with a better
2552 * lowerbound to the TS echo check.
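/*
 * Illustrative summary of the four bounds above, not from the original
 * pf_norm.c (field names are shorthand for the ->scrub-> fields used in
 * the check a few lines below):
 *
 *     tsval >= dst->pfss_tsecr                      lower bound on TS
 *     tsval <= src->pfss_tsval + tsval_from_last    upper bound on TS
 *     tsecr <= dst->pfss_tsval                      upper bound on TS echo
 *     tsecr >= dst->pfss_tsval0                     lower bound on TS echo
 *
 * A violated bound maps to the '0'..'3' flags printed by the
 * "Timestamp failed" debug output further down.
 */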
2554 struct timeval delta_ts;
2559 * PFTM_TS_DIFF is how many seconds of leeway to allow
2560 * a host's timestamp. This can happen if the previous
2561 * packet got delayed in transit for much longer than
2564 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
2565 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
2568 /* Calculate max ticks since the last timestamp */
2569 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */
2570 #define TS_MICROSECS 1000000 /* microseconds per second */
2571 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
2572 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
2573 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
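/*
 * Illustrative worked example, not from the original pf_norm.c: if the
 * peer has been idle for 2.5 seconds (delta_ts = 2 s, 500000 us), the
 * timestamp is allowed to advance by at most
 *
 *     (2 + ts_fudge) * 1100  +  500000 / (1000000 / 1100)  ticks,
 *
 * i.e. roughly 1100 ticks per second of idle time (1 kHz clock plus
 * 10% skew) plus whatever PFTM_TS_DIFF fudge the rule configures.
 */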
2576 if ((src->state >= TCPS_ESTABLISHED &&
2577     dst->state >= TCPS_ESTABLISHED) &&
2578     (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
2579     SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
2580     (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
2581     SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
2583 * Bad RFC1323 implementation or an insertion attack.
2585 * - Solaris 2.6 and 2.7 are known to send another ACK
2586 * after the FIN,FIN|ACK,ACK closing that carries
2590 DPFPRINTF(("Timestamp failed %c%c%c%c\n",
2591     SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
2592     SEQ_GT(tsval, src->scrub->pfss_tsval +
2593     tsval_from_last) ? '1' : ' ',
2594     SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
2595     SEQ_LT(tsecr, dst->scrub->pfss_tsval0) ? '3' : ' '));
2596 DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
2597     "idle: %lus %ums\n",
2598     tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
2599     delta_ts.tv_usec / 1000));
2600 DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
2601     src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2602 DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u\n",
2603     dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
2604     dst->scrub->pfss_tsval0));
2605 if (pf_status.debug >= PF_DEBUG_MISC) {
2606 pf_print_state(state);
2607 pf_print_flags(th->th_flags);
2610 REASON_SET(reason, PFRES_TS);
2614 /* XXX I'd really like to require tsecr but it's optional */
2616 } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
2617     ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
2618     || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
2619     src->scrub && dst->scrub &&
2620     (src->scrub->pfss_flags & PFSS_PAWS) &&
2621     (dst->scrub->pfss_flags & PFSS_PAWS)) {
2623 * Didn't send a timestamp. Timestamps aren't really useful
2625 * - connection opening or closing (often not even sent).
2626 *   but we must not let an attacker put a FIN on a
2627 * data packet to sneak it through our ESTABLISHED check.
2628 * - on a TCP reset. RFC suggests not even looking at TS.
2629 * - on an empty ACK. The TS will not be echoed so it will
2630 * probably not help keep the RTT calculation in sync and
2631 * there isn't as much danger when the sequence numbers
2632 * got wrapped. So some stacks don't include TS on empty
2635 * To minimize the disruption to mostly RFC1323 conformant
2636 * stacks, we will only require timestamps on data packets.
2638 * And what do ya know, we cannot require timestamps on data
2639 * packets. There appear to be devices that do legitimate
2640 * TCP connection hijacking. There are HTTP devices that allow
2641 * a 3whs (with timestamps) and then buffer the HTTP request.
2642 * If the intermediate device has the HTTP response cache, it
2643 * will spoof the response but not bother timestamping its
2644 * packets. So we can look for the presence of a timestamp in
2645 * the first data packet and if there, require it in all future
2649 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
2651 * Hey! Someone tried to sneak a packet in. Or the
2652 * stack changed its RFC1323 behavior?!?!
2654 if (pf_status.debug >= PF_DEBUG_MISC) {
2655 DPFPRINTF(("Did not receive expected RFC1323 "
2657 pf_print_state(state);
2658 pf_print_flags(th->th_flags);
2661 REASON_SET(reason, PFRES_TS);
2668 * We will note if a host sends its data packets with or without
2669 * timestamps. And require all data packets to contain a timestamp
2670 * if the first does. PAWS implicitly requires that all data packets be
2671 * timestamped. But I think there are middle-man devices that hijack
2672 * TCP streams immediately after the 3whs and don't timestamp their
2673 * packets (seen in a WWW accelerator or cache).
2675 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
2676     (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
2678 src->scrub->pfss_flags |= PFSS_DATA_TS;
2680 src->scrub->pfss_flags |= PFSS_DATA_NOTS;
2681 if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2682     (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
2683 /* Don't warn if other host rejected RFC1323 */
2684 DPFPRINTF(("Broken RFC1323 stack did not "
2685     "timestamp data packet. Disabled PAWS "
2687 pf_print_state(state);
2688 pf_print_flags(th->th_flags);
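/*
 * Illustrative note, not from the original pf_norm.c: the
 * PFSS_DATA_TS / PFSS_DATA_NOTS bits set just above implement the
 * heuristic from the long comment earlier: the first data packet in a
 * direction decides whether timestamps are required on all later data
 * packets. If it carried a TS, a later un-timestamped data packet is
 * rejected with PFRES_TS above; if it did not, the requirement is
 * dropped for that direction rather than breaking caches and
 * accelerators that legitimately take over the stream after the 3whs.
 */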
2696 * Update PAWS values
2698 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
2699     (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
2700 getmicrouptime(&src->scrub->pfss_last);
2701 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
2702     (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2703 src->scrub->pfss_tsval = tsval;
2706 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
2707     (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2708 src->scrub->pfss_tsecr = tsecr;
2710 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
2711     (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
2712     src->scrub->pfss_tsval0 == 0)) {
2713 /* tsval0 MUST be the lowest timestamp */
2714 src->scrub->pfss_tsval0 = tsval;
2717 /* Only fully initialized after a TS gets echoed */
2718 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
2719 src->scrub->pfss_flags |= PFSS_PAWS;
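/*
 * Illustrative note, not from the original pf_norm.c: PFSS_PAWS is
 * only armed once the peer has echoed one of our timestamps, because
 * the in-window bounds are meaningless while pfss_tsval and pfss_tsecr
 * still hold their initial values; Windows in particular starts with a
 * zero timestamp during the 3whs (see the validation comment above),
 * so enforcing PAWS any earlier could reject its first segments.
 */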
2723 /* I have a dream.... TCP segment reassembly.... */
2728 pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
2729     struct pf_pdesc *pd, struct mbuf *m, struct tcphdr *th, int off,
2732 #pragma unused(dir, kif)
2733 sa_family_t af = pd->af;
2736 int opt, cnt, optlen = 0;
2738 u_char opts[MAX_TCPOPTLEN];
2739 u_char *optp = opts;
2741 thoff = th->th_off << 2;
2742 cnt = thoff - sizeof (struct tcphdr);
2744 if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt,
2748 for (; cnt > 0; cnt -= optlen, optp += optlen) {
2750 if (opt == TCPOPT_EOL)
2752 if (opt == TCPOPT_NOP)
2758 if (optlen < 2 || optlen > cnt)
2763 mss = (u_int16_t *)(void *)(optp + 2);
2764 if ((ntohs(*mss)) > r->max_mss) {
2767 * Only do the TCP checksum fixup if delayed
2768 * checksum calculation will not be performed.
2770 if (m->m_pkthdr.rcvif ||
2771     !(m->m_pkthdr.csum_flags & CSUM_TCP))
2772 th->th_sum = pf_cksum_fixup(th->th_sum,
2773     *mss, htons(r->max_mss), 0);
2774 *mss = htons(r->max_mss);
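/*
 * Illustrative example, not from the original pf_norm.c: with a rule
 * such as "scrub max-mss 1440" and an incoming SYN advertising MSS
 * 1460, the two option bytes at optp + 2 are rewritten to htons(1440).
 * th_sum is patched incrementally via pf_cksum_fixup() only when the
 * checksum will not be recomputed later anyway; the rcvif/CSUM_TCP
 * test above skips the fixup for locally generated packets that still
 * have delayed TCP checksumming pending.
 */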
2787 mw = pf_lazy_makewritable(pd, pd->mp,
2788     off + sizeof (*th) + thoff);
2790 REASON_SET(&reason, PFRES_MEMORY);
2792 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason,
2798 m_copyback(mw, off + sizeof (*th), thoff - sizeof (*th), opts);