/*
 * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* $apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */

/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
56
57 #include <sys/param.h>
58 #include <sys/systm.h>
59 #include <sys/mbuf.h>
60 #include <sys/filio.h>
61 #include <sys/fcntl.h>
62 #include <sys/socket.h>
63 #include <sys/kernel.h>
64 #include <sys/time.h>
65 #include <sys/random.h>
66 #include <sys/mcache.h>
67
68 #include <net/if.h>
69 #include <net/if_types.h>
70 #include <net/bpf.h>
71 #include <net/route.h>
72 #include <net/if_pflog.h>
73
74 #include <netinet/in.h>
75 #include <netinet/in_var.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip_var.h>
79 #include <netinet/tcp.h>
80 #include <netinet/tcp_seq.h>
81 #include <netinet/tcp_fsm.h>
82 #include <netinet/udp.h>
83 #include <netinet/ip_icmp.h>
84
85 #if INET6
86 #include <netinet/ip6.h>
87 #endif /* INET6 */
88
89 #include <net/pfvar.h>
90
struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;
	struct mbuf *fr_m;
#define fr_ip	fr_u.fru_ipv4
#define fr_ip6	fr_u.fru_ipv6
	union {
		struct ip *fru_ipv4;
		struct ip6_hdr *fru_ipv6;
	} fr_u;
	struct ip6_frag fr_ip6f_opt;
	int fr_ip6f_hlen;
};

struct pf_frcache {
	LIST_ENTRY(pf_frcache) fr_next;
	uint16_t fr_off;
	uint16_t fr_end;
};

#define PFFRAG_SEENLAST	0x0001	/* Seen the last fragment for this */
#define PFFRAG_NOBUFFER	0x0002	/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004	/* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))

struct pf_fragment {
	RB_ENTRY(pf_fragment) fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	struct pf_addr	fr_srcx;
	struct pf_addr	fr_dstx;
	u_int8_t	fr_p;		/* protocol of this fragment */
	u_int8_t	fr_flags;	/* status flags */
	u_int16_t	fr_max;		/* fragment data max */
#define fr_id	fr_uid.fru_id4
#define fr_id6	fr_uid.fru_id6
	union {
		u_int16_t fru_id4;
		u_int32_t fru_id6;
	} fr_uid;
	int		fr_af;
	u_int32_t	fr_timeout;
#define fr_queue	fr_u.fru_queue
#define fr_cache	fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;	/* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
	} fr_u;
};
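
/*
 * Two reassembly strategies share these structures.  A buffering
 * pf_fragment (BUFFER_FRAGMENTS() true) holds every fragment's mbuf on
 * fr_queue until pf_reassemble()/pf_reassemble6() can rebuild the whole
 * packet.  A non-buffering one (PFFRAG_NOBUFFER) lets fragments pass
 * through and only records the byte ranges already seen on fr_cache, so
 * pf_fragcache()/pf_frag6cache() can catch duplicates and overlaps.
 */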

static TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue;
static TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue;

static __inline int pf_frag_compare(struct pf_fragment *,
    struct pf_fragment *);
static RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
    pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *,
    struct ip6_frag *);
static void pf_ip2key(struct pf_fragment *, struct ip *);
static void pf_remove_fragment(struct pf_fragment *);
static void pf_flush_fragments(void);
static void pf_free_fragment(struct pf_fragment *);
static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *,
    struct pf_frag_tree *);
static __inline struct pf_fragment *
    pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *);
static __inline struct pf_fragment *
    pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *,
    struct pf_frag_tree *);
static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_fragcache(struct mbuf **, struct ip *,
    struct pf_fragment **, int, int, int *);
static struct mbuf *pf_reassemble6(struct mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr *,
    struct ip6_frag *, struct pf_fragment **, int, int, int, int *);
static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *,
    struct pf_pdesc *, struct mbuf *, struct tcphdr *, int, int *);

#define DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		printf("%s: ", __func__);		\
		printf x ;				\
	}						\
} while (0)

/* Globals */
struct pool pf_frent_pl, pf_frag_pl;
static struct pool pf_cache_pl, pf_cent_pl;
struct pool pf_state_scrub_pl;

static int pf_nfrents, pf_ncache;

void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof (struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof (struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof (struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof (struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof (struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);

	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}

#if 0
void
pf_normalize_destroy(void)
{
	pool_destroy(&pf_state_scrub_pl);
	pool_destroy(&pf_cent_pl);
	pool_destroy(&pf_cache_pl);
	pool_destroy(&pf_frag_pl);
	pool_destroy(&pf_frent_pl);
}
#endif

int
pf_normalize_isempty(void)
{
	return (TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue));
}

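/*
 * Ordering for the red-black lookup trees: compare address family, then
 * protocol, then fragment ID, then source and destination address,
 * mirroring the key fields that pf_ip2key()/pf_ip6hdr2key() fill in.
 */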
static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int diff;

	if ((diff = a->fr_af - b->fr_af))
		return (diff);
	else if ((diff = a->fr_p - b->fr_p))
		return (diff);
	else {
		struct pf_addr *sa = &a->fr_srcx;
		struct pf_addr *sb = &b->fr_srcx;
		struct pf_addr *da = &a->fr_dstx;
		struct pf_addr *db = &b->fr_dstx;

		switch (a->fr_af) {
#ifdef INET
		case AF_INET:
			if ((diff = a->fr_id - b->fr_id))
				return (diff);
			else if (sa->v4.s_addr < sb->v4.s_addr)
				return (-1);
			else if (sa->v4.s_addr > sb->v4.s_addr)
				return (1);
			else if (da->v4.s_addr < db->v4.s_addr)
				return (-1);
			else if (da->v4.s_addr > db->v4.s_addr)
				return (1);
			break;
#endif
#ifdef INET6
		case AF_INET6:
			if ((diff = a->fr_id6 - b->fr_id6))
				return (diff);
			else if (sa->addr32[3] < sb->addr32[3])
				return (-1);
			else if (sa->addr32[3] > sb->addr32[3])
				return (1);
			else if (sa->addr32[2] < sb->addr32[2])
				return (-1);
			else if (sa->addr32[2] > sb->addr32[2])
				return (1);
			else if (sa->addr32[1] < sb->addr32[1])
				return (-1);
			else if (sa->addr32[1] > sb->addr32[1])
				return (1);
			else if (sa->addr32[0] < sb->addr32[0])
				return (-1);
			else if (sa->addr32[0] > sb->addr32[0])
				return (1);
			else if (da->addr32[3] < db->addr32[3])
				return (-1);
			else if (da->addr32[3] > db->addr32[3])
				return (1);
			else if (da->addr32[2] < db->addr32[2])
				return (-1);
			else if (da->addr32[2] > db->addr32[2])
				return (1);
			else if (da->addr32[1] < db->addr32[1])
				return (-1);
			else if (da->addr32[1] > db->addr32[1])
				return (1);
			else if (da->addr32[0] < db->addr32[0])
				return (-1);
			else if (da->addr32[0] > db->addr32[0])
				return (1);
			break;
#endif
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported!");
			break;
		}
	}
	return (0);
}

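/*
 * Both queues are kept in LRU order (a fragment is moved back to the
 * head on every lookup hit), so expiry only ever inspects the tail.
 */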
void
pf_purge_expired_fragments(void)
{
	struct pf_fragment *frag;
	u_int32_t expire = pf_time_second() -
	    pf_default_rule.timeout[PFTM_FRAG];

	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		VERIFY(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n",
			    ntohs(frag->fr_id),
			    (uint64_t)VM_KERNEL_ADDRPERM(frag)));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n",
			    ntohl(frag->fr_id6),
			    (uint64_t)VM_KERNEL_ADDRPERM(frag)));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		VERIFY(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n",
			    ntohs(frag->fr_id),
			    (uint64_t)VM_KERNEL_ADDRPERM(frag)));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(0x%llx) from cache.\n",
			    ntohl(frag->fr_id6),
			    (uint64_t)VM_KERNEL_ADDRPERM(frag)));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
		VERIFY(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}

/*
 * Try to flush old fragments to make space for new ones
 */

static void
pf_flush_fragments(void)
{
	struct pf_fragment *frag;
	int goal;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(("trying to free > %d frents\n",
	    pf_nfrents - goal));
	while (goal < pf_nfrents) {
		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}

	goal = pf_ncache * 9 / 10;
	DPFPRINTF(("trying to free > %d cache entries\n",
	    pf_ncache - goal));
	while (goal < pf_ncache) {
		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}
}

/* Frees the fragments and all associated entries */

static void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent *frent;
	struct pf_frcache *frcache;

	/* Free all fragments */
	if (BUFFER_FRAGMENTS(frag)) {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			m_freem(frent->fr_m);
			pool_put(&pf_frent_pl, frent);
			pf_nfrents--;
		}
	} else {
		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
		    frcache = LIST_FIRST(&frag->fr_cache)) {
			LIST_REMOVE(frcache, fr_next);

			VERIFY(LIST_EMPTY(&frag->fr_cache) ||
			    LIST_FIRST(&frag->fr_cache)->fr_off >
			    frcache->fr_end);

			pool_put(&pf_cent_pl, frcache);
			pf_ncache--;
		}
	}

	pf_remove_fragment(frag);
}

static void
pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6,
    struct ip6_frag *fh)
{
	key->fr_p = fh->ip6f_nxt;
	key->fr_id6 = fh->ip6f_ident;
	key->fr_af = AF_INET6;
	key->fr_srcx.v6 = ip6->ip6_src;
	key->fr_dstx.v6 = ip6->ip6_dst;
}

static void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
	key->fr_p = ip->ip_p;
	key->fr_id = ip->ip_id;
	key->fr_af = AF_INET;
	key->fr_srcx.v4.s_addr = ip->ip_src.s_addr;
	key->fr_dstx.v4.s_addr = ip->ip_dst.s_addr;
}

static struct pf_fragment *
pf_find_fragment_by_key(struct pf_fragment *key, struct pf_frag_tree *tree)
{
	struct pf_fragment *frag;

	frag = RB_FIND(pf_frag_tree, tree, key);
	if (frag != NULL) {
		/* XXX Are we sure we want to update the timeout? */
		frag->fr_timeout = pf_time_second();
		if (BUFFER_FRAGMENTS(frag)) {
			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
		} else {
			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
		}
	}

	return (frag);
}

static __inline struct pf_fragment *
pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree)
{
	struct pf_fragment key;
	pf_ip2key(&key, ip);
	return (pf_find_fragment_by_key(&key, tree));
}

static __inline struct pf_fragment *
pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh,
    struct pf_frag_tree *tree)
{
	struct pf_fragment key;
	pf_ip6hdr2key(&key, ip6, fh);
	return (pf_find_fragment_by_key(&key, tree));
}

/* Removes a fragment from the fragment queue and frees the fragment */

static void
pf_remove_fragment(struct pf_fragment *frag)
{
	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
		pool_put(&pf_frag_pl, frag);
	} else {
		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
		pool_put(&pf_cache_pl, frag);
	}
}

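/*
 * ip_off carries the fragment offset in 8-byte units; IP_OFFMASK strips
 * the DF/MF flag bits and the << 3 converts the result to a byte offset.
 */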
#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
static struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *m = *m0, *m2;
	struct pf_frent *frea, *next;
	struct pf_frent *frep = NULL;
	struct ip *ip = frent->fr_ip;
	int hlen = ip->ip_hl << 2;
	u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t fr_max = ip_len + off;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4 = frent->fr_ip->ip_src;
		(*frag)->fr_dstx.v4 = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = pf_time_second();
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;
	}

	VERIFY(frep != NULL || frea != NULL);

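	/*
	 * Trim overlaps: data already queued wins at this fragment's
	 * leading edge (precut), while this fragment's data wins over any
	 * queued fragments whose range it covers (aftercut below).
	 */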
	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off) {
		u_int16_t precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next) {
		u_int16_t aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4) {
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off)) {
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_srcx.v4;
	ip->ip_dst = (*frag)->fr_dstx.v4;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: 0x%llx(%d)\n",
	    (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len)));
	return (m);

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}

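/*
 * Non-buffering IPv4 path: each fragment is passed on (unless the rule
 * says drop) and only the [fr_off, fr_end) range it covered is cached,
 * so later duplicates and overlaps can be trimmed or dropped.
 */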
static struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
	struct mbuf *m = *m0;
	struct pf_frcache *frp, *fra, *cur = NULL;
	int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	u_int16_t off = ntohs(h->ip_off) << 3;
	u_int16_t fr_max = ip_len + off;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4 = h->ip_src;
		(*frag)->fr_dstx.v4 = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = pf_time_second();

		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
		    fr_max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	VERIFY(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/*
			 * The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/*
				 * XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
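				/*
				 * Copy the IP header into a fresh mbuf, chop
				 * the old header plus the duplicated payload
				 * bytes off the original chain, then splice
				 * the two back together.
				 */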
				*m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				VERIFY((*m0)->m_next == NULL);
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}

				h = mtod(m, struct ip *);

				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - precut);
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut);
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
			    h->ip_id));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}

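/*
 * In the IPv6 fragment header the 13-bit offset (in 8-octet units) sits
 * in the top of ip6f_offlg, so masking with IP6F_OFF_MASK already yields
 * the offset in bytes; no explicit shift is needed.
 */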
#define FR_IP6_OFF(fr) \
	(ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
#define FR_IP6_PLEN(fr)	(ntohs((fr)->fr_ip6->ip6_plen))
struct mbuf *
pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *m, *m2;
	struct pf_frent *frea, *frep, *next;
	struct ip6_hdr *ip6;
	int plen, off, fr_max;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
	m = *m0;
	frep = NULL;
	ip6 = frent->fr_ip6;
	off = FR_IP6_OFF(frent);
	plen = FR_IP6_PLEN(frent);
	fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof *ip6);

	DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u "
	    "fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off,
	    frent->fr_ip6f_hlen, fr_max, m->m_len));

	/* strip off headers up to the fragment payload */
	m->m_data += frent->fr_ip6f_hlen;
	m->m_len -= frent->fr_ip6f_hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6 = frent->fr_ip6->ip6_src;
		(*frag)->fr_dstx.v6 = frent->fr_ip6->ip6_dst;
		(*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
		(*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP6_OFF(frea) > off)
			break;
		frep = frea;
	}

	VERIFY(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off) {
		u_int16_t precut;

		precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
		    frep->fr_ip6f_hlen - off;
		if (precut >= plen)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		frent->fr_ip6f_opt.ip6f_offlg =
		    htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
		    (precut >> 3));
		off = FR_IP6_OFF(frent);
		plen -= precut;
		ip6->ip6_plen = htons(plen);
	}

	for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
		u_int16_t aftercut;

		aftercut = plen + off - FR_IP6_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
			frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
			    aftercut);
			frea->fr_ip6f_opt.ip6f_offlg =
			    htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);
		off += FR_IP6_PLEN(frep) - (frent->fr_ip6f_hlen - sizeof *ip6);
		DPFPRINTF(("frep at %d, next %d, max %d\n",
		    off, next == NULL ? -1 : FR_IP6_OFF(next),
		    (*frag)->fr_max));
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP6_OFF(next) != off)) {
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP6_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}

	ip6 = frent->fr_ip6;
	ip6->ip6_nxt = (*frag)->fr_p;
	ip6->ip6_plen = htons(off);
	ip6->ip6_src = (*frag)->fr_srcx.v6;
	ip6->ip6_dst = (*frag)->fr_dstx.v6;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	m = frent->fr_m;
	m->m_len += sizeof (struct ip6_hdr);
	m->m_data -= sizeof (struct ip6_hdr);
	memmove(m->m_data, ip6, sizeof (struct ip6_hdr));

	next = LIST_NEXT(frent, fr_next);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; next != NULL; frent = next) {
		m2 = frent->fr_m;

		m_cat(m, m2);
		next = LIST_NEXT(frent, fr_next);
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
	}

	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int pktlen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			pktlen += m2->m_len;
		m->m_pkthdr.len = pktlen;
	}

	DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n",
	    (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip6->ip6_plen),
	    m->m_pkthdr.len));

	return (m);

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}

static struct mbuf *
pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh,
    struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem)
{
	struct mbuf *m = *m0;
	u_int16_t plen, off, fr_max;
	struct pf_frcache *frp, *fra, *cur = NULL;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
	m = *m0;
	off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
	plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);

	/*
	 * Apple Modification: dimambro@apple.com.  The hlen being passed
	 * into this function includes all the headers associated with
	 * the packet, and may include routing headers, so to get to
	 * the data payload as stored in the original IPv6 header we need
	 * to subtract all those headers and the IP header.
	 *
	 * The 'max' local variable should also contain the offset from the
	 * start of the reassembled packet to the octet just past the end
	 * of the octets in the current fragment where:
	 *  - 'off' is the offset from the start of the reassembled packet
	 *    to the first octet in the fragment,
	 *  - 'plen' is the length of the "payload data length" excluding
	 *    all the IPv6 headers of the fragment.
	 *  - 'hlen' is computed in pf_normalize_ip6() as the offset from
	 *    the start of the IPv6 packet to the beginning of the data.
	 */
	fr_max = off + plen;

	DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n",
	    (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off, fr_max));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6 = h->ip6_src;
		(*frag)->fr_dstx.v6 = h->ip6_dst;
		(*frag)->fr_p = fh->ip6f_nxt;
		(*frag)->fr_id6 = fh->ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();

		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
		    off, fr_max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	VERIFY(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int precut;

		precut = frp->fr_end - off;
		if (precut >= plen) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/*
			 * The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/*
				 * XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym(m, 0, hlen, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				VERIFY((*m0)->m_next == NULL);
				m_adj(m, precut + hlen);
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						pktlen += t->m_len;
					m->m_pkthdr.len = pktlen;
				}

				h = mtod(m, struct ip6_hdr *);

				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - precut);
				fh->ip6f_offlg &= ~IP6F_OFF_MASK;
				fh->ip6f_offlg |=
				    htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
				    + (precut >> 3));
				h->ip6_plen = htons(ntohs(h->ip6_plen) -
				    precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), -precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						pktlen += t->m_len;
					m->m_pkthdr.len = pktlen;
				}
				h = mtod(m, struct ip6_hdr *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - aftercut);
				h->ip6_plen =
				    htons(ntohs(h->ip6_plen) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), cur->fr_off,
				    cur->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), frp->fr_off,
				    frp->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
		    ntohl(fh->ip6f_ident), (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("frag6cache[%u]: dropping overall "
			    "fragment\n", ntohl(fh->ip6f_ident)));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}

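/*
 * Entry point for IPv4 normalization ("scrub" rules).  Walks the scrub
 * ruleset, sanity-checks the header, then either fully reassembles
 * fragments (the default) or runs them through the non-buffering cache
 * when fragment crop/drop is configured on the matching rule.
 */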
int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf *m = *m0;
	struct pf_rule *r;
	struct pf_frent *frent;
	struct pf_fragment *frag = NULL;
	struct ip *h = mtod(m, struct ip *);
	int mff = (ntohs(h->ip_off) & IP_MF);
	int hlen = h->ip_hl << 2;
	u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t fr_max;
	int ip_len;
	int ip_off;
	int asd = 0;
	struct pf_ruleset *ruleset = NULL;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else {
			if (r->anchor == NULL)
				break;
			else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_SCRUB, &r, NULL, NULL);
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_SCRUB, &r, NULL, NULL))
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (hlen < (int)sizeof (struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

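	/*
	 * pf_cksum_fixup() patches the header checksum incrementally when
	 * a 16-bit word changes (in the style of RFC 1624), so the header
	 * is never re-summed from scratch after the scrub tweaks below.
	 */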
	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ipoff = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/*
	 * We're dealing with a fragment now. Don't allow fragments
	 * with IP_DF to enter the cache. If the flag was cleared by
	 * no-df above, fine. Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	fr_max = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
		    fragoff, fr_max));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		VERIFY(m->m_flags & M_PKTHDR);

		/* use mtag from concatenated mbuf chain */
		pd->pf_mtag = pf_find_mtag(m);
#if DIAGNOSTIC
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				m = *m0 = NULL;
				goto no_mem;
			}
		}
#endif
		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int nomem = 0;

		if (dir == PF_OUT && (pd->pf_mtag->pftag_flags &
		    PF_TAG_FRAGCACHE)) {
			/*
			 * Already passed the fragment cache in the
			 * input direction. If we continued, it would
			 * appear to be a dup and would be dropped.
			 */
			goto fragment_pass;
		}

		frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}

		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		VERIFY(m->m_flags & M_PKTHDR);

		/* use mtag from copied and trimmed mbuf chain */
		pd->pf_mtag = pf_find_mtag(m);
#if DIAGNOSTIC
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				m = *m0 = NULL;
				goto no_mem;
			}
		}
#endif
		if (dir == PF_IN)
			pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;
		goto fragment_pass;
	}

no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ipoff = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
	if (r->rule_flag & PFRULE_RANDOMID) {
		u_int16_t oip_id = h->ip_id;

		h->ip_id = ip_randomid();
		h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;

	return (PF_PASS);

fragment_pass:
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);

drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);

bad:
	DPFPRINTF(("dropping bad IPv4 fragment\n"));

	/* Free associated fragments */
	if (frag != NULL)
		pf_free_fragment(frag);

	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);

	return (PF_DROP);
}

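/*
 * IPv6 counterpart.  The extension header walk below locates the
 * fragment header (rejecting repeated routing headers and routing
 * header type 0 along the way) before handing off to reassembly or to
 * the non-buffering cache.
 */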
1798 #if INET6
1799 int
1800 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
1801 u_short *reason, struct pf_pdesc *pd)
1802 {
1803 struct mbuf *m = *m0;
1804 struct pf_rule *r;
1805 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1806 int off;
1807 struct ip6_ext ext;
1808 /* adi XXX */
1809 #if 0
1810 struct ip6_opt opt;
1811 struct ip6_opt_jumbo jumbo;
1812 int optend;
1813 int ooff;
1814 #endif
1815 struct ip6_frag frag;
1816 u_int32_t jumbolen = 0, plen;
1817 u_int16_t fragoff = 0;
1818 u_int8_t proto;
1819 int terminal;
1820 struct pf_frent *frent;
1821 struct pf_fragment *pff = NULL;
1822 int mff = 0, rh_cnt = 0;
1823 u_int16_t fr_max;
1824 int asd = 0;
1825 struct pf_ruleset *ruleset = NULL;
1826
1827 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1828 while (r != NULL) {
1829 r->evaluations++;
1830 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1831 r = r->skip[PF_SKIP_IFP].ptr;
1832 else if (r->direction && r->direction != dir)
1833 r = r->skip[PF_SKIP_DIR].ptr;
1834 else if (r->af && r->af != AF_INET6)
1835 r = r->skip[PF_SKIP_AF].ptr;
1836 #if 0 /* header chain! */
1837 else if (r->proto && r->proto != h->ip6_nxt)
1838 r = r->skip[PF_SKIP_PROTO].ptr;
1839 #endif
1840 else if (PF_MISMATCHAW(&r->src.addr,
1841 (struct pf_addr *)&h->ip6_src, AF_INET6,
1842 r->src.neg, kif))
1843 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1844 else if (PF_MISMATCHAW(&r->dst.addr,
1845 (struct pf_addr *)&h->ip6_dst, AF_INET6,
1846 r->dst.neg, NULL))
1847 r = r->skip[PF_SKIP_DST_ADDR].ptr;
1848 else {
1849 if (r->anchor == NULL)
1850 break;
1851 else
1852 pf_step_into_anchor(&asd, &ruleset,
1853 PF_RULESET_SCRUB, &r, NULL, NULL);
1854 }
1855 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
1856 PF_RULESET_SCRUB, &r, NULL, NULL))
1857 break;
1858 }
1859
1860 if (r == NULL || r->action == PF_NOSCRUB)
1861 return (PF_PASS);
1862 else {
1863 r->packets[dir == PF_OUT]++;
1864 r->bytes[dir == PF_OUT] += pd->tot_len;
1865 }
1866
1867 /* Check for illegal packets */
1868 if ((int)(sizeof (struct ip6_hdr) + IPV6_MAXPACKET) < m->m_pkthdr.len)
1869 goto drop;
1870
1871 off = sizeof (struct ip6_hdr);
1872 proto = h->ip6_nxt;
1873 terminal = 0;
1874 do {
1875 pd->proto = proto;
1876 switch (proto) {
1877 case IPPROTO_FRAGMENT:
1878 goto fragment;
1879 break;
1880 case IPPROTO_AH:
1881 case IPPROTO_ROUTING:
1882 case IPPROTO_DSTOPTS:
1883 if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
1884 NULL, AF_INET6))
1885 goto shortpkt;
1886 /*
1887 * <jhw@apple.com>
1888 * Multiple routing headers not allowed.
1889 * Routing header type zero considered harmful.
1890 */
1891 if (proto == IPPROTO_ROUTING) {
1892 const struct ip6_rthdr *rh =
1893 (const struct ip6_rthdr *)&ext;
1894 if (rh_cnt++)
1895 goto drop;
1896 if (rh->ip6r_type == IPV6_RTHDR_TYPE_0)
1897 goto drop;
1898 }
1899 else
1900 if (proto == IPPROTO_AH)
1901 off += (ext.ip6e_len + 2) * 4;
1902 else
1903 off += (ext.ip6e_len + 1) * 8;
1904 proto = ext.ip6e_nxt;
1905 break;
1906 case IPPROTO_HOPOPTS:
1907 /* adi XXX */
1908 #if 0
1909 if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
1910 NULL, AF_INET6))
1911 goto shortpkt;
1912 optend = off + (ext.ip6e_len + 1) * 8;
1913 ooff = off + sizeof (ext);
1914 do {
1915 if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
1916 sizeof (opt.ip6o_type), NULL, NULL,
1917 AF_INET6))
1918 goto shortpkt;
1919 if (opt.ip6o_type == IP6OPT_PAD1) {
1920 ooff++;
1921 continue;
1922 }
1923 if (!pf_pull_hdr(m, ooff, &opt, sizeof (opt),
1924 NULL, NULL, AF_INET6))
1925 goto shortpkt;
1926 if (ooff + sizeof (opt) + opt.ip6o_len > optend)
1927 goto drop;
1928 switch (opt.ip6o_type) {
1929 case IP6OPT_JUMBO:
1930 if (h->ip6_plen != 0)
1931 goto drop;
1932 if (!pf_pull_hdr(m, ooff, &jumbo,
1933 sizeof (jumbo), NULL, NULL,
1934 AF_INET6))
1935 goto shortpkt;
1936 memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
1937 sizeof (jumbolen));
1938 jumbolen = ntohl(jumbolen);
1939 if (jumbolen <= IPV6_MAXPACKET)
1940 goto drop;
1941 if (sizeof (struct ip6_hdr) +
1942 jumbolen != m->m_pkthdr.len)
1943 goto drop;
1944 break;
1945 default:
1946 break;
1947 }
1948 ooff += sizeof (opt) + opt.ip6o_len;
1949 } while (ooff < optend);
1950
1951 off = optend;
1952 proto = ext.ip6e_nxt;
1953 break;
1954 #endif
1955 default:
1956 terminal = 1;
1957 break;
1958 }
1959 } while (!terminal);
1960
1961 /* jumbo payload option must be present, or plen > 0 */
1962 if (ntohs(h->ip6_plen) == 0)
1963 plen = jumbolen;
1964 else
1965 plen = ntohs(h->ip6_plen);
1966 if (plen == 0)
1967 goto drop;
1968 if ((int)(sizeof (struct ip6_hdr) + plen) > m->m_pkthdr.len)
1969 goto shortpkt;
1970
1971 /* Enforce a minimum ttl, may cause endless packet loops */
1972 if (r->min_ttl && h->ip6_hlim < r->min_ttl)
1973 h->ip6_hlim = r->min_ttl;
1974
1975 return (PF_PASS);
1976
1977 fragment:
1978 if (ntohs(h->ip6_plen) == 0 || jumbolen)
1979 goto drop;
1980 plen = ntohs(h->ip6_plen);
1981
1982 if (!pf_pull_hdr(m, off, &frag, sizeof (frag), NULL, NULL, AF_INET6))
1983 goto shortpkt;
1984 fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); /* frag offset in bytes */
1985 pd->proto = frag.ip6f_nxt;
1986 mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
1987 off += sizeof frag;
1988 if (fragoff + (plen - off) > IPV6_MAXPACKET)
1989 goto badfrag;
1990
1991 fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
1992 DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u "
1993 "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off,
1994 fragoff, fr_max));
1995
1996 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
1997 /* Fully buffer all of the fragments */
1998 pd->flags |= PFDESC_IP_REAS;
1999
2000 pff = pf_find_fragment_by_ipv6_header(h, &frag,
2001 &pf_frag_tree);
2002
2003 /* Check if we saw the last fragment already */
2004 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2005 fr_max > pff->fr_max)
2006 goto badfrag;
2007
2008 /* Get an entry for the fragment queue */
2009 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
2010 if (frent == NULL) {
2011 REASON_SET(reason, PFRES_MEMORY);
2012 return (PF_DROP);
2013 }
2014 pf_nfrents++;
2015 frent->fr_ip6 = h;
2016 frent->fr_m = m;
2017 frent->fr_ip6f_opt = frag;
2018 frent->fr_ip6f_hlen = off;
2019
2020 /* Might return a completely reassembled mbuf, or NULL */
2021 DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
2022 ntohl(frag.ip6f_ident), fragoff, fr_max));
2023 *m0 = m = pf_reassemble6(m0, &pff, frent, mff);
2024
2025 if (m == NULL)
2026 return (PF_DROP);
2027
2028 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2029 goto drop;
2030
2031 h = mtod(m, struct ip6_hdr *);
2032 }
2033 else if (dir == PF_IN || !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
2034 /* non-buffering fragment cache (overlaps: see RFC 5722) */
2035 int nomem = 0;
2036
2037 pff = pf_find_fragment_by_ipv6_header(h, &frag,
2038 &pf_cache_tree);
2039
2040 /* Check if we saw the last fragment already */
2041 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2042 fr_max > pff->fr_max) {
2043 if (r->rule_flag & PFRULE_FRAGDROP)
2044 pff->fr_flags |= PFFRAG_DROP;
2045 goto badfrag;
2046 }
2047
2048 *m0 = m = pf_frag6cache(m0, h, &frag, &pff, off, mff,
2049 (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
2050 if (m == NULL) {
2051 if (nomem)
2052 goto no_mem;
2053 goto drop;
2054 }
2055
2056 if (dir == PF_IN)
2057 pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
2058
2059 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2060 goto drop;
2061 }
2062
2063 /* Enforce a minimum ttl, may cause endless packet loops */
2064 if (r->min_ttl && h->ip6_hlim < r->min_ttl)
2065 h->ip6_hlim = r->min_ttl;
2066 return (PF_PASS);
2067
2068 no_mem:
2069 REASON_SET(reason, PFRES_MEMORY);
2070 goto dropout;
2071
2072 shortpkt:
2073 REASON_SET(reason, PFRES_SHORT);
2074 goto dropout;
2075
2076 drop:
2077 REASON_SET(reason, PFRES_NORM);
2078 goto dropout;
2079
2080 badfrag:
2081 DPFPRINTF(("dropping bad IPv6 fragment\n"));
2082 REASON_SET(reason, PFRES_FRAG);
2083 goto dropout;
2084
2085 dropout:
2086 if (pff != NULL)
2087 pf_free_fragment(pff);
2088 if (r != NULL && r->log)
2089 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
2090 return (PF_DROP);
2091 }
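
/*
 * Illustrative sketch (kept out of the build): the fragment bounds
 * arithmetic used above, stated in isolation.  Because the 13-bit
 * fragment offset occupies the upper bits of ip6f_offlg, the masked
 * field taken in host byte order is already the offset in bytes.
 * `hdrlen' stands for the offset of the fragment payload from the
 * start of the packet; all names are local to this sketch.
 */
#if 0
static int
pf_frag6_bounds_sketch(u_int32_t fragoff, u_int32_t plen, u_int32_t hdrlen,
    u_int16_t *fr_max)
{
	/* bytes of fragment payload carried by this packet */
	u_int32_t paylen = plen - (hdrlen - sizeof (struct ip6_hdr));

	/* no byte of the reassembled packet may lie above 65535 */
	if (fragoff + paylen > IPV6_MAXPACKET)
		return (0);

	/* one past the highest payload byte this fragment supplies */
	*fr_max = (u_int16_t)(fragoff + paylen);
	return (1);
}
#endif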
2092 #endif /* INET6 */
2093
2094 int
2095 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
2096 int off, void *h, struct pf_pdesc *pd)
2097 {
2098 #pragma unused(ipoff, h)
2099 struct pf_rule *r, *rm = NULL;
2100 struct tcphdr *th = pd->hdr.tcp;
2101 int rewrite = 0;
2102 int asd = 0;
2103 u_short reason;
2104 u_int8_t flags;
2105 sa_family_t af = pd->af;
2106 struct pf_ruleset *ruleset = NULL;
2107 union pf_state_xport sxport, dxport;
2108
2109 sxport.port = th->th_sport;
2110 dxport.port = th->th_dport;
2111
2112 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2113 while (r != NULL) {
2114 r->evaluations++;
2115 if (pfi_kif_match(r->kif, kif) == r->ifnot)
2116 r = r->skip[PF_SKIP_IFP].ptr;
2117 else if (r->direction && r->direction != dir)
2118 r = r->skip[PF_SKIP_DIR].ptr;
2119 else if (r->af && r->af != af)
2120 r = r->skip[PF_SKIP_AF].ptr;
2121 else if (r->proto && r->proto != pd->proto)
2122 r = r->skip[PF_SKIP_PROTO].ptr;
2123 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
2124 r->src.neg, kif))
2125 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2126 else if (r->src.xport.range.op &&
2127 !pf_match_xport(r->src.xport.range.op, r->proto_variant,
2128 &r->src.xport, &sxport))
2129 r = r->skip[PF_SKIP_SRC_PORT].ptr;
2130 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
2131 r->dst.neg, NULL))
2132 r = r->skip[PF_SKIP_DST_ADDR].ptr;
2133 else if (r->dst.xport.range.op &&
2134 !pf_match_xport(r->dst.xport.range.op, r->proto_variant,
2135 &r->dst.xport, &dxport))
2136 r = r->skip[PF_SKIP_DST_PORT].ptr;
2137 else if (r->os_fingerprint != PF_OSFP_ANY &&
2138 !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, th),
2139 r->os_fingerprint))
2140 r = TAILQ_NEXT(r, entries);
2141 else {
2142 if (r->anchor == NULL) {
2143 rm = r;
2144 break;
2145 } else {
2146 pf_step_into_anchor(&asd, &ruleset,
2147 PF_RULESET_SCRUB, &r, NULL, NULL);
2148 }
2149 }
2150 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2151 PF_RULESET_SCRUB, &r, NULL, NULL))
2152 break;
2153 }
2154
2155 if (rm == NULL || rm->action == PF_NOSCRUB)
2156 return (PF_PASS);
2157 else {
2158 r->packets[dir == PF_OUT]++;
2159 r->bytes[dir == PF_OUT] += pd->tot_len;
2160 }
2161
2162 if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
2163 pd->flags |= PFDESC_TCP_NORM;
2164
2165 flags = th->th_flags;
2166 if (flags & TH_SYN) {
2167 /* Illegal packet */
2168 if (flags & TH_RST)
2169 goto tcp_drop;
2170
2171 if (flags & TH_FIN)
2172 flags &= ~TH_FIN;
2173 } else {
2174 /* Illegal packet */
2175 if (!(flags & (TH_ACK|TH_RST)))
2176 goto tcp_drop;
2177 }
2178
2179 if (!(flags & TH_ACK)) {
2180 /* These flags are only valid if ACK is set */
2181 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
2182 goto tcp_drop;
2183 }
2184
2185 /* Check for illegal header length */
2186 if (th->th_off < (sizeof (struct tcphdr) >> 2))
2187 goto tcp_drop;
2188
2189 /* If flags changed or reserved bits were set, adjust and fix the checksum */
2190 if (flags != th->th_flags || th->th_x2 != 0) {
2191 u_int16_t ov, nv;
2192
2193 ov = *(u_int16_t *)(&th->th_ack + 1); /* word holding th_off, th_x2, th_flags */
2194 th->th_flags = flags;
2195 th->th_x2 = 0;
2196 nv = *(u_int16_t *)(&th->th_ack + 1);
2197
2198 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
2199 rewrite = 1;
2200 }
2201
2202 /* Remove urgent pointer, if TH_URG is not set */
2203 if (!(flags & TH_URG) && th->th_urp) {
2204 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
2205 th->th_urp = 0;
2206 rewrite = 1;
2207 }
2208
2209 /* If anything was sanitized, the headers are copied back below */
2210 /* Process options */
2211 if (r->max_mss) {
2212 int rv = pf_normalize_tcpopt(r, dir, kif, pd, m, th, off,
2213 &rewrite);
2214 if (rv == PF_DROP)
2215 return rv;
2216 m = pd->mp;
2217 }
2218
2219 if (rewrite) {
2220 struct mbuf *mw = pf_lazy_makewritable(pd, m,
2221 off + sizeof (*th));
2222 if (!mw) {
2223 REASON_SET(&reason, PFRES_MEMORY);
2224 if (r->log)
2225 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason,
2226 r, 0, 0, pd);
2227 return PF_DROP;
2228 }
2229
2230 m_copyback(mw, off, sizeof (*th), th);
2231 }
2232
2233 return (PF_PASS);
2234
2235 tcp_drop:
2236 REASON_SET(&reason, PFRES_NORM);
2237 if (rm != NULL && r->log)
2238 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
2239 return (PF_DROP);
2240 }
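
/*
 * Sketch of the incremental checksum repair used above.  Reading
 * `*(u_int16_t *)(&th->th_ack + 1)' grabs the 16-bit word that holds
 * th_off, th_x2 and th_flags, so a single fixup covers both the
 * cleared reserved bits and any changed flags.  The helper is the
 * RFC 1624 update HC' = ~(~HC + ~m + m'); this stand-alone version
 * mirrors pf_cksum_fixup() without the UDP zero-checksum special
 * case.
 */
#if 0
static u_int16_t
pf_cksum_fixup_sketch(u_int16_t cksum, u_int16_t old, u_int16_t new)
{
	u_int32_t l;

	l = cksum + old - new;		/* may "borrow" past 16 bits */
	l = (l >> 16) + (l & 0xffff);	/* fold the carry back in */
	return (l & 0xffff);
}
#endif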
2241
2242 int
2243 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
2244 struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
2245 {
2246 #pragma unused(dst)
2247 u_int32_t tsval, tsecr;
2248 u_int8_t hdr[60];
2249 u_int8_t *opt;
2250
2251 VERIFY(src->scrub == NULL);
2252
2253 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
2254 if (src->scrub == NULL)
2255 return (1);
2256 bzero(src->scrub, sizeof (*src->scrub));
2257
2258 switch (pd->af) {
2259 #if INET
2260 case AF_INET: {
2261 struct ip *h = mtod(m, struct ip *);
2262 src->scrub->pfss_ttl = h->ip_ttl;
2263 break;
2264 }
2265 #endif /* INET */
2266 #if INET6
2267 case AF_INET6: {
2268 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
2269 src->scrub->pfss_ttl = h->ip6_hlim;
2270 break;
2271 }
2272 #endif /* INET6 */
2273 }
2274
2275
2276 /*
2277 * All normalizations below are only begun if we see the start of
2278 * the connection. Each must set an enabled bit in pfss_flags.
2279 */
2280 if ((th->th_flags & TH_SYN) == 0)
2281 return (0);
2282
2283
2284 if (th->th_off > (sizeof (struct tcphdr) >> 2) && src->scrub &&
2285 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
2286 /* Diddle with TCP options */
2287 int hlen;
2288 opt = hdr + sizeof (struct tcphdr);
2289 hlen = (th->th_off << 2) - sizeof (struct tcphdr);
2290 while (hlen >= TCPOLEN_TIMESTAMP) {
2291 switch (*opt) {
2292 case TCPOPT_EOL: /* FALLTHROUGH */
2293 case TCPOPT_NOP:
2294 opt++;
2295 hlen--;
2296 break;
2297 case TCPOPT_TIMESTAMP:
2298 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2299 src->scrub->pfss_flags |=
2300 PFSS_TIMESTAMP;
2301 src->scrub->pfss_ts_mod =
2302 htonl(random());
2303
2304 /* note PFSS_PAWS not set yet */
2305 memcpy(&tsval, &opt[2],
2306 sizeof (u_int32_t));
2307 memcpy(&tsecr, &opt[6],
2308 sizeof (u_int32_t));
2309 src->scrub->pfss_tsval0 = ntohl(tsval);
2310 src->scrub->pfss_tsval = ntohl(tsval);
2311 src->scrub->pfss_tsecr = ntohl(tsecr);
2312 getmicrouptime(&src->scrub->pfss_last);
2313 }
2314 /* FALLTHROUGH */
2315 default:
2316 hlen -= MAX(opt[1], 2);
2317 opt += MAX(opt[1], 2);
2318 break;
2319 }
2320 }
2321 }
2322
2323 return (0);
2324 }
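
/*
 * Sketch of the TCP option walk used above (and again in
 * pf_normalize_tcp_stateful()).  EOL and NOP are single-byte options;
 * everything else is { kind, len, data } with len covering the whole
 * option, and the MAX(len, 2) step keeps a hostile len of 0 or 1 from
 * stalling the loop.  Stand-alone and not compiled; the caller is
 * assumed to have pulled th_off << 2 bytes of header into a local
 * buffer such as `hdr' above.
 */
#if 0
static const u_int8_t *
pf_find_tcpopt_sketch(const u_int8_t *opt, int hlen, u_int8_t kind)
{
	while (hlen >= 2) {
		switch (opt[0]) {
		case TCPOPT_EOL:	/* FALLTHROUGH */
		case TCPOPT_NOP:
			opt++;
			hlen--;
			break;
		default:
			if (opt[0] == kind && opt[1] >= 2 &&
			    opt[1] <= hlen)
				return (opt);
			/* never advance by less than 2 bytes */
			hlen -= MAX(opt[1], 2);
			opt += MAX(opt[1], 2);
			break;
		}
	}
	return (NULL);
}
#endif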
2325
2326 void
2327 pf_normalize_tcp_cleanup(struct pf_state *state)
2328 {
2329 if (state->src.scrub)
2330 pool_put(&pf_state_scrub_pl, state->src.scrub);
2331 if (state->dst.scrub)
2332 pool_put(&pf_state_scrub_pl, state->dst.scrub);
2333
2334 /* Someday... flush the TCP segment reassembly descriptors. */
2335 }
2336
2337 int
2338 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
2339 u_short *reason, struct tcphdr *th, struct pf_state *state,
2340 struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
2341 {
2342 struct timeval uptime;
2343 u_int32_t tsval, tsecr;
2344 u_int tsval_from_last;
2345 u_int8_t hdr[60];
2346 u_int8_t *opt;
2347 int copyback = 0;
2348 int got_ts = 0;
2349
2350 VERIFY(src->scrub || dst->scrub);
2351
2352 /*
2353 * Enforce the minimum TTL seen for this connection. This negates a
2354 * common technique used to evade intrusion detection systems and to
2355 * confuse firewall state code.
2356 */
2357 switch (pd->af) {
2358 #if INET
2359 case AF_INET: {
2360 if (src->scrub) {
2361 struct ip *h = mtod(m, struct ip *);
2362 if (h->ip_ttl > src->scrub->pfss_ttl)
2363 src->scrub->pfss_ttl = h->ip_ttl;
2364 h->ip_ttl = src->scrub->pfss_ttl;
2365 }
2366 break;
2367 }
2368 #endif /* INET */
2369 #if INET6
2370 case AF_INET6: {
2371 if (src->scrub) {
2372 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
2373 if (h->ip6_hlim > src->scrub->pfss_ttl)
2374 src->scrub->pfss_ttl = h->ip6_hlim;
2375 h->ip6_hlim = src->scrub->pfss_ttl;
2376 }
2377 break;
2378 }
2379 #endif /* INET6 */
2380 }
2381
2382 if (th->th_off > (sizeof (struct tcphdr) >> 2) &&
2383 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
2384 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
2385 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
2386 /* Diddle with TCP options */
2387 int hlen;
2388 opt = hdr + sizeof (struct tcphdr);
2389 hlen = (th->th_off << 2) - sizeof (struct tcphdr);
2390 while (hlen >= TCPOLEN_TIMESTAMP) {
2391 switch (*opt) {
2392 case TCPOPT_EOL: /* FALLTHROUGH */
2393 case TCPOPT_NOP:
2394 opt++;
2395 hlen--;
2396 break;
2397 case TCPOPT_TIMESTAMP:
2398 /*
2399 * Modulate the timestamps. Can be used for
2400 * NAT detection, OS uptime determination or
2401 * reboot detection.
2402 */
2403
2404 if (got_ts) {
2405 /* Huh? Multiple timestamps!? */
2406 if (pf_status.debug >= PF_DEBUG_MISC) {
2407 DPFPRINTF(("multiple TS??"));
2408 pf_print_state(state);
2409 printf("\n");
2410 }
2411 REASON_SET(reason, PFRES_TS);
2412 return (PF_DROP);
2413 }
2414 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2415 memcpy(&tsval, &opt[2],
2416 sizeof (u_int32_t));
2417 if (tsval && src->scrub &&
2418 (src->scrub->pfss_flags &
2419 PFSS_TIMESTAMP)) {
2420 tsval = ntohl(tsval);
2421 pf_change_a(&opt[2],
2422 &th->th_sum,
2423 htonl(tsval +
2424 src->scrub->pfss_ts_mod),
2425 0);
2426 copyback = 1;
2427 }
2428
2429 /* Modulate TS reply iff valid (!0) */
2430 memcpy(&tsecr, &opt[6],
2431 sizeof (u_int32_t));
2432 if (tsecr && dst->scrub &&
2433 (dst->scrub->pfss_flags &
2434 PFSS_TIMESTAMP)) {
2435 tsecr = ntohl(tsecr)
2436 - dst->scrub->pfss_ts_mod;
2437 pf_change_a(&opt[6],
2438 &th->th_sum, htonl(tsecr),
2439 0);
2440 copyback = 1;
2441 }
2442 got_ts = 1;
2443 }
2444 /* FALLTHROUGH */
2445 default:
2446 hlen -= MAX(opt[1], 2);
2447 opt += MAX(opt[1], 2);
2448 break;
2449 }
2450 }
2451 if (copyback) {
2452 /* Copy back the options; the caller copies back the header */
2453 int optoff = off + sizeof (*th);
2454 int optlen = (th->th_off << 2) - sizeof (*th);
2455 m = pf_lazy_makewritable(pd, m, optoff + optlen);
2456 if (!m) {
2457 REASON_SET(reason, PFRES_MEMORY);
2458 return PF_DROP;
2459 }
2460 *writeback = optoff + optlen;
2461 m_copyback(m, optoff, optlen, hdr + sizeof (*th));
2462 }
2463 }
2464
2465
2466 /*
2467 * Must invalidate PAWS checks on connections idle for too long.
2468 * The fastest allowed timestamp clock is 1ms, which covers half of
2469 * its 32bit space in about 24 days (2^31 ms). XXX Right now our
2470 * lowerbound TS echo check only works for the first 12 days of a
2471 * connection, before the TS has consumed half its 32bit space.
2472 */
2473 #define TS_MAX_IDLE (24*24*60*60)
2474 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */
2475
2476 getmicrouptime(&uptime);
2477 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
2478 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
2479 pf_time_second() - state->creation > TS_MAX_CONN)) {
2480 if (pf_status.debug >= PF_DEBUG_MISC) {
2481 DPFPRINTF(("src idled out of PAWS\n"));
2482 pf_print_state(state);
2483 printf("\n");
2484 }
2485 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
2486 | PFSS_PAWS_IDLED;
2487 }
2488 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
2489 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
2490 if (pf_status.debug >= PF_DEBUG_MISC) {
2491 DPFPRINTF(("dst idled out of PAWS\n"));
2492 pf_print_state(state);
2493 printf("\n");
2494 }
2495 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
2496 | PFSS_PAWS_IDLED;
2497 }
2498
2499 if (got_ts && src->scrub && dst->scrub &&
2500 (src->scrub->pfss_flags & PFSS_PAWS) &&
2501 (dst->scrub->pfss_flags & PFSS_PAWS)) {
2502 /*
2503 * Validate that the timestamps are "in-window".
2504 * RFC1323 describes TCP Timestamp options that allow
2505 * measurement of RTT (round trip time) and PAWS
2506 * (protection against wrapped sequence numbers). PAWS
2507 * gives us a set of rules for rejecting packets on
2508 * long fat pipes (packets that were somehow delayed
2509 * in transit longer than the time it took to send the
2510 * full TCP sequence space of 4GB). We can use these
2511 * rules and infer a few others that will let us treat
2512 * the 32bit timestamp and the 32bit echoed timestamp
2513 * as sequence numbers to prevent a blind attacker from
2514 * inserting packets into a connection.
2515 *
2516 * RFC1323 tells us:
2517 * - The timestamp on this packet must be greater than
2518 * or equal to the last value echoed by the other
2519 * endpoint. The RFC says those will be discarded
2520 * since it is a dup that has already been acked.
2521 * This gives us a lowerbound on the timestamp.
2522 * timestamp >= other last echoed timestamp
2523 * - The timestamp will be less than or equal to
2524 * the last timestamp plus the time between the
2525 * last packet and now. The RFC defines the max
2526 * clock rate as 1ms. We will allow clocks to be
2527 * up to 10% fast and will allow a total difference
2528 * of 30 seconds due to a route change. And this
2529 * gives us an upperbound on the timestamp.
2530 * timestamp <= last timestamp + max ticks
2531 * We have to be careful here. Windows will send an
2532 * initial timestamp of zero and then initialize it
2533 * to a random value after the 3whs; presumably to
2534 * avoid a DoS by having to call an expensive RNG
2535 * during a SYN flood. Proof that MS has at least one
2536 * good security geek.
2537 *
2538 * - The TCP timestamp option must also echo the other
2539 * endpoint's timestamp. The timestamp echoed is the
2540 * one carried on the earliest unacknowledged segment
2541 * on the left edge of the sequence window. The RFC
2542 * states that the host will reject any echoed
2543 * timestamps that were larger than any ever sent.
2544 * This gives us an upperbound on the TS echo.
2545 * tsecr <= largest_tsval
2546 * - The lowerbound on the TS echo is a little more
2547 * tricky to determine. The other endpoint's echoed
2548 * values will not decrease. But there may be
2549 * network conditions that re-order packets and
2550 * cause our view of them to decrease. For now the
2551 * only lowerbound we can safely determine is that
2552 * the TS echo will never be less than the original
2553 * TS. XXX There is probably a better lowerbound.
2554 * Remove TS_MAX_CONN with better lowerbound check.
2555 * tsecr >= other original TS
2556 *
2557 * It is also important to note that the fastest
2558 * timestamp clock of 1ms will cover half its 32bit space in
2559 * about 24 days. So we just disable TS checking after 24
2560 * days of idle time. We actually must use a 12d
2561 * connection limit until we can come up with a better
2562 * lowerbound to the TS echo check.
2563 */
2564 struct timeval delta_ts;
2565 int ts_fudge;
2566
2567
2568 /*
2569 * PFTM_TS_DIFF is how many seconds of leeway to allow
2570 * a host's timestamp. This can happen if the previous
2571 * packet got delayed in transit for much longer than
2572 * this packet.
2573 */
2574 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
2575 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
2576
2577
2578 /* Calculate max ticks since the last timestamp */
2579 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */
2580 #define TS_MICROSECS 1000000 /* microseconds per second */
2581 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
2582 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
2583 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
2584
2585
2586 if ((src->state >= TCPS_ESTABLISHED &&
2587 dst->state >= TCPS_ESTABLISHED) &&
2588 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
2589 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
2590 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
2591 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
2592 /*
2593 * Bad RFC1323 implementation or an insertion attack.
2594 *
2595 * - Solaris 2.6 and 2.7 are known to send another ACK
2596 * after the FIN,FIN|ACK,ACK closing that carries
2597 * an old timestamp.
2598 */
2599
2600 DPFPRINTF(("Timestamp failed %c%c%c%c\n",
2601 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
2602 SEQ_GT(tsval, src->scrub->pfss_tsval +
2603 tsval_from_last) ? '1' : ' ',
2604 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
2605 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
2606 DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u "
2607 "idle: %lus %ums\n",
2608 tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
2609 delta_ts.tv_usec / 1000));
2610 DPFPRINTF((" src->tsval: %u tsecr: %u\n",
2611 src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2612 DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u\n",
2613 dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
2614 dst->scrub->pfss_tsval0));
2615 if (pf_status.debug >= PF_DEBUG_MISC) {
2616 pf_print_state(state);
2617 pf_print_flags(th->th_flags);
2618 printf("\n");
2619 }
2620 REASON_SET(reason, PFRES_TS);
2621 return (PF_DROP);
2622 }
2623
2624 /* XXX I'd really like to require tsecr but it's optional */
2625
2626 } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
2627 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
2628 || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
2629 src->scrub && dst->scrub &&
2630 (src->scrub->pfss_flags & PFSS_PAWS) &&
2631 (dst->scrub->pfss_flags & PFSS_PAWS)) {
2632 /*
2633 * Didn't send a timestamp. Timestamps aren't really useful
2634 * when:
2635 * - connection opening or closing (often not even sent).
2636 * but we must not let an attacker put a FIN on a
2637 * data packet to sneak it through our ESTABLISHED check.
2638 * - on a TCP reset. RFC suggests not even looking at TS.
2639 * - on an empty ACK. The TS will not be echoed so it will
2640 * probably not help keep the RTT calculation in sync and
2641 * there isn't as much danger when the sequence numbers
2642 * get wrapped. So some stacks don't include TS on empty
2643 * ACKs :-(
2644 *
2645 * To minimize the disruption to mostly RFC1323 conformant
2646 * stacks, we will only require timestamps on data packets.
2647 *
2648 * And what do ya know, we cannot require timestamps on data
2649 * packets. There appear to be devices that do legitimate
2650 * TCP connection hijacking. There are HTTP devices that allow
2651 * a 3whs (with timestamps) and then buffer the HTTP request.
2652 * If the intermediate device has the HTTP response cache, it
2653 * will spoof the response but not bother timestamping its
2654 * packets. So we can look for the presence of a timestamp in
2655 * the first data packet and if there, require it in all future
2656 * packets.
2657 */
2658
2659 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
2660 /*
2661 * Hey! Someone tried to sneak a packet in. Or the
2662 * stack changed its RFC1323 behavior?!?!
2663 */
2664 if (pf_status.debug >= PF_DEBUG_MISC) {
2665 DPFPRINTF(("Did not receive expected RFC1323 "
2666 "timestamp\n"));
2667 pf_print_state(state);
2668 pf_print_flags(th->th_flags);
2669 printf("\n");
2670 }
2671 REASON_SET(reason, PFRES_TS);
2672 return (PF_DROP);
2673 }
2674 }
2675
2676
2677 /*
2678 * We note whether a host sends its data packets with or without
2679 * timestamps, and require all data packets to contain a timestamp
2680 * if the first one does. PAWS implicitly requires that all data packets be
2681 * timestamped. But I think there are middle-man devices that hijack
2682 * TCP streams immediately after the 3whs and don't timestamp their
2683 * packets (seen in a WWW accelerator or cache).
2684 */
2685 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
2686 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
2687 if (got_ts)
2688 src->scrub->pfss_flags |= PFSS_DATA_TS;
2689 else {
2690 src->scrub->pfss_flags |= PFSS_DATA_NOTS;
2691 if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2692 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
2693 /* Don't warn if other host rejected RFC1323 */
2694 DPFPRINTF(("Broken RFC1323 stack did not "
2695 "timestamp data packet. Disabled PAWS "
2696 "security.\n"));
2697 pf_print_state(state);
2698 pf_print_flags(th->th_flags);
2699 printf("\n");
2700 }
2701 }
2702 }
2703
2704
2705 /*
2706 * Update PAWS values
2707 */
2708 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
2709 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
2710 getmicrouptime(&src->scrub->pfss_last);
2711 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
2712 (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2713 src->scrub->pfss_tsval = tsval;
2714
2715 if (tsecr) {
2716 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
2717 (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2718 src->scrub->pfss_tsecr = tsecr;
2719
2720 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
2721 (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
2722 src->scrub->pfss_tsval0 == 0)) {
2723 /* tsval0 MUST be the lowest timestamp */
2724 src->scrub->pfss_tsval0 = tsval;
2725 }
2726
2727 /* Only fully initialized after a TS gets echoed */
2728 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
2729 src->scrub->pfss_flags |= PFSS_PAWS;
2730 }
2731 }
2732
2733 /* I have a dream.... TCP segment reassembly.... */
2734 return (0);
2735 }
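
/*
 * Stand-alone sketch of the timestamp window enforced above.  The
 * comparisons are the serial-number macros SEQ_LT/SEQ_GT, so the
 * 32-bit values may wrap.  Worked example for the upper bound: 10s of
 * idle time plus the default 30s PFTM_TS_DIFF fudge admits
 * (10 + 30) * 1100 = 44000 ticks beyond the last tsval seen.  The
 * parameter names mirror the scrub state fields used above; this is
 * illustrative only and not compiled.
 */
#if 0
static int
pf_paws_in_window_sketch(u_int32_t tsval, u_int32_t tsecr,
    u_int32_t src_tsval, u_int32_t dst_tsecr, u_int32_t dst_tsval,
    u_int32_t dst_tsval0, u_int32_t idle_sec, u_int32_t idle_usec,
    u_int32_t ts_fudge)
{
	u_int32_t ticks;

	/* max ticks a 1kHz clock running 10% fast may have advanced */
	ticks = (idle_sec + ts_fudge) * TS_MAXFREQ;
	ticks += idle_usec / (TS_MICROSECS / TS_MAXFREQ);

	if (SEQ_LT(tsval, dst_tsecr))		/* below the last echo */
		return (0);
	if (SEQ_GT(tsval, src_tsval + ticks))	/* clock ran too fast */
		return (0);
	if (tsecr && (SEQ_GT(tsecr, dst_tsval) ||  /* echoed a future TS */
	    SEQ_LT(tsecr, dst_tsval0)))		/* or a pre-connection TS */
		return (0);
	return (1);
}
#endif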
2736
2737 static int
2738 pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
2739 struct pf_pdesc *pd, struct mbuf *m, struct tcphdr *th, int off,
2740 int *rewrptr)
2741 {
2742 #pragma unused(dir, kif)
2743 sa_family_t af = pd->af;
2744 u_int16_t *mss;
2745 int thoff;
2746 int opt, cnt, optlen = 0;
2747 int rewrite = 0;
2748 u_char opts[MAX_TCPOPTLEN];
2749 u_char *optp = opts;
2750
2751 thoff = th->th_off << 2;
2752 cnt = thoff - sizeof (struct tcphdr);
2753
2754 if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt,
2755 NULL, NULL, af))
2756 return PF_DROP;
2757
2758 for (; cnt > 0; cnt -= optlen, optp += optlen) {
2759 opt = optp[0];
2760 if (opt == TCPOPT_EOL)
2761 break;
2762 if (opt == TCPOPT_NOP)
2763 optlen = 1;
2764 else {
2765 if (cnt < 2)
2766 break;
2767 optlen = optp[1];
2768 if (optlen < 2 || optlen > cnt)
2769 break;
2770 }
2771 switch (opt) {
2772 case TCPOPT_MAXSEG:
2773 mss = (u_int16_t *)(void *)(optp + 2);
2774 if ((ntohs(*mss)) > r->max_mss) {
2775 /*
2776 * <jhw@apple.com>
2777 * Only do the TCP checksum fixup if delayed
2778 * checksum calculation will not be performed.
2779 */
2780 if (m->m_pkthdr.rcvif ||
2781 !(m->m_pkthdr.csum_flags & CSUM_TCP))
2782 th->th_sum = pf_cksum_fixup(th->th_sum,
2783 *mss, htons(r->max_mss), 0);
2784 *mss = htons(r->max_mss);
2785 rewrite = 1;
2786 }
2787 break;
2788 default:
2789 break;
2790 }
2791 }
2792
2793 if (rewrite) {
2794 struct mbuf *mw;
2795 u_short reason;
2796
2797 mw = pf_lazy_makewritable(pd, pd->mp,
2798 off + sizeof (*th) + thoff);
2799 if (!mw) {
2800 REASON_SET(&reason, PFRES_MEMORY);
2801 if (r->log)
2802 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason,
2803 r, 0, 0, pd);
2804 return PF_DROP;
2805 }
2806
2807 *rewrptr = 1;
2808 m_copyback(mw, off + sizeof (*th), thoff - sizeof (*th), opts);
2809 }
2810
2811 return PF_PASS;
2812 }
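
/*
 * Sketch of the MSS clamp decision above.  The checksum is patched
 * incrementally only when a delayed checksum will not be computed
 * later: inbound packets (rcvif set) already carry a final checksum,
 * while outbound packets flagged CSUM_TCP hold only a pseudo-header
 * seed in th_sum that the stack finalizes later, so patching it there
 * would corrupt the seed.  Stand-alone sketch; `will_delay_cksum'
 * abstracts that test and is not a name from the code above.
 */
#if 0
static void
pf_clamp_mss_sketch(struct tcphdr *th, u_int16_t *mss, u_int16_t max_mss,
    int will_delay_cksum)
{
	if (ntohs(*mss) <= max_mss)
		return;
	if (!will_delay_cksum)
		th->th_sum = pf_cksum_fixup(th->th_sum, *mss,
		    htons(max_mss), 0);
	*mss = htons(max_mss);
}
#endif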