1 /*
2 * Copyright (c) 2007-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
30 /* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
31
32 /*
33 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
46 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
47 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
48 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
49 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
50 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
51 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
52 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
53 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
54 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
55 */
56
57 #include <sys/param.h>
58 #include <sys/systm.h>
59 #include <sys/mbuf.h>
60 #include <sys/filio.h>
61 #include <sys/fcntl.h>
62 #include <sys/socket.h>
63 #include <sys/kernel.h>
64 #include <sys/time.h>
65 #include <sys/random.h>
66 #include <sys/mcache.h>
67
68 #include <net/if.h>
69 #include <net/if_types.h>
70 #include <net/bpf.h>
71 #include <net/route.h>
72 #include <net/if_pflog.h>
73
74 #include <netinet/in.h>
75 #include <netinet/in_var.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip_var.h>
79 #include <netinet/tcp.h>
80 #include <netinet/tcp_seq.h>
81 #include <netinet/tcp_fsm.h>
82 #include <netinet/udp.h>
83 #include <netinet/ip_icmp.h>
84
85 #if INET6
86 #include <netinet/ip6.h>
87 #endif /* INET6 */
88
89 #include <net/pfvar.h>
90
91 struct pf_frent {
92 LIST_ENTRY(pf_frent) fr_next;
93 struct mbuf *fr_m;
94 #define fr_ip fr_u.fru_ipv4
95 #define fr_ip6 fr_u.fru_ipv6
96 union {
97 struct ip *fru_ipv4;
98 struct ip6_hdr *fru_ipv6;
99 } fr_u;
100 struct ip6_frag fr_ip6f_opt;
101 int fr_ip6f_hlen;
102 };
103
104 struct pf_frcache {
105 LIST_ENTRY(pf_frcache) fr_next;
106 uint16_t fr_off;
107 uint16_t fr_end;
108 };
109
110 #define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this packet */
111 #define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */
112 #define PFFRAG_DROP 0x0004 /* Drop all fragments */
113 #define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER))
114
115 struct pf_fragment {
116 RB_ENTRY(pf_fragment) fr_entry;
117 TAILQ_ENTRY(pf_fragment) frag_next;
118 struct pf_addr fr_srcx;
119 struct pf_addr fr_dstx;
120 u_int8_t fr_p; /* protocol of this fragment */
121 u_int8_t fr_flags; /* status flags */
122 u_int16_t fr_max; /* fragment data max */
123 #define fr_id fr_uid.fru_id4
124 #define fr_id6 fr_uid.fru_id6
125 union {
126 u_int16_t fru_id4;
127 u_int32_t fru_id6;
128 } fr_uid;
129 int fr_af;
130 u_int32_t fr_timeout;
131 #define fr_queue fr_u.fru_queue
132 #define fr_cache fr_u.fru_cache
133 union {
134 LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */
135 LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */
136 } fr_u;
137 };
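/*
 * A datagram is tracked in one of two modes, selected by
 * PFFRAG_NOBUFFER: in buffering mode (fr_queue) the mbuf of every
 * fragment is held and spliced back into a complete packet by
 * pf_reassemble()/pf_reassemble6(); in non-buffering mode (fr_cache)
 * only [fr_off, fr_end) byte ranges are remembered by
 * pf_fragcache()/pf_frag6cache() so that overlaps can be trimmed or
 * dropped while each fragment is passed through individually.
 */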
138
139 static TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue;
140 static TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue;
141
142 static __inline int pf_frag_compare(struct pf_fragment *,
143 struct pf_fragment *);
144 static RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree;
145 RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
146 pf_frag_compare);
147 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
148
149 /* Private prototypes */
150 static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *,
151 struct ip6_frag *);
152 static void pf_ip2key(struct pf_fragment *, struct ip *);
153 static void pf_remove_fragment(struct pf_fragment *);
154 static void pf_flush_fragments(void);
155 static void pf_free_fragment(struct pf_fragment *);
156 static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *,
157 struct pf_frag_tree *);
158 static __inline struct pf_fragment *
159 pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *);
160 static __inline struct pf_fragment *
161 pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *,
162 struct pf_frag_tree *);
163 static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
164 struct pf_frent *, int);
165 static struct mbuf *pf_fragcache(struct mbuf **, struct ip *,
166 struct pf_fragment **, int, int, int *);
167 static struct mbuf *pf_reassemble6(struct mbuf **, struct pf_fragment **,
168 struct pf_frent *, int);
169 static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr*,
170 struct ip6_frag *, struct pf_fragment **, int, int, int, int *);
171 static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *,
172 struct pf_pdesc *, struct mbuf *, struct tcphdr *, int, int *);
173
174 #define DPFPRINTF(x) do { \
175 if (pf_status.debug >= PF_DEBUG_MISC) { \
176 printf("%s: ", __func__); \
177 printf x ; \
178 } \
179 } while (0)
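/*
 * Note the double parentheses at every call site: the macro expands
 * its argument as a complete printf() parameter list, e.g.
 * DPFPRINTF(("overlap -%d\n", precut));
 */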
180
181 /* Globals */
182 struct pool pf_frent_pl, pf_frag_pl;
183 static struct pool pf_cache_pl, pf_cent_pl;
184 struct pool pf_state_scrub_pl;
185
186 static int pf_nfrents, pf_ncache;
187
188 void
189 pf_normalize_init(void)
190 {
191 pool_init(&pf_frent_pl, sizeof (struct pf_frent), 0, 0, 0, "pffrent",
192 NULL);
193 pool_init(&pf_frag_pl, sizeof (struct pf_fragment), 0, 0, 0, "pffrag",
194 NULL);
195 pool_init(&pf_cache_pl, sizeof (struct pf_fragment), 0, 0, 0,
196 "pffrcache", NULL);
197 pool_init(&pf_cent_pl, sizeof (struct pf_frcache), 0, 0, 0, "pffrcent",
198 NULL);
199 pool_init(&pf_state_scrub_pl, sizeof (struct pf_state_scrub), 0, 0, 0,
200 "pfstscr", NULL);
201
202 pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
203 pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
204 pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
205 pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
206
207 TAILQ_INIT(&pf_fragqueue);
208 TAILQ_INIT(&pf_cachequeue);
209 }
210
211 #if 0
212 void
213 pf_normalize_destroy(void)
214 {
215 pool_destroy(&pf_state_scrub_pl);
216 pool_destroy(&pf_cent_pl);
217 pool_destroy(&pf_cache_pl);
218 pool_destroy(&pf_frag_pl);
219 pool_destroy(&pf_frent_pl);
220 }
221 #endif
222
223 int
224 pf_normalize_isempty(void)
225 {
226 return (TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue));
227 }
228
229 static __inline int
230 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
231 {
232 int diff;
233
234 if ((diff = a->fr_af - b->fr_af))
235 return (diff);
236 else if ((diff = a->fr_p - b->fr_p))
237 return (diff);
238 else {
239 struct pf_addr *sa = &a->fr_srcx;
240 struct pf_addr *sb = &b->fr_srcx;
241 struct pf_addr *da = &a->fr_dstx;
242 struct pf_addr *db = &b->fr_dstx;
243
244 switch (a->fr_af) {
245 #ifdef INET
246 case AF_INET:
247 if ((diff = a->fr_id - b->fr_id))
248 return (diff);
249 else if (sa->v4.s_addr < sb->v4.s_addr)
250 return (-1);
251 else if (sa->v4.s_addr > sb->v4.s_addr)
252 return (1);
253 else if (da->v4.s_addr < db->v4.s_addr)
254 return (-1);
255 else if (da->v4.s_addr > db->v4.s_addr)
256 return (1);
257 break;
258 #endif
259 #ifdef INET6
260 case AF_INET6:
261 if ((diff = a->fr_id6 - b->fr_id6))
262 return (diff);
263 else if (sa->addr32[3] < sb->addr32[3])
264 return (-1);
265 else if (sa->addr32[3] > sb->addr32[3])
266 return (1);
267 else if (sa->addr32[2] < sb->addr32[2])
268 return (-1);
269 else if (sa->addr32[2] > sb->addr32[2])
270 return (1);
271 else if (sa->addr32[1] < sb->addr32[1])
272 return (-1);
273 else if (sa->addr32[1] > sb->addr32[1])
274 return (1);
275 else if (sa->addr32[0] < sb->addr32[0])
276 return (-1);
277 else if (sa->addr32[0] > sb->addr32[0])
278 return (1);
279 else if (da->addr32[3] < db->addr32[3])
280 return (-1);
281 else if (da->addr32[3] > db->addr32[3])
282 return (1);
283 else if (da->addr32[2] < db->addr32[2])
284 return (-1);
285 else if (da->addr32[2] > db->addr32[2])
286 return (1);
287 else if (da->addr32[1] < db->addr32[1])
288 return (-1);
289 else if (da->addr32[1] > db->addr32[1])
290 return (1);
291 else if (da->addr32[0] < db->addr32[0])
292 return (-1);
293 else if (da->addr32[0] > db->addr32[0])
294 return (1);
295 break;
296 #endif
297 default:
298 VERIFY(0 && "only IPv4 and IPv6 supported!");
299 break;
300 }
301 }
302 return (0);
303 }
304
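/*
 * Both queues are kept in LRU order: pf_find_fragment_by_key() moves
 * every entry it touches back to the head, so TAILQ_LAST() below is
 * always the least recently used fragment and each scan can stop at
 * the first entry that has not yet expired.
 */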
305 void
306 pf_purge_expired_fragments(void)
307 {
308 struct pf_fragment *frag;
309 u_int32_t expire = pf_time_second() -
310 pf_default_rule.timeout[PFTM_FRAG];
311
312 while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
313 VERIFY(BUFFER_FRAGMENTS(frag));
314 if (frag->fr_timeout > expire)
315 break;
316
317 switch (frag->fr_af) {
318 case AF_INET:
319 DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n",
320 ntohs(frag->fr_id),
321 (uint64_t)VM_KERNEL_ADDRPERM(frag)));
322 break;
323 case AF_INET6:
324 DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n",
325 ntohl(frag->fr_id6),
326 (uint64_t)VM_KERNEL_ADDRPERM(frag)));
327 break;
328 default:
329 VERIFY(0 && "only IPv4 and IPv6 supported");
330 break;
331 }
332 pf_free_fragment(frag);
333 }
334
335 while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
336 VERIFY(!BUFFER_FRAGMENTS(frag));
337 if (frag->fr_timeout > expire)
338 break;
339
340 switch (frag->fr_af) {
341 case AF_INET:
342 DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n",
343 ntohs(frag->fr_id),
344 (uint64_t)VM_KERNEL_ADDRPERM(frag)));
345 break;
346 case AF_INET6:
347 DPFPRINTF(("expiring IPv6 %d(0x%llx) from cache.\n",
348 ntohl(frag->fr_id6),
349 (uint64_t)VM_KERNEL_ADDRPERM(frag)));
350 break;
351 default:
352 VERIFY(0 && "only IPv4 and IPv6 supported");
353 break;
354 }
355 pf_free_fragment(frag);
356 VERIFY(TAILQ_EMPTY(&pf_cachequeue) ||
357 TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
358 }
359 }
360
361 /*
362 * Try to flush old fragments to make space for new ones
363 */
364
365 static void
366 pf_flush_fragments(void)
367 {
368 struct pf_fragment *frag;
369 int goal;
370
371 goal = pf_nfrents * 9 / 10;
372 DPFPRINTF(("trying to free > %d frents\n",
373 pf_nfrents - goal));
374 while (goal < pf_nfrents) {
375 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
376 if (frag == NULL)
377 break;
378 pf_free_fragment(frag);
379 }
380
381
382 goal = pf_ncache * 9 / 10;
383 DPFPRINTF(("trying to free > %d cache entries\n",
384 pf_ncache - goal));
385 while (goal < pf_ncache) {
386 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
387 if (frag == NULL)
388 break;
389 pf_free_fragment(frag);
390 }
391 }
392
393 /* Frees the fragments and all associated entries */
394
395 static void
396 pf_free_fragment(struct pf_fragment *frag)
397 {
398 struct pf_frent *frent;
399 struct pf_frcache *frcache;
400
401 /* Free all fragments */
402 if (BUFFER_FRAGMENTS(frag)) {
403 for (frent = LIST_FIRST(&frag->fr_queue); frent;
404 frent = LIST_FIRST(&frag->fr_queue)) {
405 LIST_REMOVE(frent, fr_next);
406
407 m_freem(frent->fr_m);
408 pool_put(&pf_frent_pl, frent);
409 pf_nfrents--;
410 }
411 } else {
412 for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
413 frcache = LIST_FIRST(&frag->fr_cache)) {
414 LIST_REMOVE(frcache, fr_next);
415
416 VERIFY(LIST_EMPTY(&frag->fr_cache) ||
417 LIST_FIRST(&frag->fr_cache)->fr_off >
418 frcache->fr_end);
419
420 pool_put(&pf_cent_pl, frcache);
421 pf_ncache--;
422 }
423 }
424
425 pf_remove_fragment(frag);
426 }
427
428 static void
429 pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6,
430 struct ip6_frag *fh)
431 {
432 key->fr_p = fh->ip6f_nxt;
433 key->fr_id6 = fh->ip6f_ident;
434 key->fr_af = AF_INET6;
435 key->fr_srcx.v6 = ip6->ip6_src;
436 key->fr_dstx.v6 = ip6->ip6_dst;
437 }
438
439 static void
440 pf_ip2key(struct pf_fragment *key, struct ip *ip)
441 {
442 key->fr_p = ip->ip_p;
443 key->fr_id = ip->ip_id;
444 key->fr_af = AF_INET;
445 key->fr_srcx.v4.s_addr = ip->ip_src.s_addr;
446 key->fr_dstx.v4.s_addr = ip->ip_dst.s_addr;
447 }
448
449 static struct pf_fragment *
450 pf_find_fragment_by_key(struct pf_fragment *key, struct pf_frag_tree *tree)
451 {
452 struct pf_fragment *frag;
453
454 frag = RB_FIND(pf_frag_tree, tree, key);
455 if (frag != NULL) {
456 /* XXX Are we sure we want to update the timeout? */
457 frag->fr_timeout = pf_time_second();
458 if (BUFFER_FRAGMENTS(frag)) {
459 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
460 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
461 } else {
462 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
463 TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
464 }
465 }
466
467 return (frag);
468 }
469
470 static __inline struct pf_fragment *
471 pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree)
472 {
473 struct pf_fragment key;
474 pf_ip2key(&key, ip);
475 return pf_find_fragment_by_key(&key, tree);
476 }
477
478 static __inline struct pf_fragment *
479 pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh,
480 struct pf_frag_tree *tree)
481 {
482 struct pf_fragment key;
483 pf_ip6hdr2key(&key, ip6, fh);
484 return pf_find_fragment_by_key(&key, tree);
485 }
486
487 /* Removes a fragment from the fragment queue and frees the fragment */
488
489 static void
490 pf_remove_fragment(struct pf_fragment *frag)
491 {
492 if (BUFFER_FRAGMENTS(frag)) {
493 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
494 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
495 pool_put(&pf_frag_pl, frag);
496 } else {
497 RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
498 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
499 pool_put(&pf_cache_pl, frag);
500 }
501 }
502
503 #define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
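/*
 * Example: an ip_off field of 0x2004 (IP_MF plus an offset field of 4)
 * masks to 4 and shifts to a 32-byte offset, the on-the-wire offset
 * being expressed in 8-byte units.
 */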
504 static struct mbuf *
505 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
506 struct pf_frent *frent, int mff)
507 {
508 struct mbuf *m = *m0, *m2;
509 struct pf_frent *frea, *next;
510 struct pf_frent *frep = NULL;
511 struct ip *ip = frent->fr_ip;
512 int hlen = ip->ip_hl << 2;
513 u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
514 u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
515 u_int16_t fr_max = ip_len + off;
516
517 VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
518
519 /* Strip off ip header */
520 m->m_data += hlen;
521 m->m_len -= hlen;
522
523 /* Create a new reassembly queue for this packet */
524 if (*frag == NULL) {
525 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
526 if (*frag == NULL) {
527 pf_flush_fragments();
528 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
529 if (*frag == NULL)
530 goto drop_fragment;
531 }
532
533 (*frag)->fr_flags = 0;
534 (*frag)->fr_max = 0;
535 (*frag)->fr_af = AF_INET;
536 (*frag)->fr_srcx.v4 = frent->fr_ip->ip_src;
537 (*frag)->fr_dstx.v4 = frent->fr_ip->ip_dst;
538 (*frag)->fr_p = frent->fr_ip->ip_p;
539 (*frag)->fr_id = frent->fr_ip->ip_id;
540 (*frag)->fr_timeout = pf_time_second();
541 LIST_INIT(&(*frag)->fr_queue);
542
543 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
544 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
545
546 /* We do not have a previous fragment */
547 frep = NULL;
548 goto insert;
549 }
550
551 /*
552 * Find a fragment after the current one:
553 * - off contains the real shifted offset.
554 */
555 LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
556 if (FR_IP_OFF(frea) > off)
557 break;
558 frep = frea;
559 }
560
561 VERIFY(frep != NULL || frea != NULL);
562
563 if (frep != NULL &&
564 FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
565 4 > off) {
566 u_int16_t precut;
567
568 precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
569 frep->fr_ip->ip_hl * 4 - off;
570 if (precut >= ip_len)
571 goto drop_fragment;
572 m_adj(frent->fr_m, precut);
573 DPFPRINTF(("overlap -%d\n", precut));
574 /* Enforce 8 byte boundaries */
575 ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
576 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
577 ip_len -= precut;
578 ip->ip_len = htons(ip_len);
579 }
580
581 for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
582 frea = next) {
583 u_int16_t aftercut;
584
585 aftercut = ip_len + off - FR_IP_OFF(frea);
586 DPFPRINTF(("adjust overlap %d\n", aftercut));
587 if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
588 * 4) {
589 frea->fr_ip->ip_len =
590 htons(ntohs(frea->fr_ip->ip_len) - aftercut);
591 frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
592 (aftercut >> 3));
593 m_adj(frea->fr_m, aftercut);
594 break;
595 }
596
597 /* This fragment is completely overlapped, lose it */
598 next = LIST_NEXT(frea, fr_next);
599 m_freem(frea->fr_m);
600 LIST_REMOVE(frea, fr_next);
601 pool_put(&pf_frent_pl, frea);
602 pf_nfrents--;
603 }
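/*
 * Illustration of the trimming above: if an existing fragment covers
 * bytes 0-23 and the new one claims 16-39, precut is 8, so 8 bytes
 * are shaved off the front of the new mbuf and its offset advances
 * one 8-byte unit to 24; queued fragments that the new data covers
 * entirely are freed instead.
 */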
604
605 insert:
606 /* Update maximum data size */
607 if ((*frag)->fr_max < fr_max)
608 (*frag)->fr_max = fr_max;
609 /* This is the last segment */
610 if (!mff)
611 (*frag)->fr_flags |= PFFRAG_SEENLAST;
612
613 if (frep == NULL)
614 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
615 else
616 LIST_INSERT_AFTER(frep, frent, fr_next);
617
618 /* Check if we are completely reassembled */
619 if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
620 return (NULL);
621
622 /* Check if we have all the data */
623 off = 0;
624 for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
625 next = LIST_NEXT(frep, fr_next);
626
627 off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
628 if (off < (*frag)->fr_max &&
629 (next == NULL || FR_IP_OFF(next) != off)) {
630 DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
631 off, next == NULL ? -1 : FR_IP_OFF(next),
632 (*frag)->fr_max));
633 return (NULL);
634 }
635 }
636 DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
637 if (off < (*frag)->fr_max)
638 return (NULL);
639
640 /* We have all the data */
641 frent = LIST_FIRST(&(*frag)->fr_queue);
642 VERIFY(frent != NULL);
643 if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
644 DPFPRINTF(("drop: too big: %d\n", off));
645 pf_free_fragment(*frag);
646 *frag = NULL;
647 return (NULL);
648 }
649 next = LIST_NEXT(frent, fr_next);
650
651 /* Magic from ip_input */
652 ip = frent->fr_ip;
653 m = frent->fr_m;
654 m2 = m->m_next;
655 m->m_next = NULL;
656 m_cat(m, m2);
657 pool_put(&pf_frent_pl, frent);
658 pf_nfrents--;
659 for (frent = next; frent != NULL; frent = next) {
660 next = LIST_NEXT(frent, fr_next);
661
662 m2 = frent->fr_m;
663 pool_put(&pf_frent_pl, frent);
664 pf_nfrents--;
665 m_cat(m, m2);
666 }
667
668 ip->ip_src = (*frag)->fr_srcx.v4;
669 ip->ip_dst = (*frag)->fr_dstx.v4;
670
671 /* Remove from fragment queue */
672 pf_remove_fragment(*frag);
673 *frag = NULL;
674
675 hlen = ip->ip_hl << 2;
676 ip->ip_len = htons(off + hlen);
677 m->m_len += hlen;
678 m->m_data -= hlen;
679
680 /* some debugging cruft by sklower, below, will go away soon */
681 /* XXX this should be done elsewhere */
682 if (m->m_flags & M_PKTHDR) {
683 int plen = 0;
684 for (m2 = m; m2; m2 = m2->m_next)
685 plen += m2->m_len;
686 m->m_pkthdr.len = plen;
687 }
688
689 DPFPRINTF(("complete: 0x%llx(%d)\n",
690 (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len)));
691 return (m);
692
693 drop_fragment:
694 /* Oops - fail safe - drop packet */
695 pool_put(&pf_frent_pl, frent);
696 pf_nfrents--;
697 m_freem(m);
698 return (NULL);
699 }
700
701 static struct mbuf *
702 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
703 int drop, int *nomem)
704 {
705 struct mbuf *m = *m0;
706 struct pf_frcache *frp, *fra, *cur = NULL;
707 int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
708 u_int16_t off = ntohs(h->ip_off) << 3;
709 u_int16_t fr_max = ip_len + off;
710 int hosed = 0;
711
712 VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
713
714 /* Create a new range queue for this packet */
715 if (*frag == NULL) {
716 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
717 if (*frag == NULL) {
718 pf_flush_fragments();
719 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
720 if (*frag == NULL)
721 goto no_mem;
722 }
723
724 /* Get an entry for the queue */
725 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
726 if (cur == NULL) {
727 pool_put(&pf_cache_pl, *frag);
728 *frag = NULL;
729 goto no_mem;
730 }
731 pf_ncache++;
732
733 (*frag)->fr_flags = PFFRAG_NOBUFFER;
734 (*frag)->fr_max = 0;
735 (*frag)->fr_af = AF_INET;
736 (*frag)->fr_srcx.v4 = h->ip_src;
737 (*frag)->fr_dstx.v4 = h->ip_dst;
738 (*frag)->fr_p = h->ip_p;
739 (*frag)->fr_id = h->ip_id;
740 (*frag)->fr_timeout = pf_time_second();
741
742 cur->fr_off = off;
743 cur->fr_end = fr_max;
744 LIST_INIT(&(*frag)->fr_cache);
745 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
746
747 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
748 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
749
750 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
751 fr_max));
752
753 goto pass;
754 }
755
756 /*
757 * Find a fragment after the current one:
758 * - off contains the real shifted offset.
759 */
760 frp = NULL;
761 LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
762 if (fra->fr_off > off)
763 break;
764 frp = fra;
765 }
766
767 VERIFY(frp != NULL || fra != NULL);
768
769 if (frp != NULL) {
770 int precut;
771
772 precut = frp->fr_end - off;
773 if (precut >= ip_len) {
774 /* Fragment is entirely a duplicate */
775 DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
776 h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
777 goto drop_fragment;
778 }
779 if (precut == 0) {
780 /* They are adjacent. Fixup cache entry */
781 DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
782 h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
783 frp->fr_end = fr_max;
784 } else if (precut > 0) {
785 /*
786 * The first part of this payload overlaps with a
787 * fragment that has already been passed.
788 * Need to trim off the first part of the payload.
789 * But to do so easily, we need to create another
790 * mbuf to throw the original header into.
791 */
792
793 DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
794 h->ip_id, precut, frp->fr_off, frp->fr_end, off,
795 fr_max));
796
797 off += precut;
798 fr_max -= precut;
799 /* Update the previous frag to encompass this one */
800 frp->fr_end = fr_max;
801
802 if (!drop) {
803 /*
804 * XXX Optimization opportunity
805 * This is a very heavy way to trim the payload.
806 * we could do it much faster by diddling mbuf
807 * internals but that would be even less legible
808 * than this mbuf magic. For my next trick,
809 * I'll pull a rabbit out of my laptop.
810 */
811 *m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT);
812 if (*m0 == NULL)
813 goto no_mem;
814 VERIFY((*m0)->m_next == NULL);
815 m_adj(m, precut + (h->ip_hl << 2));
816 m_cat(*m0, m);
817 m = *m0;
818 if (m->m_flags & M_PKTHDR) {
819 int plen = 0;
820 struct mbuf *t;
821 for (t = m; t; t = t->m_next)
822 plen += t->m_len;
823 m->m_pkthdr.len = plen;
824 }
825
826
827 h = mtod(m, struct ip *);
828
829
830 VERIFY((int)m->m_len ==
831 ntohs(h->ip_len) - precut);
832 h->ip_off = htons(ntohs(h->ip_off) +
833 (precut >> 3));
834 h->ip_len = htons(ntohs(h->ip_len) - precut);
835 } else {
836 hosed++;
837 }
838 } else {
839 /* There is a gap between fragments */
840
841 DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
842 h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
843 fr_max));
844
845 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
846 if (cur == NULL)
847 goto no_mem;
848 pf_ncache++;
849
850 cur->fr_off = off;
851 cur->fr_end = fr_max;
852 LIST_INSERT_AFTER(frp, cur, fr_next);
853 }
854 }
855
856 if (fra != NULL) {
857 int aftercut;
858 int merge = 0;
859
860 aftercut = fr_max - fra->fr_off;
861 if (aftercut == 0) {
862 /* Adjacent fragments */
863 DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
864 h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
865 fra->fr_off = off;
866 merge = 1;
867 } else if (aftercut > 0) {
868 /* Need to chop off the tail of this fragment */
869 DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
870 h->ip_id, aftercut, off, fr_max, fra->fr_off,
871 fra->fr_end));
872 fra->fr_off = off;
873 fr_max -= aftercut;
874
875 merge = 1;
876
877 if (!drop) {
878 m_adj(m, -aftercut);
879 if (m->m_flags & M_PKTHDR) {
880 int plen = 0;
881 struct mbuf *t;
882 for (t = m; t; t = t->m_next)
883 plen += t->m_len;
884 m->m_pkthdr.len = plen;
885 }
886 h = mtod(m, struct ip *);
887 VERIFY((int)m->m_len ==
888 ntohs(h->ip_len) - aftercut);
889 h->ip_len = htons(ntohs(h->ip_len) - aftercut);
890 } else {
891 hosed++;
892 }
893 } else if (frp == NULL) {
894 /* There is a gap between fragments */
895 DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
896 h->ip_id, -aftercut, off, fr_max, fra->fr_off,
897 fra->fr_end));
898
899 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
900 if (cur == NULL)
901 goto no_mem;
902 pf_ncache++;
903
904 cur->fr_off = off;
905 cur->fr_end = fr_max;
906 LIST_INSERT_BEFORE(fra, cur, fr_next);
907 }
908
909
910 /* Need to glue together two separate fragment descriptors */
911 if (merge) {
912 if (cur && fra->fr_off <= cur->fr_end) {
913 /* Need to merge in a previous 'cur' */
914 DPFPRINTF(("fragcache[%d]: adjacent(merge "
915 "%d-%d) %d-%d (%d-%d)\n",
916 h->ip_id, cur->fr_off, cur->fr_end, off,
917 fr_max, fra->fr_off, fra->fr_end));
918 fra->fr_off = cur->fr_off;
919 LIST_REMOVE(cur, fr_next);
920 pool_put(&pf_cent_pl, cur);
921 pf_ncache--;
922 cur = NULL;
923
924 } else if (frp && fra->fr_off <= frp->fr_end) {
925 /* Need to merge in a modified 'frp' */
926 VERIFY(cur == NULL);
927 DPFPRINTF(("fragcache[%d]: adjacent(merge "
928 "%d-%d) %d-%d (%d-%d)\n",
929 h->ip_id, frp->fr_off, frp->fr_end, off,
930 fr_max, fra->fr_off, fra->fr_end));
931 fra->fr_off = frp->fr_off;
932 LIST_REMOVE(frp, fr_next);
933 pool_put(&pf_cent_pl, frp);
934 pf_ncache--;
935 frp = NULL;
936
937 }
938 }
939 }
940
941 if (hosed) {
942 /*
943 * We must keep tracking the overall fragment even when
944 * we're going to drop it anyway so that we know when to
945 * free the overall descriptor. Thus we drop the frag late.
946 */
947 goto drop_fragment;
948 }
949
950
951 pass:
952 /* Update maximum data size */
953 if ((*frag)->fr_max < fr_max)
954 (*frag)->fr_max = fr_max;
955
956 /* This is the last segment */
957 if (!mff)
958 (*frag)->fr_flags |= PFFRAG_SEENLAST;
959
960 /* Check if we are completely reassembled */
961 if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
962 LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
963 LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
964 /* Remove from fragment queue */
965 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
966 (*frag)->fr_max));
967 pf_free_fragment(*frag);
968 *frag = NULL;
969 }
970
971 return (m);
972
973 no_mem:
974 *nomem = 1;
975
976 /* Still need to pay attention to !IP_MF */
977 if (!mff && *frag != NULL)
978 (*frag)->fr_flags |= PFFRAG_SEENLAST;
979
980 m_freem(m);
981 return (NULL);
982
983 drop_fragment:
984
985 /* Still need to pay attention to !IP_MF */
986 if (!mff && *frag != NULL)
987 (*frag)->fr_flags |= PFFRAG_SEENLAST;
988
989 if (drop) {
990 /* This fragment has been deemed bad. Don't reass */
991 if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
992 DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
993 h->ip_id));
994 (*frag)->fr_flags |= PFFRAG_DROP;
995 }
996
997 m_freem(m);
998 return (NULL);
999 }
1000
1001 #define FR_IP6_OFF(fr) \
1002 (ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
1003 #define FR_IP6_PLEN(fr) (ntohs((fr)->fr_ip6->ip6_plen))
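/*
 * Unlike the IPv4 case no shift is needed here: IP6F_OFF_MASK leaves
 * the 13-bit fragment offset in place above the three flag bits, so
 * the masked value is already the offset in bytes.
 */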
1004 struct mbuf *
1005 pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag,
1006 struct pf_frent *frent, int mff)
1007 {
1008 struct mbuf *m, *m2;
1009 struct pf_frent *frea, *frep, *next;
1010 struct ip6_hdr *ip6;
1011 int plen, off, fr_max;
1012
1013 VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
1014 m = *m0;
1015 frep = NULL;
1016 ip6 = frent->fr_ip6;
1017 off = FR_IP6_OFF(frent);
1018 plen = FR_IP6_PLEN(frent);
1019 fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof *ip6);
1020
1021 DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u "
1022 "fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off,
1023 frent->fr_ip6f_hlen, fr_max, m->m_len));
1024
1025 /* strip off headers up to the fragment payload */
1026 m->m_data += frent->fr_ip6f_hlen;
1027 m->m_len -= frent->fr_ip6f_hlen;
1028
1029 /* Create a new reassembly queue for this packet */
1030 if (*frag == NULL) {
1031 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
1032 if (*frag == NULL) {
1033 pf_flush_fragments();
1034 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
1035 if (*frag == NULL)
1036 goto drop_fragment;
1037 }
1038
1039 (*frag)->fr_flags = 0;
1040 (*frag)->fr_max = 0;
1041 (*frag)->fr_af = AF_INET6;
1042 (*frag)->fr_srcx.v6 = frent->fr_ip6->ip6_src;
1043 (*frag)->fr_dstx.v6 = frent->fr_ip6->ip6_dst;
1044 (*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
1045 (*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
1046 (*frag)->fr_timeout = pf_time_second();
1047 LIST_INIT(&(*frag)->fr_queue);
1048
1049 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
1050 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
1051
1052 /* We do not have a previous fragment */
1053 frep = NULL;
1054 goto insert;
1055 }
1056
1057 /*
1058 * Find a fragment after the current one:
1059 * - off contains the real shifted offset.
1060 */
1061 LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
1062 if (FR_IP6_OFF(frea) > off)
1063 break;
1064 frep = frea;
1065 }
1066
1067 VERIFY(frep != NULL || frea != NULL);
1068
1069 if (frep != NULL &&
1070 FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off)
1071 {
1072 u_int16_t precut;
1073
1074 precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
1075 frep->fr_ip6f_hlen - off;
1076 if (precut >= plen)
1077 goto drop_fragment;
1078 m_adj(frent->fr_m, precut);
1079 DPFPRINTF(("overlap -%d\n", precut));
1080 /* Enforce 8 byte boundaries */
1081 frent->fr_ip6f_opt.ip6f_offlg =
1082 htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
1083 (precut >> 3));
1084 off = FR_IP6_OFF(frent);
1085 plen -= precut;
1086 ip6->ip6_plen = htons(plen);
1087 }
1088
1089 for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
1090 u_int16_t aftercut;
1091
1092 aftercut = plen + off - FR_IP6_OFF(frea);
1093 DPFPRINTF(("adjust overlap %d\n", aftercut));
1094 if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
1095 frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
1096 aftercut);
1097 frea->fr_ip6f_opt.ip6f_offlg =
1098 htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
1099 (aftercut >> 3));
1100 m_adj(frea->fr_m, aftercut);
1101 break;
1102 }
1103
1104 /* This fragment is completely overlapped, lose it */
1105 next = LIST_NEXT(frea, fr_next);
1106 m_freem(frea->fr_m);
1107 LIST_REMOVE(frea, fr_next);
1108 pool_put(&pf_frent_pl, frea);
1109 pf_nfrents--;
1110 }
1111
1112 insert:
1113 /* Update maximum data size */
1114 if ((*frag)->fr_max < fr_max)
1115 (*frag)->fr_max = fr_max;
1116 /* This is the last segment */
1117 if (!mff)
1118 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1119
1120 if (frep == NULL)
1121 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
1122 else
1123 LIST_INSERT_AFTER(frep, frent, fr_next);
1124
1125 /* Check if we are completely reassembled */
1126 if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
1127 return (NULL);
1128
1129 /* Check if we have all the data */
1130 off = 0;
1131 for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
1132 next = LIST_NEXT(frep, fr_next);
1133 off += FR_IP6_PLEN(frep) - (frep->fr_ip6f_hlen - sizeof *ip6);
1134 DPFPRINTF(("frep at %d, next %d, max %d\n",
1135 off, next == NULL ? -1 : FR_IP6_OFF(next),
1136 (*frag)->fr_max));
1137 if (off < (*frag)->fr_max &&
1138 (next == NULL || FR_IP6_OFF(next) != off)) {
1139 DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
1140 off, next == NULL ? -1 : FR_IP6_OFF(next),
1141 (*frag)->fr_max));
1142 return (NULL);
1143 }
1144 }
1145 DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
1146 if (off < (*frag)->fr_max)
1147 return (NULL);
1148
1149 /* We have all the data */
1150 frent = LIST_FIRST(&(*frag)->fr_queue);
1151 VERIFY(frent != NULL);
1152 if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
1153 DPFPRINTF(("drop: too big: %d\n", off));
1154 pf_free_fragment(*frag);
1155 *frag = NULL;
1156 return (NULL);
1157 }
1158
1159 ip6 = frent->fr_ip6;
1160 ip6->ip6_nxt = (*frag)->fr_p;
1161 ip6->ip6_plen = htons(off);
1162 ip6->ip6_src = (*frag)->fr_srcx.v6;
1163 ip6->ip6_dst = (*frag)->fr_dstx.v6;
1164
1165 /* Remove from fragment queue */
1166 pf_remove_fragment(*frag);
1167 *frag = NULL;
1168
1169 m = frent->fr_m;
1170 m->m_len += sizeof(struct ip6_hdr);
1171 m->m_data -= sizeof(struct ip6_hdr);
1172 memmove(m->m_data, ip6, sizeof(struct ip6_hdr));
1173
1174 next = LIST_NEXT(frent, fr_next);
1175 pool_put(&pf_frent_pl, frent);
1176 pf_nfrents--;
1177 for (frent = next; next != NULL; frent = next) {
1178 m2 = frent->fr_m;
1179
1180 m_cat(m, m2);
1181 next = LIST_NEXT(frent, fr_next);
1182 pool_put(&pf_frent_pl, frent);
1183 pf_nfrents--;
1184 }
1185
1186 /* XXX this should be done elsewhere */
1187 if (m->m_flags & M_PKTHDR) {
1188 int pktlen = 0;
1189 for (m2 = m; m2; m2 = m2->m_next)
1190 pktlen += m2->m_len;
1191 m->m_pkthdr.len = pktlen;
1192 }
1193
1194 DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n",
1195 (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip6->ip6_plen),
1196 m->m_pkthdr.len));
1197
1198 return m;
1199
1200 drop_fragment:
1201 /* Oops - fail safe - drop packet */
1202 pool_put(&pf_frent_pl, frent);
1203 --pf_nfrents;
1204 m_freem(m);
1205 return NULL;
1206 }
1207
1208 static struct mbuf *
1209 pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh,
1210 struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem)
1211 {
1212 struct mbuf *m = *m0;
1213 u_int16_t plen, off, fr_max;
1214 struct pf_frcache *frp, *fra, *cur = NULL;
1215 int hosed = 0;
1216
1217 VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
1218 m = *m0;
1219 off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
1220 plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);
1221
1222 /*
1223 * Apple Modification: dimambro@apple.com. The hlen being passed
1224 * into this function includes all the headers associated with
1225 * the packet, and may include routing headers, so to get to
1226 * the data payload as stored in the original IPv6 header we need
1227 * to subtract all those headers and the IP header.
1228 *
1229 * The 'fr_max' local variable should also contain the offset from
1230 * the start of the reassembled packet to the octet just past the end
1231 * of the octets in the current fragment, where:
1232 * - 'off' is the offset from the start of the reassembled packet to
1233 * the first octet in the fragment,
1234 * - 'plen' is the "payload data length", excluding all the
1235 * IPv6 headers of the fragment, and
1236 * - 'hlen' is computed in pf_normalize_ip6() as the offset from the
1237 * start of the IPv6 packet to the beginning of the data.
1238 */
1239 fr_max = off + plen;
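/*
 * Illustrative numbers: with hlen 48 (40-byte IPv6 header plus an
 * 8-byte fragment header), ip6_plen 1448 and offset 0, plen is
 * 1448 - 8 = 1440 and fr_max is 1440, so the next fragment is
 * expected to start at offset 1440.
 */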
1240
1241 DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n",
1242 (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off, fr_max));
1243
1244 /* Create a new range queue for this packet */
1245 if (*frag == NULL) {
1246 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
1247 if (*frag == NULL) {
1248 pf_flush_fragments();
1249 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
1250 if (*frag == NULL)
1251 goto no_mem;
1252 }
1253
1254 /* Get an entry for the queue */
1255 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1256 if (cur == NULL) {
1257 pool_put(&pf_cache_pl, *frag);
1258 *frag = NULL;
1259 goto no_mem;
1260 }
1261 pf_ncache++;
1262
1263 (*frag)->fr_flags = PFFRAG_NOBUFFER;
1264 (*frag)->fr_max = 0;
1265 (*frag)->fr_af = AF_INET6;
1266 (*frag)->fr_srcx.v6 = h->ip6_src;
1267 (*frag)->fr_dstx.v6 = h->ip6_dst;
1268 (*frag)->fr_p = fh->ip6f_nxt;
1269 (*frag)->fr_id6 = fh->ip6f_ident;
1270 (*frag)->fr_timeout = pf_time_second();
1271
1272 cur->fr_off = off;
1273 cur->fr_end = fr_max;
1274 LIST_INIT(&(*frag)->fr_cache);
1275 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
1276
1277 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
1278 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
1279
1280 DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
1281 off, fr_max));
1282
1283 goto pass;
1284 }
1285
1286 /*
1287 * Find a fragment after the current one:
1288 * - off contains the real shifted offset.
1289 */
1290 frp = NULL;
1291 LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
1292 if (fra->fr_off > off)
1293 break;
1294 frp = fra;
1295 }
1296
1297 VERIFY(frp != NULL || fra != NULL);
1298
1299 if (frp != NULL) {
1300 int precut;
1301
1302 precut = frp->fr_end - off;
1303 if (precut >= plen) {
1304 /* Fragment is entirely a duplicate */
1305 DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
1306 ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
1307 off, fr_max));
1308 goto drop_fragment;
1309 }
1310 if (precut == 0) {
1311 /* They are adjacent. Fixup cache entry */
1312 DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
1313 ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
1314 off, fr_max));
1315 frp->fr_end = fr_max;
1316 } else if (precut > 0) {
1317 /* The first part of this payload overlaps with a
1318 * fragment that has already been passed.
1319 * Need to trim off the first part of the payload.
1320 * But to do so easily, we need to create another
1321 * mbuf to throw the original header into.
1322 */
1323
1324 DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
1325 ntohl(fh->ip6f_ident), precut, frp->fr_off,
1326 frp->fr_end, off, fr_max));
1327
1328 off += precut;
1329 fr_max -= precut;
1330 /* Update the previous frag to encompass this one */
1331 frp->fr_end = fr_max;
1332
1333 if (!drop) {
1334 /* XXX Optimization opportunity
1335 * This is a very heavy way to trim the payload.
1336 * we could do it much faster by diddling mbuf
1337 * internals but that would be even less legible
1338 * than this mbuf magic. For my next trick,
1339 * I'll pull a rabbit out of my laptop.
1340 */
1341 *m0 = m_copym(m, 0, hlen, M_NOWAIT);
1342 if (*m0 == NULL)
1343 goto no_mem;
1344 VERIFY((*m0)->m_next == NULL);
1345 m_adj(m, precut + hlen);
1346 m_cat(*m0, m);
1347 m = *m0;
1348 if (m->m_flags & M_PKTHDR) {
1349 int pktlen = 0;
1350 struct mbuf *t;
1351 for (t = m; t; t = t->m_next)
1352 pktlen += t->m_len;
1353 m->m_pkthdr.len = pktlen;
1354 }
1355
1356 h = mtod(m, struct ip6_hdr *);
1357
1358 VERIFY((int)m->m_len ==
1359 ntohs(h->ip6_plen) - precut);
1360 fh->ip6f_offlg &= ~IP6F_OFF_MASK;
1361 fh->ip6f_offlg |=
1362 htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
1363 + (precut >> 3));
1364 h->ip6_plen = htons(ntohs(h->ip6_plen) -
1365 precut);
1366 } else {
1367 hosed++;
1368 }
1369 } else {
1370 /* There is a gap between fragments */
1371
1372 DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
1373 ntohl(fh->ip6f_ident), -precut, frp->fr_off,
1374 frp->fr_end, off, fr_max));
1375
1376 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1377 if (cur == NULL)
1378 goto no_mem;
1379 pf_ncache++;
1380
1381 cur->fr_off = off;
1382 cur->fr_end = fr_max;
1383 LIST_INSERT_AFTER(frp, cur, fr_next);
1384 }
1385 }
1386
1387 if (fra != NULL) {
1388 int aftercut;
1389 int merge = 0;
1390
1391 aftercut = fr_max - fra->fr_off;
1392 if (aftercut == 0) {
1393 /* Adjacent fragments */
1394 DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
1395 ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
1396 fra->fr_end));
1397 fra->fr_off = off;
1398 merge = 1;
1399 } else if (aftercut > 0) {
1400 /* Need to chop off the tail of this fragment */
1401 DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
1402 ntohl(fh->ip6f_ident), aftercut, off, fr_max,
1403 fra->fr_off, fra->fr_end));
1404 fra->fr_off = off;
1405 fr_max -= aftercut;
1406
1407 merge = 1;
1408
1409 if (!drop) {
1410 m_adj(m, -aftercut);
1411 if (m->m_flags & M_PKTHDR) {
1412 int pktlen = 0;
1413 struct mbuf *t;
1414 for (t = m; t; t = t->m_next)
1415 pktlen += t->m_len;
1416 m->m_pkthdr.len = pktlen;
1417 }
1418 h = mtod(m, struct ip6_hdr *);
1419 VERIFY((int)m->m_len ==
1420 ntohs(h->ip6_plen) - aftercut);
1421 h->ip6_plen =
1422 htons(ntohs(h->ip6_plen) - aftercut);
1423 } else {
1424 hosed++;
1425 }
1426 } else if (frp == NULL) {
1427 /* There is a gap between fragments */
1428 DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
1429 ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
1430 fra->fr_off, fra->fr_end));
1431
1432 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1433 if (cur == NULL)
1434 goto no_mem;
1435 pf_ncache++;
1436
1437 cur->fr_off = off;
1438 cur->fr_end = fr_max;
1439 LIST_INSERT_BEFORE(fra, cur, fr_next);
1440 }
1441
1442 /* Need to glue together two separate fragment descriptors */
1443 if (merge) {
1444 if (cur && fra->fr_off <= cur->fr_end) {
1445 /* Need to merge in a previous 'cur' */
1446 DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1447 "%d-%d) %d-%d (%d-%d)\n",
1448 ntohl(fh->ip6f_ident), cur->fr_off,
1449 cur->fr_end, off, fr_max, fra->fr_off,
1450 fra->fr_end));
1451 fra->fr_off = cur->fr_off;
1452 LIST_REMOVE(cur, fr_next);
1453 pool_put(&pf_cent_pl, cur);
1454 pf_ncache--;
1455 cur = NULL;
1456 } else if (frp && fra->fr_off <= frp->fr_end) {
1457 /* Need to merge in a modified 'frp' */
1458 VERIFY(cur == NULL);
1459 DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1460 "%d-%d) %d-%d (%d-%d)\n",
1461 ntohl(fh->ip6f_ident), frp->fr_off,
1462 frp->fr_end, off, fr_max, fra->fr_off,
1463 fra->fr_end));
1464 fra->fr_off = frp->fr_off;
1465 LIST_REMOVE(frp, fr_next);
1466 pool_put(&pf_cent_pl, frp);
1467 pf_ncache--;
1468 frp = NULL;
1469 }
1470 }
1471 }
1472
1473 if (hosed) {
1474 /*
1475 * We must keep tracking the overall fragment even when
1476 * we're going to drop it anyway so that we know when to
1477 * free the overall descriptor. Thus we drop the frag late.
1478 */
1479 goto drop_fragment;
1480 }
1481
1482 pass:
1483 /* Update maximum data size */
1484 if ((*frag)->fr_max < fr_max)
1485 (*frag)->fr_max = fr_max;
1486
1487 /* This is the last segment */
1488 if (!mff)
1489 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1490
1491 /* Check if we are completely reassembled */
1492 if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1493 LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
1494 LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
1495 /* Remove from fragment queue */
1496 DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
1497 ntohl(fh->ip6f_ident), (*frag)->fr_max));
1498 pf_free_fragment(*frag);
1499 *frag = NULL;
1500 }
1501
1502 return (m);
1503
1504 no_mem:
1505 *nomem = 1;
1506
1507 /* Still need to pay attention to !IP_MF */
1508 if (!mff && *frag != NULL)
1509 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1510
1511 m_freem(m);
1512 return (NULL);
1513
1514 drop_fragment:
1515
1516 /* Still need to pay attention to !IP_MF */
1517 if (!mff && *frag != NULL)
1518 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1519
1520 if (drop) {
1521 /* This fragment has been deemed bad. Don't reass */
1522 if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
1523 DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n",
1524 ntohl(fh->ip6f_ident)));
1525 (*frag)->fr_flags |= PFFRAG_DROP;
1526 }
1527
1528 m_freem(m);
1529 return (NULL);
1530 }
1531
1532 int
1533 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
1534 struct pf_pdesc *pd)
1535 {
1536 struct mbuf *m = *m0;
1537 struct pf_rule *r;
1538 struct pf_frent *frent;
1539 struct pf_fragment *frag = NULL;
1540 struct ip *h = mtod(m, struct ip *);
1541 int mff = (ntohs(h->ip_off) & IP_MF);
1542 int hlen = h->ip_hl << 2;
1543 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1544 u_int16_t fr_max;
1545 int ip_len;
1546 int ip_off;
1547 int asd = 0;
1548 struct pf_ruleset *ruleset = NULL;
1549
1550 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1551 while (r != NULL) {
1552 r->evaluations++;
1553 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1554 r = r->skip[PF_SKIP_IFP].ptr;
1555 else if (r->direction && r->direction != dir)
1556 r = r->skip[PF_SKIP_DIR].ptr;
1557 else if (r->af && r->af != AF_INET)
1558 r = r->skip[PF_SKIP_AF].ptr;
1559 else if (r->proto && r->proto != h->ip_p)
1560 r = r->skip[PF_SKIP_PROTO].ptr;
1561 else if (PF_MISMATCHAW(&r->src.addr,
1562 (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
1563 r->src.neg, kif))
1564 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1565 else if (PF_MISMATCHAW(&r->dst.addr,
1566 (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
1567 r->dst.neg, NULL))
1568 r = r->skip[PF_SKIP_DST_ADDR].ptr;
1569 else {
1570 if (r->anchor == NULL)
1571 break;
1572 else
1573 pf_step_into_anchor(&asd, &ruleset,
1574 PF_RULESET_SCRUB, &r, NULL, NULL);
1575 }
1576 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
1577 PF_RULESET_SCRUB, &r, NULL, NULL))
1578 break;
1579 }
1580
1581 if (r == NULL || r->action == PF_NOSCRUB)
1582 return (PF_PASS);
1583 else {
1584 r->packets[dir == PF_OUT]++;
1585 r->bytes[dir == PF_OUT] += pd->tot_len;
1586 }
1587
1588 /* Check for illegal packets */
1589 if (hlen < (int)sizeof (struct ip))
1590 goto drop;
1591
1592 if (hlen > ntohs(h->ip_len))
1593 goto drop;
1594
1595 /* Clear IP_DF if the rule uses the no-df option */
1596 if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
1597 u_int16_t ipoff = h->ip_off;
1598
1599 h->ip_off &= htons(~IP_DF);
1600 h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
1601 }
1602
1603 /* We will need other tests here */
1604 if (!fragoff && !mff)
1605 goto no_fragment;
1606
1607 /*
1608 * We're dealing with a fragment now. Don't allow fragments
1609 * with IP_DF to enter the cache. If the flag was cleared by
1610 * no-df above, fine. Otherwise drop it.
1611 */
1612 if (h->ip_off & htons(IP_DF)) {
1613 DPFPRINTF(("IP_DF\n"));
1614 goto bad;
1615 }
1616
1617 ip_len = ntohs(h->ip_len) - hlen;
1618 ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1619
1620 /* All fragments are 8 byte aligned */
1621 if (mff && (ip_len & 0x7)) {
1622 DPFPRINTF(("mff and %d\n", ip_len));
1623 goto bad;
1624 }
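/*
 * For example, a 1500-byte MTU leaves 1480 payload bytes per
 * fragment (1500 minus the 20-byte header); 1480 is a multiple of 8,
 * so only a sender violating the alignment rule trips this check.
 */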
1625
1626 /* Respect maximum length */
1627 if (fragoff + ip_len > IP_MAXPACKET) {
1628 DPFPRINTF(("max packet %d\n", fragoff + ip_len));
1629 goto bad;
1630 }
1631 fr_max = fragoff + ip_len;
1632
1633 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
1634 /* Fully buffer all of the fragments */
1635
1636 frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
1637 /* Check if we saw the last fragment already */
1638 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1639 fr_max > frag->fr_max)
1640 goto bad;
1641
1642 /* Get an entry for the fragment queue */
1643 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
1644 if (frent == NULL) {
1645 REASON_SET(reason, PFRES_MEMORY);
1646 return (PF_DROP);
1647 }
1648 pf_nfrents++;
1649 frent->fr_ip = h;
1650 frent->fr_m = m;
1651
1652 /* Might return a completely reassembled mbuf, or NULL */
1653 DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
1654 fragoff, fr_max));
1655 *m0 = m = pf_reassemble(m0, &frag, frent, mff);
1656
1657 if (m == NULL)
1658 return (PF_DROP);
1659
1660 VERIFY(m->m_flags & M_PKTHDR);
1661
1662 /* use mtag from concatenated mbuf chain */
1663 pd->pf_mtag = pf_find_mtag(m);
1664 #if DIAGNOSTIC
1665 if (pd->pf_mtag == NULL) {
1666 printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
1667 if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1668 m_freem(m);
1669 m = *m0 = NULL;
1670 goto no_mem;
1671 }
1672 }
1673 #endif
1674 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1675 goto drop;
1676
1677 h = mtod(m, struct ip *);
1678 } else {
1679 /* non-buffering fragment cache (drops or masks overlaps) */
1680 int nomem = 0;
1681
1682 if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
1683 /*
1684 * Already passed the fragment cache in the
1685 * input direction. If we continued, it would
1686 * appear to be a dup and would be dropped.
1687 */
1688 goto fragment_pass;
1689 }
1690
1691 frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);
1692
1693 /* Check if we saw the last fragment already */
1694 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1695 fr_max > frag->fr_max) {
1696 if (r->rule_flag & PFRULE_FRAGDROP)
1697 frag->fr_flags |= PFFRAG_DROP;
1698 goto bad;
1699 }
1700
1701 *m0 = m = pf_fragcache(m0, h, &frag, mff,
1702 (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
1703 if (m == NULL) {
1704 if (nomem)
1705 goto no_mem;
1706 goto drop;
1707 }
1708
1709 VERIFY(m->m_flags & M_PKTHDR);
1710
1711 /* use mtag from copied and trimmed mbuf chain */
1712 pd->pf_mtag = pf_find_mtag(m);
1713 #if DIAGNOSTIC
1714 if (pd->pf_mtag == NULL) {
1715 printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
1716 if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1717 m_freem(m);
1718 m = *m0 = NULL;
1719 goto no_mem;
1720 }
1721 }
1722 #endif
1723 if (dir == PF_IN)
1724 pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
1725
1726 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1727 goto drop;
1728 goto fragment_pass;
1729 }
1730
1731 no_fragment:
1732 /* At this point, only IP_DF is allowed in ip_off */
1733 if (h->ip_off & ~htons(IP_DF)) {
1734 u_int16_t ipoff = h->ip_off;
1735
1736 h->ip_off &= htons(IP_DF);
1737 h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
1738 }
1739
1740 /* Enforce a minimum ttl, may cause endless packet loops */
1741 if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1742 u_int16_t ip_ttl = h->ip_ttl;
1743
1744 h->ip_ttl = r->min_ttl;
1745 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
1746 }
1747 if (r->rule_flag & PFRULE_RANDOMID) {
1748 u_int16_t oip_id = h->ip_id;
1749
1750 h->ip_id = ip_randomid();
1751 h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, 0);
1752 }
1753 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1754 pd->flags |= PFDESC_IP_REAS;
1755
1756 return (PF_PASS);
1757
1758 fragment_pass:
1759 /* Enforce a minimum ttl, may cause endless packet loops */
1760 if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1761 u_int16_t ip_ttl = h->ip_ttl;
1762
1763 h->ip_ttl = r->min_ttl;
1764 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
1765 }
1766 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1767 pd->flags |= PFDESC_IP_REAS;
1768 return (PF_PASS);
1769
1770 no_mem:
1771 REASON_SET(reason, PFRES_MEMORY);
1772 if (r != NULL && r->log)
1773 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
1774 NULL, NULL, pd);
1775 return (PF_DROP);
1776
1777 drop:
1778 REASON_SET(reason, PFRES_NORM);
1779 if (r != NULL && r->log)
1780 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
1781 NULL, NULL, pd);
1782 return (PF_DROP);
1783
1784 bad:
1785 DPFPRINTF(("dropping bad IPv4 fragment\n"));
1786
1787 /* Free associated fragments */
1788 if (frag != NULL)
1789 pf_free_fragment(frag);
1790
1791 REASON_SET(reason, PFRES_FRAG);
1792 if (r != NULL && r->log)
1793 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1794
1795 return (PF_DROP);
1796 }
1797
1798 #if INET6
1799 int
1800 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
1801 u_short *reason, struct pf_pdesc *pd)
1802 {
1803 struct mbuf *m = *m0;
1804 struct pf_rule *r;
1805 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1806 int off;
1807 struct ip6_ext ext;
1808 /* adi XXX */
1809 #if 0
1810 struct ip6_opt opt;
1811 struct ip6_opt_jumbo jumbo;
1812 int optend;
1813 int ooff;
1814 #endif
1815 struct ip6_frag frag;
1816 u_int32_t jumbolen = 0, plen;
1817 u_int16_t fragoff = 0;
1818 u_int8_t proto;
1819 int terminal;
1820 struct pf_frent *frent;
1821 struct pf_fragment *pff = NULL;
1822 int mff = 0, rh_cnt = 0;
1823 u_int16_t fr_max;
1824 int asd = 0;
1825 struct pf_ruleset *ruleset = NULL;
1826
1827 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1828 while (r != NULL) {
1829 r->evaluations++;
1830 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1831 r = r->skip[PF_SKIP_IFP].ptr;
1832 else if (r->direction && r->direction != dir)
1833 r = r->skip[PF_SKIP_DIR].ptr;
1834 else if (r->af && r->af != AF_INET6)
1835 r = r->skip[PF_SKIP_AF].ptr;
1836 #if 0 /* header chain! */
1837 else if (r->proto && r->proto != h->ip6_nxt)
1838 r = r->skip[PF_SKIP_PROTO].ptr;
1839 #endif
1840 else if (PF_MISMATCHAW(&r->src.addr,
1841 (struct pf_addr *)&h->ip6_src, AF_INET6,
1842 r->src.neg, kif))
1843 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1844 else if (PF_MISMATCHAW(&r->dst.addr,
1845 (struct pf_addr *)&h->ip6_dst, AF_INET6,
1846 r->dst.neg, NULL))
1847 r = r->skip[PF_SKIP_DST_ADDR].ptr;
1848 else {
1849 if (r->anchor == NULL)
1850 break;
1851 else
1852 pf_step_into_anchor(&asd, &ruleset,
1853 PF_RULESET_SCRUB, &r, NULL, NULL);
1854 }
1855 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
1856 PF_RULESET_SCRUB, &r, NULL, NULL))
1857 break;
1858 }
1859
1860 if (r == NULL || r->action == PF_NOSCRUB)
1861 return (PF_PASS);
1862 else {
1863 r->packets[dir == PF_OUT]++;
1864 r->bytes[dir == PF_OUT] += pd->tot_len;
1865 }
1866
1867 /* Check for illegal packets */
1868 if ((int)(sizeof (struct ip6_hdr) + IPV6_MAXPACKET) < m->m_pkthdr.len)
1869 goto drop;
1870
1871 off = sizeof (struct ip6_hdr);
1872 proto = h->ip6_nxt;
1873 terminal = 0;
1874 do {
1875 pd->proto = proto;
1876 switch (proto) {
1877 case IPPROTO_FRAGMENT:
1878 goto fragment;
1879 case IPPROTO_AH:
1880 case IPPROTO_ROUTING:
1881 case IPPROTO_DSTOPTS:
1882 if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
1883 NULL, AF_INET6))
1884 goto shortpkt;
1885 /*
1886 * <jhw@apple.com>
1887 * Multiple routing headers not allowed.
1888 * Routing header type zero considered harmful.
1889 */
1890 if (proto == IPPROTO_ROUTING) {
1891 const struct ip6_rthdr *rh =
1892 (const struct ip6_rthdr *)&ext;
1893 if (rh_cnt++)
1894 goto drop;
1895 if (rh->ip6r_type == IPV6_RTHDR_TYPE_0)
1896 goto drop;
1897 }
1898 else
1899 if (proto == IPPROTO_AH)
1900 off += (ext.ip6e_len + 2) * 4;
1901 else
1902 off += (ext.ip6e_len + 1) * 8;
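/*
 * AH encodes ip6e_len in 4-byte units with two implicit units
 * (RFC 4302); all other extension headers use 8-byte units with one
 * implicit unit (RFC 2460), hence the two formulas.
 */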
1903 proto = ext.ip6e_nxt;
1904 break;
1905 case IPPROTO_HOPOPTS:
1906 /* adi XXX */
1907 #if 0
1908 if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
1909 NULL, AF_INET6))
1910 goto shortpkt;
1911 optend = off + (ext.ip6e_len + 1) * 8;
1912 ooff = off + sizeof (ext);
1913 do {
1914 if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
1915 sizeof (opt.ip6o_type), NULL, NULL,
1916 AF_INET6))
1917 goto shortpkt;
1918 if (opt.ip6o_type == IP6OPT_PAD1) {
1919 ooff++;
1920 continue;
1921 }
1922 if (!pf_pull_hdr(m, ooff, &opt, sizeof (opt),
1923 NULL, NULL, AF_INET6))
1924 goto shortpkt;
1925 if (ooff + sizeof (opt) + opt.ip6o_len > optend)
1926 goto drop;
1927 switch (opt.ip6o_type) {
1928 case IP6OPT_JUMBO:
1929 if (h->ip6_plen != 0)
1930 goto drop;
1931 if (!pf_pull_hdr(m, ooff, &jumbo,
1932 sizeof (jumbo), NULL, NULL,
1933 AF_INET6))
1934 goto shortpkt;
1935 memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
1936 sizeof (jumbolen));
1937 jumbolen = ntohl(jumbolen);
1938 if (jumbolen <= IPV6_MAXPACKET)
1939 goto drop;
1940 if (sizeof (struct ip6_hdr) +
1941 jumbolen != m->m_pkthdr.len)
1942 goto drop;
1943 break;
1944 default:
1945 break;
1946 }
1947 ooff += sizeof (opt) + opt.ip6o_len;
1948 } while (ooff < optend);
1949
1950 off = optend;
1951 proto = ext.ip6e_nxt;
1952 break;
1953 #endif
1954 default:
1955 terminal = 1;
1956 break;
1957 }
1958 } while (!terminal);
1959
1960 /* jumbo payload option must be present, or plen > 0 */
1961 if (ntohs(h->ip6_plen) == 0)
1962 plen = jumbolen;
1963 else
1964 plen = ntohs(h->ip6_plen);
1965 if (plen == 0)
1966 goto drop;
1967 if ((int)(sizeof (struct ip6_hdr) + plen) > m->m_pkthdr.len)
1968 goto shortpkt;
1969
1970 /* Enforce a minimum ttl, may cause endless packet loops */
1971 if (r->min_ttl && h->ip6_hlim < r->min_ttl)
1972 h->ip6_hlim = r->min_ttl;
1973
1974 return (PF_PASS);
1975
1976 fragment:
1977 if (ntohs(h->ip6_plen) == 0 || jumbolen)
1978 goto drop;
1979 plen = ntohs(h->ip6_plen);
1980
1981 if (!pf_pull_hdr(m, off, &frag, sizeof (frag), NULL, NULL, AF_INET6))
1982 goto shortpkt;
1983 fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
1984 pd->proto = frag.ip6f_nxt;
1985 mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
1986 off += sizeof frag;
1987 if (fragoff + (plen - off) > IPV6_MAXPACKET)
1988 goto badfrag;
1989
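	/*
	 * fr_max bounds the payload range this fragment occupies in the
	 * reassembled datagram. E.g. a first fragment (fragoff 0) with
	 * plen 1240 and its payload starting at off 48 gives
	 * fr_max = 0 + 1240 - (48 - 40) = 1232.
	 */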
1990 fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
1991 DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u "
1992 "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off,
1993 fragoff, fr_max));
1994
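	/*
	 * Two strategies follow: with neither fragcrop nor fragdrop
	 * configured, every fragment is buffered until the datagram can
	 * be fully reassembled; otherwise a non-buffering cache merely
	 * tracks the ranges already seen, so overlapping fragments can
	 * be cropped or dropped (see RFC 5722).
	 */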
1995 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
1996 /* Fully buffer all of the fragments */
1997 pd->flags |= PFDESC_IP_REAS;
1998
1999 pff = pf_find_fragment_by_ipv6_header(h, &frag,
2000 &pf_frag_tree);
2001
2002 /* Check if we saw the last fragment already */
2003 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2004 fr_max > pff->fr_max)
2005 goto badfrag;
2006
2007 /* Get an entry for the fragment queue */
2008 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
2009 if (frent == NULL) {
2010 REASON_SET(reason, PFRES_MEMORY);
2011 return (PF_DROP);
2012 }
2013 pf_nfrents++;
2014 frent->fr_ip6 = h;
2015 frent->fr_m = m;
2016 frent->fr_ip6f_opt = frag;
2017 frent->fr_ip6f_hlen = off;
2018
2019 /* Might return a completely reassembled mbuf, or NULL */
2020 DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
2021 ntohl(frag.ip6f_ident), fragoff, fr_max));
2022 *m0 = m = pf_reassemble6(m0, &pff, frent, mff);
2023
2024 if (m == NULL)
2025 return (PF_DROP);
2026
2027 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2028 goto drop;
2029
2030 h = mtod(m, struct ip6_hdr *);
	} else if (dir == PF_IN ||
	    !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
2033 /* non-buffering fragment cache (overlaps: see RFC 5722) */
2034 int nomem = 0;
2035
2036 pff = pf_find_fragment_by_ipv6_header(h, &frag,
2037 &pf_cache_tree);
2038
2039 /* Check if we saw the last fragment already */
2040 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2041 fr_max > pff->fr_max) {
2042 if (r->rule_flag & PFRULE_FRAGDROP)
2043 pff->fr_flags |= PFFRAG_DROP;
2044 goto badfrag;
2045 }
2046
2047 *m0 = m = pf_frag6cache(m0, h, &frag, &pff, off, mff,
2048 (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
2049 if (m == NULL) {
2050 if (nomem)
2051 goto no_mem;
2052 goto drop;
2053 }
2054
2055 if (dir == PF_IN)
2056 pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
2057
2058 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2059 goto drop;
2060 }
2061
2062 /* Enforce a minimum ttl, may cause endless packet loops */
2063 if (r->min_ttl && h->ip6_hlim < r->min_ttl)
2064 h->ip6_hlim = r->min_ttl;
2065 return (PF_PASS);
2066
2067 no_mem:
2068 REASON_SET(reason, PFRES_MEMORY);
2069 goto dropout;
2070
2071 shortpkt:
2072 REASON_SET(reason, PFRES_SHORT);
2073 goto dropout;
2074
2075 drop:
2076 REASON_SET(reason, PFRES_NORM);
2077 goto dropout;
2078
2079 badfrag:
2080 DPFPRINTF(("dropping bad IPv6 fragment\n"));
2081 REASON_SET(reason, PFRES_FRAG);
2082 goto dropout;
2083
2084 dropout:
2085 if (pff != NULL)
2086 pf_free_fragment(pff);
2087 if (r != NULL && r->log)
2088 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
2089 return (PF_DROP);
2090 }
2091 #endif /* INET6 */
2092
2093 int
2094 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
2095 int off, void *h, struct pf_pdesc *pd)
2096 {
2097 #pragma unused(ipoff, h)
2098 struct pf_rule *r, *rm = NULL;
2099 struct tcphdr *th = pd->hdr.tcp;
2100 int rewrite = 0;
2101 int asd = 0;
2102 u_short reason;
2103 u_int8_t flags;
2104 sa_family_t af = pd->af;
2105 struct pf_ruleset *ruleset = NULL;
2106 union pf_state_xport sxport, dxport;
2107
2108 sxport.port = th->th_sport;
2109 dxport.port = th->th_dport;
2110
2111 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2112 while (r != NULL) {
2113 r->evaluations++;
2114 if (pfi_kif_match(r->kif, kif) == r->ifnot)
2115 r = r->skip[PF_SKIP_IFP].ptr;
2116 else if (r->direction && r->direction != dir)
2117 r = r->skip[PF_SKIP_DIR].ptr;
2118 else if (r->af && r->af != af)
2119 r = r->skip[PF_SKIP_AF].ptr;
2120 else if (r->proto && r->proto != pd->proto)
2121 r = r->skip[PF_SKIP_PROTO].ptr;
2122 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
2123 r->src.neg, kif))
2124 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2125 else if (r->src.xport.range.op &&
2126 !pf_match_xport(r->src.xport.range.op, r->proto_variant,
2127 &r->src.xport, &sxport))
2128 r = r->skip[PF_SKIP_SRC_PORT].ptr;
2129 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
2130 r->dst.neg, NULL))
2131 r = r->skip[PF_SKIP_DST_ADDR].ptr;
2132 else if (r->dst.xport.range.op &&
2133 !pf_match_xport(r->dst.xport.range.op, r->proto_variant,
2134 &r->dst.xport, &dxport))
2135 r = r->skip[PF_SKIP_DST_PORT].ptr;
2136 else if (r->os_fingerprint != PF_OSFP_ANY &&
2137 !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, th),
2138 r->os_fingerprint))
2139 r = TAILQ_NEXT(r, entries);
2140 else {
2141 if (r->anchor == NULL) {
2142 rm = r;
2143 break;
2144 } else {
2145 pf_step_into_anchor(&asd, &ruleset,
2146 PF_RULESET_SCRUB, &r, NULL, NULL);
2147 }
2148 }
2149 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2150 PF_RULESET_SCRUB, &r, NULL, NULL))
2151 break;
2152 }
2153
2154 if (rm == NULL || rm->action == PF_NOSCRUB)
2155 return (PF_PASS);
2156 else {
2157 r->packets[dir == PF_OUT]++;
2158 r->bytes[dir == PF_OUT] += pd->tot_len;
2159 }
2160
2161 if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
2162 pd->flags |= PFDESC_TCP_NORM;
2163
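	/*
	 * Flag sanitization, in brief: SYN+RST is dropped outright,
	 * SYN+FIN is downgraded to a plain SYN, a segment carrying none
	 * of SYN, RST or ACK is dropped, and a segment with FIN, PUSH
	 * or URG but no ACK is dropped as well.
	 */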
2164 flags = th->th_flags;
2165 if (flags & TH_SYN) {
2166 /* Illegal packet */
2167 if (flags & TH_RST)
2168 goto tcp_drop;
2169
2170 if (flags & TH_FIN)
2171 flags &= ~TH_FIN;
2172 } else {
2173 /* Illegal packet */
2174 if (!(flags & (TH_ACK|TH_RST)))
2175 goto tcp_drop;
2176 }
2177
2178 if (!(flags & TH_ACK)) {
2179 /* These flags are only valid if ACK is set */
2180 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
2181 goto tcp_drop;
2182 }
2183
2184 /* Check for illegal header length */
2185 if (th->th_off < (sizeof (struct tcphdr) >> 2))
2186 goto tcp_drop;
2187
2188 /* If flags changed, or reserved data set, then adjust */
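	/*
	 * (&th->th_ack + 1) addresses the 16-bit word holding th_off,
	 * th_x2 and th_flags; capturing it before and after the edit
	 * lets pf_cksum_fixup() patch the checksum incrementally
	 * instead of recomputing it.
	 */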
2189 if (flags != th->th_flags || th->th_x2 != 0) {
2190 u_int16_t ov, nv;
2191
2192 ov = *(u_int16_t *)(&th->th_ack + 1);
2193 th->th_flags = flags;
2194 th->th_x2 = 0;
2195 nv = *(u_int16_t *)(&th->th_ack + 1);
2196
2197 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
2198 rewrite = 1;
2199 }
2200
2201 /* Remove urgent pointer, if TH_URG is not set */
2202 if (!(flags & TH_URG) && th->th_urp) {
2203 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
2204 th->th_urp = 0;
2205 rewrite = 1;
2206 }
2207
	/* Process options; sanitized packet headers are copied back below */
2210 if (r->max_mss) {
2211 int rv = pf_normalize_tcpopt(r, dir, kif, pd, m, th, off,
2212 &rewrite);
2213 if (rv == PF_DROP)
2214 return rv;
2215 m = pd->mp;
2216 }
2217
2218 if (rewrite) {
2219 struct mbuf *mw = pf_lazy_makewritable(pd, m,
2220 off + sizeof (*th));
2221 if (!mw) {
2222 REASON_SET(&reason, PFRES_MEMORY);
2223 if (r->log)
				PFLOG_PACKET(kif, h, m, af, dir, reason,
				    r, 0, 0, pd);
2226 return PF_DROP;
2227 }
2228
2229 m_copyback(mw, off, sizeof (*th), th);
2230 }
2231
2232 return (PF_PASS);
2233
2234 tcp_drop:
2235 REASON_SET(&reason, PFRES_NORM);
	if (rm != NULL && rm->log)
		PFLOG_PACKET(kif, h, m, af, dir, reason, rm, NULL, NULL, pd);
2238 return (PF_DROP);
2239 }
2240
2241 int
2242 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
2243 struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
2244 {
2245 #pragma unused(dst)
2246 u_int32_t tsval, tsecr;
2247 u_int8_t hdr[60];
2248 u_int8_t *opt;
2249
2250 VERIFY(src->scrub == NULL);
2251
2252 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
2253 if (src->scrub == NULL)
2254 return (1);
2255 bzero(src->scrub, sizeof (*src->scrub));
2256
2257 switch (pd->af) {
2258 #if INET
2259 case AF_INET: {
2260 struct ip *h = mtod(m, struct ip *);
2261 src->scrub->pfss_ttl = h->ip_ttl;
2262 break;
2263 }
2264 #endif /* INET */
2265 #if INET6
2266 case AF_INET6: {
2267 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
2268 src->scrub->pfss_ttl = h->ip6_hlim;
2269 break;
2270 }
2271 #endif /* INET6 */
2272 }
2273
2274
2275 /*
2276 * All normalizations below are only begun if we see the start of
	 * the connection. They must all set an enabled bit in pfss_flags
2278 */
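	/*
	 * Below, the timestamp scrub sets PFSS_TIMESTAMP; PFSS_PAWS is
	 * only set later, in pf_normalize_tcp_stateful(), once the peer
	 * has echoed a timestamp.
	 */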
2279 if ((th->th_flags & TH_SYN) == 0)
2280 return (0);
2281
2282
2283 if (th->th_off > (sizeof (struct tcphdr) >> 2) && src->scrub &&
2284 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
2285 /* Diddle with TCP options */
2286 int hlen;
2287 opt = hdr + sizeof (struct tcphdr);
2288 hlen = (th->th_off << 2) - sizeof (struct tcphdr);
2289 while (hlen >= TCPOLEN_TIMESTAMP) {
2290 switch (*opt) {
2291 case TCPOPT_EOL: /* FALLTHROUGH */
2292 case TCPOPT_NOP:
2293 opt++;
2294 hlen--;
2295 break;
2296 case TCPOPT_TIMESTAMP:
2297 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2298 src->scrub->pfss_flags |=
2299 PFSS_TIMESTAMP;
2300 src->scrub->pfss_ts_mod =
2301 htonl(random());
2302
2303 /* note PFSS_PAWS not set yet */
2304 memcpy(&tsval, &opt[2],
2305 sizeof (u_int32_t));
2306 memcpy(&tsecr, &opt[6],
2307 sizeof (u_int32_t));
2308 src->scrub->pfss_tsval0 = ntohl(tsval);
2309 src->scrub->pfss_tsval = ntohl(tsval);
2310 src->scrub->pfss_tsecr = ntohl(tsecr);
2311 getmicrouptime(&src->scrub->pfss_last);
2312 }
2313 /* FALLTHROUGH */
2314 default:
2315 hlen -= MAX(opt[1], 2);
2316 opt += MAX(opt[1], 2);
2317 break;
2318 }
2319 }
2320 }
2321
2322 return (0);
2323 }
2324
2325 void
2326 pf_normalize_tcp_cleanup(struct pf_state *state)
2327 {
2328 if (state->src.scrub)
2329 pool_put(&pf_state_scrub_pl, state->src.scrub);
2330 if (state->dst.scrub)
2331 pool_put(&pf_state_scrub_pl, state->dst.scrub);
2332
2333 /* Someday... flush the TCP segment reassembly descriptors. */
2334 }
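
/*
 * Aside: the rewrites above rely on incremental checksum updates
 * (RFC 1624) rather than recomputing the TCP checksum. Replacing a
 * 16-bit word 'old' with 'new' patches the checksum field as, in
 * one's-complement (end-around-carry) arithmetic,
 *
 *	cksum' = cksum + old - new
 *
 * which is the arithmetic pf_cksum_fixup() implements.
 */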
2335
2336 int
2337 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
2338 u_short *reason, struct tcphdr *th, struct pf_state *state,
2339 struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
2340 {
2341 struct timeval uptime;
2342 u_int32_t tsval, tsecr;
2343 u_int tsval_from_last;
2344 u_int8_t hdr[60];
2345 u_int8_t *opt;
2346 int copyback = 0;
2347 int got_ts = 0;
2348
2349 VERIFY(src->scrub || dst->scrub);
2350
2351 /*
2352 * Enforce the minimum TTL seen for this connection. Negate a common
2353 * technique to evade an intrusion detection system and confuse
2354 * firewall state code.
2355 */
2356 switch (pd->af) {
2357 #if INET
2358 case AF_INET: {
2359 if (src->scrub) {
2360 struct ip *h = mtod(m, struct ip *);
2361 if (h->ip_ttl > src->scrub->pfss_ttl)
2362 src->scrub->pfss_ttl = h->ip_ttl;
2363 h->ip_ttl = src->scrub->pfss_ttl;
2364 }
2365 break;
2366 }
2367 #endif /* INET */
2368 #if INET6
2369 case AF_INET6: {
2370 if (src->scrub) {
2371 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
2372 if (h->ip6_hlim > src->scrub->pfss_ttl)
2373 src->scrub->pfss_ttl = h->ip6_hlim;
2374 h->ip6_hlim = src->scrub->pfss_ttl;
2375 }
2376 break;
2377 }
2378 #endif /* INET6 */
2379 }
2380
2381 if (th->th_off > (sizeof (struct tcphdr) >> 2) &&
2382 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
2383 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
2384 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
2385 /* Diddle with TCP options */
2386 int hlen;
2387 opt = hdr + sizeof (struct tcphdr);
2388 hlen = (th->th_off << 2) - sizeof (struct tcphdr);
2389 while (hlen >= TCPOLEN_TIMESTAMP) {
2390 switch (*opt) {
2391 case TCPOPT_EOL: /* FALLTHROUGH */
2392 case TCPOPT_NOP:
2393 opt++;
2394 hlen--;
2395 break;
2396 case TCPOPT_TIMESTAMP:
2397 /*
2398 * Modulate the timestamps. Can be used for
2399 * NAT detection, OS uptime determination or
2400 * reboot detection.
2401 */
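				/*
				 * Sketch: a sender's tsval is rewritten to
				 * tsval + its pfss_ts_mod on the way through;
				 * the echoed tsecr has the peer's pfss_ts_mod
				 * subtracted again, so neither host ever
				 * observes the random offset.
				 */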
2402
2403 if (got_ts) {
2404 /* Huh? Multiple timestamps!? */
2405 if (pf_status.debug >= PF_DEBUG_MISC) {
2406 DPFPRINTF(("multiple TS??"));
2407 pf_print_state(state);
2408 printf("\n");
2409 }
2410 REASON_SET(reason, PFRES_TS);
2411 return (PF_DROP);
2412 }
2413 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2414 memcpy(&tsval, &opt[2],
2415 sizeof (u_int32_t));
2416 if (tsval && src->scrub &&
2417 (src->scrub->pfss_flags &
2418 PFSS_TIMESTAMP)) {
2419 tsval = ntohl(tsval);
2420 pf_change_a(&opt[2],
2421 &th->th_sum,
2422 htonl(tsval +
2423 src->scrub->pfss_ts_mod),
2424 0);
2425 copyback = 1;
2426 }
2427
2428 /* Modulate TS reply iff valid (!0) */
2429 memcpy(&tsecr, &opt[6],
2430 sizeof (u_int32_t));
2431 if (tsecr && dst->scrub &&
2432 (dst->scrub->pfss_flags &
2433 PFSS_TIMESTAMP)) {
2434 tsecr = ntohl(tsecr)
2435 - dst->scrub->pfss_ts_mod;
2436 pf_change_a(&opt[6],
2437 &th->th_sum, htonl(tsecr),
2438 0);
2439 copyback = 1;
2440 }
2441 got_ts = 1;
2442 }
2443 /* FALLTHROUGH */
2444 default:
2445 hlen -= MAX(opt[1], 2);
2446 opt += MAX(opt[1], 2);
2447 break;
2448 }
2449 }
2450 if (copyback) {
			/* Copy back the options; the caller copies back the header */
2452 int optoff = off + sizeof (*th);
2453 int optlen = (th->th_off << 2) - sizeof (*th);
2454 m = pf_lazy_makewritable(pd, m, optoff + optlen);
2455 if (!m) {
2456 REASON_SET(reason, PFRES_MEMORY);
2457 return PF_DROP;
2458 }
2459 *writeback = optoff + optlen;
2460 m_copyback(m, optoff, optlen, hdr + sizeof (*th));
2461 }
2462 }
2463
2464
2465 /*
2466 * Must invalidate PAWS checks on connections idle for too long.
2467 * The fastest allowed timestamp clock is 1ms. That turns out to
2468 * be about 24 days before it wraps. XXX Right now our lowerbound
2469 * TS echo check only works for the first 12 days of a connection
2470 * when the TS has exhausted half its 32bit space
2471 */
#define TS_MAX_IDLE	(24*24*60*60)	/* 24 days of idle time, in seconds */
2473 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */
2474
2475 getmicrouptime(&uptime);
2476 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
2477 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
2478 pf_time_second() - state->creation > TS_MAX_CONN)) {
2479 if (pf_status.debug >= PF_DEBUG_MISC) {
2480 DPFPRINTF(("src idled out of PAWS\n"));
2481 pf_print_state(state);
2482 printf("\n");
2483 }
2484 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
2485 | PFSS_PAWS_IDLED;
2486 }
2487 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
2488 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
2489 if (pf_status.debug >= PF_DEBUG_MISC) {
2490 DPFPRINTF(("dst idled out of PAWS\n"));
2491 pf_print_state(state);
2492 printf("\n");
2493 }
2494 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
2495 | PFSS_PAWS_IDLED;
2496 }
2497
2498 if (got_ts && src->scrub && dst->scrub &&
2499 (src->scrub->pfss_flags & PFSS_PAWS) &&
2500 (dst->scrub->pfss_flags & PFSS_PAWS)) {
2501 /*
2502 * Validate that the timestamps are "in-window".
2503 * RFC1323 describes TCP Timestamp options that allow
2504 * measurement of RTT (round trip time) and PAWS
2505 * (protection against wrapped sequence numbers). PAWS
2506 * gives us a set of rules for rejecting packets on
2507 * long fat pipes (packets that were somehow delayed
2508 * in transit longer than the time it took to send the
2509 * full TCP sequence space of 4Gb). We can use these
2510 * rules and infer a few others that will let us treat
2511 * the 32bit timestamp and the 32bit echoed timestamp
2512 * as sequence numbers to prevent a blind attacker from
2513 * inserting packets into a connection.
2514 *
2515 * RFC1323 tells us:
2516 * - The timestamp on this packet must be greater than
2517 * or equal to the last value echoed by the other
2518 * endpoint. The RFC says those will be discarded
2519 * since it is a dup that has already been acked.
2520 * This gives us a lowerbound on the timestamp.
2521 * timestamp >= other last echoed timestamp
2522 * - The timestamp will be less than or equal to
2523 * the last timestamp plus the time between the
2524 * last packet and now. The RFC defines the max
2525 * clock rate as 1ms. We will allow clocks to be
2526 * up to 10% fast and will allow a total difference
		 * of 30 seconds due to a route change. And this
2528 * gives us an upperbound on the timestamp.
2529 * timestamp <= last timestamp + max ticks
2530 * We have to be careful here. Windows will send an
2531 * initial timestamp of zero and then initialize it
2532 * to a random value after the 3whs; presumably to
2533 * avoid a DoS by having to call an expensive RNG
2534 * during a SYN flood. Proof MS has at least one
2535 * good security geek.
2536 *
2537 * - The TCP timestamp option must also echo the other
2538 * endpoints timestamp. The timestamp echoed is the
2539 * one carried on the earliest unacknowledged segment
2540 * on the left edge of the sequence window. The RFC
2541 * states that the host will reject any echoed
2542 * timestamps that were larger than any ever sent.
2543 * This gives us an upperbound on the TS echo.
		 * tsecr <= largest_tsval
2545 * - The lowerbound on the TS echo is a little more
2546 * tricky to determine. The other endpoint's echoed
2547 * values will not decrease. But there may be
2548 * network conditions that re-order packets and
2549 * cause our view of them to decrease. For now the
2550 * only lowerbound we can safely determine is that
2551 * the TS echo will never be less than the original
2552 * TS. XXX There is probably a better lowerbound.
2553 * Remove TS_MAX_CONN with better lowerbound check.
		 * tsecr >= other original TS
2555 *
2556 * It is also important to note that the fastest
2557 * timestamp clock of 1ms will wrap its 32bit space in
2558 * 24 days. So we just disable TS checking after 24
2559 * days of idle time. We actually must use a 12d
2560 * connection limit until we can come up with a better
2561 * lowerbound to the TS echo check.
2562 */
2563 struct timeval delta_ts;
2564 int ts_fudge;
2565
2566
2567 /*
2568 * PFTM_TS_DIFF is how many seconds of leeway to allow
2569 * a host's timestamp. This can happen if the previous
2570 * packet got delayed in transit for much longer than
2571 * this packet.
2572 */
2573 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
2574 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
2575
2576
2577 /* Calculate max ticks since the last timestamp */
2578 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */
2579 #define TS_MICROSECS 1000000 /* microseconds per second */
2580 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
2581 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
2582 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
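		/*
		 * E.g. 2.5s of idle time with a 1s fudge allows
		 * (2 + 1) * 1100 + 500000 / (1000000 / 1100) = 3850
		 * timestamp ticks of advance.
		 */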
2583
2584
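		/*
		 * Once both sides are established, drop the segment if
		 * tsval falls below the peer's last echoed value, if it
		 * exceeds our last tsval plus the allowed ticks, or if a
		 * nonzero tsecr lies outside [peer tsval0, peer tsval].
		 */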
2585 if ((src->state >= TCPS_ESTABLISHED &&
2586 dst->state >= TCPS_ESTABLISHED) &&
2587 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
2588 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
2589 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
2590 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
2591 /*
2592 * Bad RFC1323 implementation or an insertion attack.
2593 *
2594 * - Solaris 2.6 and 2.7 are known to send another ACK
2595 * after the FIN,FIN|ACK,ACK closing that carries
2596 * an old timestamp.
2597 */
2598
2599 DPFPRINTF(("Timestamp failed %c%c%c%c\n",
2600 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
2601 SEQ_GT(tsval, src->scrub->pfss_tsval +
2602 tsval_from_last) ? '1' : ' ',
2603 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
2604 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
2605 DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u "
2606 "idle: %lus %ums\n",
2607 tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
2608 delta_ts.tv_usec / 1000));
2609 DPFPRINTF((" src->tsval: %u tsecr: %u\n",
2610 src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2611 DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u\n",
2612 dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
2613 dst->scrub->pfss_tsval0));
2614 if (pf_status.debug >= PF_DEBUG_MISC) {
2615 pf_print_state(state);
2616 pf_print_flags(th->th_flags);
2617 printf("\n");
2618 }
2619 REASON_SET(reason, PFRES_TS);
2620 return (PF_DROP);
2621 }
2622
2623 /* XXX I'd really like to require tsecr but it's optional */
2624
2625 } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
2626 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
2627 || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
2628 src->scrub && dst->scrub &&
2629 (src->scrub->pfss_flags & PFSS_PAWS) &&
2630 (dst->scrub->pfss_flags & PFSS_PAWS)) {
2631 /*
2632 * Didn't send a timestamp. Timestamps aren't really useful
2633 * when:
2634 * - connection opening or closing (often not even sent).
		 * but we must not let an attacker put a FIN on a
2636 * data packet to sneak it through our ESTABLISHED check.
2637 * - on a TCP reset. RFC suggests not even looking at TS.
2638 * - on an empty ACK. The TS will not be echoed so it will
2639 * probably not help keep the RTT calculation in sync and
2640 * there isn't as much danger when the sequence numbers
2641 * got wrapped. So some stacks don't include TS on empty
2642 * ACKs :-(
2643 *
2644 * To minimize the disruption to mostly RFC1323 conformant
2645 * stacks, we will only require timestamps on data packets.
2646 *
2647 * And what do ya know, we cannot require timestamps on data
2648 * packets. There appear to be devices that do legitimate
2649 * TCP connection hijacking. There are HTTP devices that allow
2650 * a 3whs (with timestamps) and then buffer the HTTP request.
2651 * If the intermediate device has the HTTP response cache, it
2652 * will spoof the response but not bother timestamping its
2653 * packets. So we can look for the presence of a timestamp in
2654 * the first data packet and if there, require it in all future
2655 * packets.
2656 */
2657
2658 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
2659 /*
2660 * Hey! Someone tried to sneak a packet in. Or the
2661 * stack changed its RFC1323 behavior?!?!
2662 */
2663 if (pf_status.debug >= PF_DEBUG_MISC) {
2664 DPFPRINTF(("Did not receive expected RFC1323 "
2665 "timestamp\n"));
2666 pf_print_state(state);
2667 pf_print_flags(th->th_flags);
2668 printf("\n");
2669 }
2670 REASON_SET(reason, PFRES_TS);
2671 return (PF_DROP);
2672 }
2673 }
2674
2675
2676 /*
	 * We note whether a host sends its data packets with or without
	 * timestamps, and require all data packets to carry a timestamp
	 * if the first one does. PAWS implicitly requires that all data packets be
2680 * timestamped. But I think there are middle-man devices that hijack
2681 * TCP streams immediately after the 3whs and don't timestamp their
2682 * packets (seen in a WWW accelerator or cache).
2683 */
2684 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
2685 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
2686 if (got_ts)
2687 src->scrub->pfss_flags |= PFSS_DATA_TS;
2688 else {
2689 src->scrub->pfss_flags |= PFSS_DATA_NOTS;
2690 if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2691 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
2692 /* Don't warn if other host rejected RFC1323 */
2693 DPFPRINTF(("Broken RFC1323 stack did not "
2694 "timestamp data packet. Disabled PAWS "
2695 "security.\n"));
2696 pf_print_state(state);
2697 pf_print_flags(th->th_flags);
2698 printf("\n");
2699 }
2700 }
2701 }
2702
2703
2704 /*
2705 * Update PAWS values
2706 */
2707 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
2708 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
2709 getmicrouptime(&src->scrub->pfss_last);
2710 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
2711 (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2712 src->scrub->pfss_tsval = tsval;
2713
2714 if (tsecr) {
2715 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
2716 (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2717 src->scrub->pfss_tsecr = tsecr;
2718
2719 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
2720 (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
2721 src->scrub->pfss_tsval0 == 0)) {
2722 /* tsval0 MUST be the lowest timestamp */
2723 src->scrub->pfss_tsval0 = tsval;
2724 }
2725
2726 /* Only fully initialized after a TS gets echoed */
2727 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
2728 src->scrub->pfss_flags |= PFSS_PAWS;
2729 }
2730 }
2731
2732 /* I have a dream.... TCP segment reassembly.... */
2733 return (0);
2734 }
2735
2736 static int
2737 pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
2738 struct pf_pdesc *pd, struct mbuf *m, struct tcphdr *th, int off,
2739 int *rewrptr)
2740 {
2741 #pragma unused(dir, kif)
2742 sa_family_t af = pd->af;
2743 u_int16_t *mss;
2744 int thoff;
2745 int opt, cnt, optlen = 0;
2746 int rewrite = 0;
2747 u_char opts[MAX_TCPOPTLEN];
2748 u_char *optp = opts;
2749
2750 thoff = th->th_off << 2;
2751 cnt = thoff - sizeof (struct tcphdr);
2752
2753 if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt,
2754 NULL, NULL, af))
2755 return PF_DROP;
2756
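	/*
	 * Walk the options: EOL terminates the list, a NOP is a single
	 * byte, and every other option carries its total length
	 * (including the kind and length octets) in optp[1].
	 */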
2757 for (; cnt > 0; cnt -= optlen, optp += optlen) {
2758 opt = optp[0];
2759 if (opt == TCPOPT_EOL)
2760 break;
2761 if (opt == TCPOPT_NOP)
2762 optlen = 1;
2763 else {
2764 if (cnt < 2)
2765 break;
2766 optlen = optp[1];
2767 if (optlen < 2 || optlen > cnt)
2768 break;
2769 }
2770 switch (opt) {
2771 case TCPOPT_MAXSEG:
2772 mss = (u_int16_t *)(void *)(optp + 2);
2773 if ((ntohs(*mss)) > r->max_mss) {
2774 /*
2775 * <jhw@apple.com>
2776 * Only do the TCP checksum fixup if delayed
2777 * checksum calculation will not be performed.
2778 */
2779 if (m->m_pkthdr.rcvif ||
2780 !(m->m_pkthdr.csum_flags & CSUM_TCP))
2781 th->th_sum = pf_cksum_fixup(th->th_sum,
2782 *mss, htons(r->max_mss), 0);
2783 *mss = htons(r->max_mss);
2784 rewrite = 1;
2785 }
2786 break;
2787 default:
2788 break;
2789 }
2790 }
2791
2792 if (rewrite) {
2793 struct mbuf *mw;
2794 u_short reason;
2795
2796 mw = pf_lazy_makewritable(pd, pd->mp,
2797 off + sizeof (*th) + thoff);
2798 if (!mw) {
2799 REASON_SET(&reason, PFRES_MEMORY);
			if (r->log)
				PFLOG_PACKET(kif, mtod(m, void *), m, af,
				    dir, reason, r, 0, 0, pd);
2803 return PF_DROP;
2804 }
2805
2806 *rewrptr = 1;
2807 m_copyback(mw, off + sizeof (*th), thoff - sizeof (*th), opts);
2808 }
2809
2810 return PF_PASS;
2811 }