bsd/net/pf_norm.c

   1 /*
   2  * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 /*      $apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
  30 /*      $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
  31
  32 /*
  33  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
  34  * All rights reserved.
  35  *
  36  * Redistribution and use in source and binary forms, with or without
  37  * modification, are permitted provided that the following conditions
  38  * are met:
  39  * 1. Redistributions of source code must retain the above copyright
  40  *    notice, this list of conditions and the following disclaimer.
  41  * 2. Redistributions in binary form must reproduce the above copyright
  42  *    notice, this list of conditions and the following disclaimer in the
  43  *    documentation and/or other materials provided with the distribution.
  44  *
  45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  46  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  47  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  48  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  49  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  50  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  51  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  52  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  53  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  54  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  55  */
  56
  57 #include <sys/param.h>
  58 #include <sys/systm.h>
  59 #include <sys/mbuf.h>
  60 #include <sys/filio.h>
  61 #include <sys/fcntl.h>
  62 #include <sys/socket.h>
  63 #include <sys/kernel.h>
  64 #include <sys/time.h>
  65 #include <sys/random.h>
  66 #include <sys/mcache.h>
  67
  68 #include <net/if.h>
  69 #include <net/if_types.h>
  70 #include <net/bpf.h>
  71 #include <net/route.h>
  72 #include <net/if_pflog.h>
  73
  74 #include <netinet/in.h>
  75 #include <netinet/in_var.h>
  76 #include <netinet/in_systm.h>
  77 #include <netinet/ip.h>
  78 #include <netinet/ip_var.h>
  79 #include <netinet/tcp.h>
  80 #include <netinet/tcp_seq.h>
  81 #include <netinet/tcp_fsm.h>
  82 #include <netinet/udp.h>
  83 #include <netinet/ip_icmp.h>
  84
  85 #if INET6
  86 #include <netinet/ip6.h>
  87 #endif /* INET6 */
  88
  89 #include <net/pfvar.h>
  90
     /*
      * One buffered fragment of a datagram under reassembly.  Entries are
      * allocated from pf_frent_pl and linked onto the fr_queue list of the
      * owning struct pf_fragment.
      */
  91 struct pf_frent {
  92         LIST_ENTRY(pf_frent)    fr_next;        /* link on fr_queue */
  93         struct mbuf             *fr_m;          /* mbuf holding the fragment */
     /* fr_ip and fr_ip6 are shorthands for the two members of fr_u */
  94 #define fr_ip           fr_u.fru_ipv4
  95 #define fr_ip6          fr_u.fru_ipv6
  96         union {
  97                 struct ip       *fru_ipv4;      /* IPv4 header of fragment */
  98                 struct ip6_hdr  *fru_ipv6;      /* IPv6 header of fragment */
  99         } fr_u;
             /*
              * NOTE(review): the two fields below are not referenced by the
              * IPv4 code in this part of the file; presumably they carry the
              * IPv6 fragment header and a header length -- confirm against
              * the IPv6 reassembly code.
              */
 100         struct ip6_frag         fr_ip6f_opt;
 101         int                     fr_ip6f_hlen;
 102 };
 103
     /*
      * One byte range recorded by the non-buffering fragment cache.  Entries
      * are allocated from pf_cent_pl and linked onto the fr_cache list of the
      * owning struct pf_fragment.
      */
 104 struct pf_frcache {
 105         LIST_ENTRY(pf_frcache) fr_next;        /* link on fr_cache */
 106         uint16_t        fr_off;                /* first byte of the range */
 107         uint16_t        fr_end;                /* offset + length of the range */
 108 };
 109
     /* Bit values stored in pf_fragment.fr_flags */
 110 #define PFFRAG_SEENLAST 0x0001          /* Last fragment has been seen */
 111 #define PFFRAG_NOBUFFER 0x0002          /* Non-buffering fragment cache */
 112 #define PFFRAG_DROP     0x0004          /* Drop all fragments */
     /* True when fr buffers fragments (uses fr_queue rather than fr_cache) */
 113 #define BUFFER_FRAGMENTS(fr)    (!((fr)->fr_flags & PFFRAG_NOBUFFER))
 114
     /*
      * Per-datagram fragment state.  The same structure is used in two modes,
      * selected by PFFRAG_NOBUFFER in fr_flags: buffering (fragments are held
      * on fr_queue) and non-buffering (only seen byte ranges are recorded on
      * fr_cache).  The lookup key is fr_af, fr_p, the id and the two
      * addresses; see pf_frag_compare().
      */
 115 struct pf_fragment {
 116         RB_ENTRY(pf_fragment) fr_entry;         /* pf_frag_tree/pf_cache_tree */
 117         TAILQ_ENTRY(pf_fragment) frag_next;     /* pf_fragqueue/pf_cachequeue */
 118         struct pf_addr  fr_srcx;                /* source address */
 119         struct pf_addr  fr_dstx;                /* destination address */
 120         u_int8_t        fr_p;           /* protocol of this fragment */
 121         u_int8_t        fr_flags;       /* status flags */
 122         u_int16_t       fr_max;         /* fragment data max */
     /* fr_id (IPv4) and fr_id6 (IPv6) hold the id as copied from the header */
 123 #define fr_id           fr_uid.fru_id4
 124 #define fr_id6          fr_uid.fru_id6
 125         union {
 126                 u_int16_t       fru_id4;
 127                 u_int32_t       fru_id6;
 128         } fr_uid;
 129         int             fr_af;          /* AF_INET or AF_INET6 */
 130         u_int32_t       fr_timeout;     /* pf_time_second() of last use */
 131 #define fr_queue        fr_u.fru_queue
 132 #define fr_cache        fr_u.fru_cache
 133         union {
 134                 LIST_HEAD(pf_fragq, pf_frent) fru_queue;        /* buffering */
 135                 LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;     /* non-buf */
 136         } fr_u;
 137 };
 138
     /*
      * Recency-ordered lists of all pf_fragment entries, one per mode.  New
      * and recently looked-up entries are inserted at the head; purging and
      * flushing take entries from the tail.
      */
 139 static TAILQ_HEAD(pf_fragqueue, pf_fragment)    pf_fragqueue;
 140 static TAILQ_HEAD(pf_cachequeue, pf_fragment)   pf_cachequeue;
 141
     /*
      * Red-black trees used for lookup by key, ordered by pf_frag_compare():
      * pf_frag_tree holds buffering entries, pf_cache_tree non-buffering ones.
      */
 142 static __inline int  pf_frag_compare(struct pf_fragment *,
 143     struct pf_fragment *);
 144 static RB_HEAD(pf_frag_tree, pf_fragment)       pf_frag_tree, pf_cache_tree;
 145 RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
 146     pf_frag_compare);
 147 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 148
 149 /* Private prototypes: forward declarations of the file-local helpers */
 150 static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *,
 151     struct ip6_frag *);
 152 static void pf_ip2key(struct pf_fragment *, struct ip *);
 153 static void pf_remove_fragment(struct pf_fragment *);
 154 static void pf_flush_fragments(void);
 155 static void pf_free_fragment(struct pf_fragment *);
 156 static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *,
 157     struct pf_frag_tree *);
 158 static __inline struct pf_fragment *
 159     pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *);
 160 static __inline struct pf_fragment *
 161     pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *,
 162     struct pf_frag_tree *);
 163 static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
 164     struct pf_frent *, int);
 165 static struct mbuf *pf_fragcache(struct mbuf **, struct ip *,
 166     struct pf_fragment **, int, int, int *);
 167 static struct mbuf *pf_reassemble6(struct mbuf **, struct pf_fragment **,
 168     struct pf_frent *, int);
 169 static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr*,
 170     struct ip6_frag *, struct pf_fragment **, int, int, int, int *);
 171 static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *,
 172     struct pf_pdesc *, struct mbuf *, struct tcphdr *, int, int *);
 173
     /*
      * Debug print.  Emits the name of the calling function followed by the
      * printf-style message, but only when pf_status.debug is at least
      * PF_DEBUG_MISC.  The argument x is a complete, parenthesized printf
      * argument list, so call sites use double parentheses:
      *     DPFPRINTF(("fmt\n", args));
      */
 174 #define DPFPRINTF(x) do {                               \
 175         if (pf_status.debug >= PF_DEBUG_MISC) {         \
 176                 printf("%s: ", __func__);               \
 177                 printf x ;                              \
 178         }                                               \
 179 } while (0)
 180
 181 /*
      * Globals.
      *
      * Allocation pools, all set up in pf_normalize_init():
      *   pf_frent_pl        struct pf_frent    (buffered fragments)
      *   pf_frag_pl         struct pf_fragment (buffering mode)
      *   pf_cache_pl        struct pf_fragment (non-buffering mode)
      *   pf_cent_pl         struct pf_frcache  (cached byte ranges)
      *   pf_state_scrub_pl  struct pf_state_scrub
      */
 182 struct pool              pf_frent_pl, pf_frag_pl;
 183 static struct pool       pf_cache_pl, pf_cent_pl;
 184 struct pool              pf_state_scrub_pl;
 185
     /*
      * Counts of outstanding pf_frent and pf_frcache entries; each is
      * decremented whenever an entry is returned to its pool.
      */
 186 static int               pf_nfrents, pf_ncache;
 187
     /*
      * Set up the normalization state: create the five allocation pools,
      * apply the high-water mark / hard limits to the fragment pools, and
      * initialize the two fragment queues as empty.
      */
 188 void
 189 pf_normalize_init(void)
 190 {
 191         pool_init(&pf_frent_pl, sizeof (struct pf_frent), 0, 0, 0, "pffrent",
 192             NULL);
 193         pool_init(&pf_frag_pl, sizeof (struct pf_fragment), 0, 0, 0, "pffrag",
 194             NULL);
 195         pool_init(&pf_cache_pl, sizeof (struct pf_fragment), 0, 0, 0,
 196             "pffrcache", NULL);
 197         pool_init(&pf_cent_pl, sizeof (struct pf_frcache), 0, 0, 0, "pffrcent",
 198             NULL);
 199         pool_init(&pf_state_scrub_pl, sizeof (struct pf_state_scrub), 0, 0, 0,
 200             "pfstscr", NULL);
 201
             /*
              * pf_frag_pl only gets a high-water mark; the other three
              * fragment pools get hard limits.
              */
 202         pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
 203         pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
 204         pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
 205         pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
 206
 207         TAILQ_INIT(&pf_fragqueue);
 208         TAILQ_INIT(&pf_cachequeue);
 209 }
 210
     /*
      * Compiled out (#if 0).  Counterpart of pf_normalize_init(): destroys
      * the pools in the reverse of the order in which they were created.
      */
 211 #if 0
 212 void
 213 pf_normalize_destroy(void)
 214 {
 215         pool_destroy(&pf_state_scrub_pl);
 216         pool_destroy(&pf_cent_pl);
 217         pool_destroy(&pf_cache_pl);
 218         pool_destroy(&pf_frag_pl);
 219         pool_destroy(&pf_frent_pl);
 220 }
 221 #endif
 222
     /*
      * Returns non-zero when no fragment state is being tracked, i.e. both
      * the buffering queue and the non-buffering cache queue are empty.
      */
 223 int
 224 pf_normalize_isempty(void)
 225 {
 226         return (TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue));
 227 }
 228
     /*
      * Ordering function for the fragment red-black trees.
      *
      * Compares two keys by address family, then protocol, then (per family)
      * the fragment id, the source address and the destination address.
      * Returns a negative value, zero or a positive value when a sorts
      * before, equal to or after b.
      *
      * The function must define a consistent total order, because the tree
      * relies on it for both insertion and lookup.
      */
 229 static __inline int
 230 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
 231 {
 232         int     diff;
 233
 234         if ((diff = a->fr_af - b->fr_af))
 235                 return (diff);
 236         else if ((diff = a->fr_p - b->fr_p))
 237                 return (diff);
 238         else {
 239                 struct pf_addr *sa = &a->fr_srcx;
 240                 struct pf_addr *sb = &b->fr_srcx;
 241                 struct pf_addr *da = &a->fr_dstx;
 242                 struct pf_addr *db = &b->fr_dstx;
 243
 244                 switch (a->fr_af) {
 245 #ifdef INET
 246                 case AF_INET:
                             /*
                              * fr_id is 16 bits wide, so the difference of
                              * the promoted values always fits in an int.
                              */
 247                         if ((diff = a->fr_id - b->fr_id))
 248                                 return (diff);
 249                         else if (sa->v4.s_addr < sb->v4.s_addr)
 250                                 return (-1);
 251                         else if (sa->v4.s_addr > sb->v4.s_addr)
 252                                 return (1);
 253                         else if (da->v4.s_addr < db->v4.s_addr)
 254                                 return (-1);
 255                         else if (da->v4.s_addr > db->v4.s_addr)
 256                                 return (1);
 257                         break;
 258 #endif
 259 #ifdef INET6
 260                 case AF_INET6:
                             /*
                              * fr_id6 is an unsigned 32-bit value.  Storing
                              * the difference in an int would wrap and give
                              * an ordering that is not transitive, so the
                              * ids are compared explicitly instead.
                              */
 261                         if (a->fr_id6 != b->fr_id6)
 262                                 return (a->fr_id6 < b->fr_id6 ? -1 : 1);
 263                         else if (sa->addr32[3] < sb->addr32[3])
 264                                 return (-1);
 265                         else if (sa->addr32[3] > sb->addr32[3])
 266                                 return (1);
 267                         else if (sa->addr32[2] < sb->addr32[2])
 268                                 return (-1);
 269                         else if (sa->addr32[2] > sb->addr32[2])
 270                                 return (1);
 271                         else if (sa->addr32[1] < sb->addr32[1])
 272                                 return (-1);
 273                         else if (sa->addr32[1] > sb->addr32[1])
 274                                 return (1);
 275                         else if (sa->addr32[0] < sb->addr32[0])
 276                                 return (-1);
 277                         else if (sa->addr32[0] > sb->addr32[0])
 278                                 return (1);
 279                         else if (da->addr32[3] < db->addr32[3])
 280                                 return (-1);
 281                         else if (da->addr32[3] > db->addr32[3])
 282                                 return (1);
 283                         else if (da->addr32[2] < db->addr32[2])
 284                                 return (-1);
 285                         else if (da->addr32[2] > db->addr32[2])
 286                                 return (1);
 287                         else if (da->addr32[1] < db->addr32[1])
 288                                 return (-1);
 289                         else if (da->addr32[1] > db->addr32[1])
 290                                 return (1);
 291                         else if (da->addr32[0] < db->addr32[0])
 292                                 return (-1);
 293                         else if (da->addr32[0] > db->addr32[0])
 294                                 return (1);
 295                         break;
 296 #endif
 297                 default:
                             /*
                              * Unreachable for well-formed keys.  The
                              * condition must be false for VERIFY to fire,
                              * as in the other VERIFY(0 && ...) uses in
                              * this file.
                              */
 298                         VERIFY(0 && "only IPv4 and IPv6 supported!");
 299                         break;
 300                 }
 301         }
 302         return (0);
 303 }
 304
     /*
      * Free every fragment entry that has not been used within the
      * PFTM_FRAG timeout of the default rule.
      *
      * Both queues keep the most recently used entry at the head, so each
      * loop examines the tail and stops at the first entry that is still
      * fresh.  The first loop handles buffering entries (pf_fragqueue), the
      * second the non-buffering cache entries (pf_cachequeue).
      */
 305 void
 306 pf_purge_expired_fragments(void)
 307 {
 308         struct pf_fragment *frag;
             /* Entries last used at or before this time are expired */
 309         u_int32_t expire = pf_time_second() -
 310             pf_default_rule.timeout[PFTM_FRAG];
 311
 312         while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
 313                 VERIFY(BUFFER_FRAGMENTS(frag));
 314                 if (frag->fr_timeout > expire)
 315                         break;
 316
 317                 switch (frag->fr_af) {
 318                 case AF_INET:
 319                       DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n",
 320                           ntohs(frag->fr_id),
 321                           (uint64_t)VM_KERNEL_ADDRPERM(frag)));
 322                       break;
 323                 case AF_INET6:
                           /* The IPv6 id is an unsigned 32-bit value: use %u */
 324                       DPFPRINTF(("expiring IPv6 %u(0x%llx) from queue.\n",
 325                           ntohl(frag->fr_id6),
 326                           (uint64_t)VM_KERNEL_ADDRPERM(frag)));
 327                       break;
 328                 default:
 329                       VERIFY(0 && "only IPv4 and IPv6 supported");
 330                       break;
 331                 }
                     /* Also unlinks frag from the queue, so the loop advances */
 332                 pf_free_fragment(frag);
 333         }
 334
 335         while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
 336                 VERIFY(!BUFFER_FRAGMENTS(frag));
 337                 if (frag->fr_timeout > expire)
 338                         break;
 339
 340                 switch (frag->fr_af) {
 341                 case AF_INET:
 342                       DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n",
 343                           ntohs(frag->fr_id),
 344                           (uint64_t)VM_KERNEL_ADDRPERM(frag)));
 345                       break;
 346                 case AF_INET6:
                           /* The IPv6 id is an unsigned 32-bit value: use %u */
 347                       DPFPRINTF(("expiring IPv6 %u(0x%llx) from cache.\n",
 348                           ntohl(frag->fr_id6),
 349                           (uint64_t)VM_KERNEL_ADDRPERM(frag)));
 350                       break;
 351                 default:
 352                       VERIFY(0 && "only IPv4 and IPv6 supported");
 353                       break;
 354                 }
 355                 pf_free_fragment(frag);
                     /* The freed entry must no longer be the queue tail */
 356                 VERIFY(TAILQ_EMPTY(&pf_cachequeue) ||
 357                     TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
 358         }
 359 }
 360
 361 /*
 362  * Try to flush old fragments to make space for new ones.
      *
      * For each of the two modes the goal is 90% of the current entry count
      * (pf_nfrents for buffered fragments, pf_ncache for cache ranges).
      * Whole pf_fragment entries are freed from the tail of the matching
      * queue until the count has dropped to the goal or the queue is empty.
 363  */
 364
 365 static void
 366 pf_flush_fragments(void)
 367 {
 368         struct pf_fragment      *frag;
 369         int                      goal;
 370
 371         goal = pf_nfrents * 9 / 10;
 372         DPFPRINTF(("trying to free > %d frents\n",
 373             pf_nfrents - goal));
 374         while (goal < pf_nfrents) {
 375                 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
 376                 if (frag == NULL)
 377                         break;
                     /* Decrements pf_nfrents once per queued fragment */
 378                 pf_free_fragment(frag);
 379         }
 380
 381
 382         goal = pf_ncache * 9 / 10;
 383         DPFPRINTF(("trying to free > %d cache entries\n",
 384             pf_ncache - goal));
 385         while (goal < pf_ncache) {
 386                 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
 387                 if (frag == NULL)
 388                         break;
                     /* Decrements pf_ncache once per cached range */
 389                 pf_free_fragment(frag);
 390         }
 391 }
 392
 393 /*
      * Frees the fragment and all associated entries.
      *
      * In buffering mode every queued pf_frent is unlinked, its mbuf is
      * freed and the entry is returned to pf_frent_pl.  In non-buffering
      * mode every pf_frcache range is unlinked and returned to pf_cent_pl.
      * The pf_fragment itself is then released by pf_remove_fragment().
      */
 394
 395 static void
 396 pf_free_fragment(struct pf_fragment *frag)
 397 {
 398         struct pf_frent         *frent;
 399         struct pf_frcache       *frcache;
 400
 401         /* Free all fragments */
 402         if (BUFFER_FRAGMENTS(frag)) {
 403                 for (frent = LIST_FIRST(&frag->fr_queue); frent;
 404                     frent = LIST_FIRST(&frag->fr_queue)) {
 405                         LIST_REMOVE(frent, fr_next);
 406
 407                         m_freem(frent->fr_m);
 408                         pool_put(&pf_frent_pl, frent);
 409                         pf_nfrents--;
 410                 }
 411         } else {
 412                 for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
 413                     frcache = LIST_FIRST(&frag->fr_cache)) {
 414                         LIST_REMOVE(frcache, fr_next);
 415
                             /*
                              * Sanity check: the next range must start
                              * after the end of the one just removed.
                              */
 416                         VERIFY(LIST_EMPTY(&frag->fr_cache) ||
 417                             LIST_FIRST(&frag->fr_cache)->fr_off >
 418                             frcache->fr_end);
 419
 420                         pool_put(&pf_cent_pl, frcache);
 421                         pf_ncache--;
 422                 }
 423         }
 424
 425         pf_remove_fragment(frag);
 426 }
 427
     /*
      * Fill in the lookup-key fields of key from an IPv6 header and its
      * fragment header: next-header value, fragment id, address family and
      * the two addresses.  Only these fields are written; the rest of *key
      * is left untouched.  The id is copied as found in the header, without
      * byte-order conversion.
      */
 428 static void
 429 pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6,
 430     struct ip6_frag *fh)
 431 {
 432         key->fr_p = fh->ip6f_nxt;
 433         key->fr_id6 = fh->ip6f_ident;
 434         key->fr_af = AF_INET6;
 435         key->fr_srcx.v6 = ip6->ip6_src;
 436         key->fr_dstx.v6 = ip6->ip6_dst;
 437 }
 438
     /*
      * Fill in the lookup-key fields of key from an IPv4 header: protocol,
      * id, address family and the two addresses.  Only these fields are
      * written; the rest of *key is left untouched.  The id is copied as
      * found in the header, without byte-order conversion.
      */
 439 static void
 440 pf_ip2key(struct pf_fragment *key, struct ip *ip)
 441 {
 442         key->fr_p = ip->ip_p;
 443         key->fr_id = ip->ip_id;
 444         key->fr_af = AF_INET;
 445         key->fr_srcx.v4.s_addr = ip->ip_src.s_addr;
 446         key->fr_dstx.v4.s_addr = ip->ip_dst.s_addr;
 447 }
 448
     /*
      * Look up key in tree.  Returns the matching pf_fragment, or NULL when
      * there is none.
      *
      * A successful lookup counts as a use: the entry's fr_timeout is reset
      * to the current time and the entry is moved to the head of its queue
      * (pf_fragqueue or pf_cachequeue, depending on its mode).
      */
 449 static struct pf_fragment *
 450 pf_find_fragment_by_key(struct pf_fragment *key, struct pf_frag_tree *tree)
 451 {
 452         struct pf_fragment *frag;
 453
 454         frag = RB_FIND(pf_frag_tree, tree, key);
 455         if (frag != NULL) {
 456                 /* XXX Are we sure we want to update the timeout? */
 457                 frag->fr_timeout = pf_time_second();
 458                 if (BUFFER_FRAGMENTS(frag)) {
 459                         TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
 460                         TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
 461                 } else {
 462                         TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
 463                         TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
 464                 }
 465         }
 466
 467         return (frag);
 468 }
 469
     /*
      * Look up the fragment entry that matches an IPv4 header.  Builds a
      * temporary key on the stack with pf_ip2key() and searches tree with
      * pf_find_fragment_by_key(), including its side effects on a hit.
      */
 470 static __inline struct pf_fragment *
 471 pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree)
 472 {
 473         struct pf_fragment key;
 474         pf_ip2key(&key, ip);
 475         return pf_find_fragment_by_key(&key, tree);
 476 }
 477
     /*
      * Look up the fragment entry that matches an IPv6 header and fragment
      * header.  Builds a temporary key on the stack with pf_ip6hdr2key() and
      * searches tree with pf_find_fragment_by_key(), including its side
      * effects on a hit.
      */
 478 static __inline struct pf_fragment *
 479 pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh,
 480     struct pf_frag_tree *tree)
 481 {
 482       struct pf_fragment key;
 483       pf_ip6hdr2key(&key, ip6, fh);
 484       return pf_find_fragment_by_key(&key, tree);
 485 }
 486
 487 /*
      * Removes a fragment from its lookup tree and its queue, and frees the
      * fragment.
      *
      * The mode of the entry selects the tree, queue and pool: buffering
      * entries use pf_frag_tree, pf_fragqueue and pf_frag_pl; non-buffering
      * entries use pf_cache_tree, pf_cachequeue and pf_cache_pl.  Entries on
      * fr_queue / fr_cache are not touched here.
      */
 488
 489 static void
 490 pf_remove_fragment(struct pf_fragment *frag)
 491 {
 492         if (BUFFER_FRAGMENTS(frag)) {
 493                 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
 494                 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
 495                 pool_put(&pf_frag_pl, frag);
 496         } else {
 497                 RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
 498                 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
 499                 pool_put(&pf_cache_pl, frag);
 500         }
 501 }
 502
     /* Byte offset of a queued IPv4 fragment within the original datagram */
 503 #define FR_IP_OFF(fr)   ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
     /*
      * Add one IPv4 fragment to a buffering reassembly queue and, when the
      * datagram is complete, return it as a single mbuf chain.
      *
      *   m0     points to the mbuf of the new fragment
      *   frag   points to the matching pf_fragment, or to NULL when a new
      *          reassembly queue must be created; set to NULL again when
      *          the entry is released here
      *   frent  queue entry for the new fragment (fr_ip and fr_m are read)
      *   mff    zero when this is the last fragment of the datagram
      *
      * Returns the reassembled packet, or NULL when the datagram is still
      * incomplete or the fragment was dropped.  On the drop path frent is
      * returned to pf_frent_pl and the mbuf is freed; *m0 itself is not
      * modified by this function.
      */
 504 static struct mbuf *
 505 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
 506     struct pf_frent *frent, int mff)
 507 {
 508         struct mbuf     *m = *m0, *m2;
 509         struct pf_frent *frea, *next;
 510         struct pf_frent *frep = NULL;
 511         struct ip       *ip = frent->fr_ip;
 512         int              hlen = ip->ip_hl << 2;
             /* off: payload offset in bytes; ip_len: payload length in bytes */
 513         u_int16_t        off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
 514         u_int16_t        ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
             /*
              * NOTE(review): fr_max is 16 bits wide, so ip_len + off is
              * assumed not to exceed 65535 -- confirm the caller checks.
              */
 515         u_int16_t        fr_max = ip_len + off;
 516
 517         VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
 518
 519         /* Strip off ip header */
 520         m->m_data += hlen;
 521         m->m_len -= hlen;
 522
 523         /* Create a new reassembly queue for this packet */
 524         if (*frag == NULL) {
 525                 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
 526                 if (*frag == NULL) {
                             /* Pool exhausted: flush old entries, retry once */
 527                         pf_flush_fragments();
 528                         *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
 529                         if (*frag == NULL)
 530                                 goto drop_fragment;
 531                 }
 532
 533                 (*frag)->fr_flags = 0;
 534                 (*frag)->fr_max = 0;
 535                 (*frag)->fr_af = AF_INET;
 536                 (*frag)->fr_srcx.v4 = frent->fr_ip->ip_src;
 537                 (*frag)->fr_dstx.v4 = frent->fr_ip->ip_dst;
 538                 (*frag)->fr_p = frent->fr_ip->ip_p;
 539                 (*frag)->fr_id = frent->fr_ip->ip_id;
 540                 (*frag)->fr_timeout = pf_time_second();
 541                 LIST_INIT(&(*frag)->fr_queue);
 542
 543                 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
 544                 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
 545
 546                 /* We do not have a previous fragment */
 547                 frep = NULL;
 548                 goto insert;
 549         }
 550
 551         /*
 552          * Find a fragment after the current one:
 553          *  - off contains the real shifted offset.
              *  - on exit frep is the last entry starting at or before off
              *    (or NULL) and frea the first entry starting after it
              *    (or NULL).
 554          */
 555         LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
 556                 if (FR_IP_OFF(frea) > off)
 557                         break;
 558                 frep = frea;
 559         }
 560
             /* An existing reassembly queue is expected to be non-empty */
 561         VERIFY(frep != NULL || frea != NULL);
 562
             /*
              * Overlap with the preceding fragment: trim the overlapping
              * bytes from the front of the new fragment, or drop the new
              * fragment when it is covered entirely.
              */
 563         if (frep != NULL &&
 564             FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
 565             4 > off) {
 566                 u_int16_t       precut;
 567
 568                 precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
 569                     frep->fr_ip->ip_hl * 4 - off;
 570                 if (precut >= ip_len)
 571                         goto drop_fragment;
 572                 m_adj(frent->fr_m, precut);
 573                 DPFPRINTF(("overlap -%d\n", precut));
 574                 /* Enforce 8 byte boundaries */
 575                 ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
 576                 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
 577                 ip_len -= precut;
                     /*
                      * NOTE(review): this stores the payload length without
                      * the header length, whereas the other uses of ip_len
                      * in this function subtract ip_hl * 4 from the field,
                      * and the aftercut path below keeps the header length
                      * included.  Looks inconsistent -- confirm.
                      */
 578                 ip->ip_len = htons(ip_len);
 579         }
 580
             /*
              * Overlap with following fragments: trim the front of a
              * partially covered successor, or remove successors that the
              * new fragment covers completely.
              */
 581         for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
 582             frea = next) {
 583                 u_int16_t       aftercut;
 584
 585                 aftercut = ip_len + off - FR_IP_OFF(frea);
 586                 DPFPRINTF(("adjust overlap %d\n", aftercut));
 587                 if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
 588                     * 4) {
 589                         frea->fr_ip->ip_len =
 590                             htons(ntohs(frea->fr_ip->ip_len) - aftercut);
 591                         frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
 592                             (aftercut >> 3));
 593                         m_adj(frea->fr_m, aftercut);
 594                         break;
 595                 }
 596
 597                 /* This fragment is completely overlapped, lose it */
 598                 next = LIST_NEXT(frea, fr_next);
 599                 m_freem(frea->fr_m);
 600                 LIST_REMOVE(frea, fr_next);
 601                 pool_put(&pf_frent_pl, frea);
 602                 pf_nfrents--;
 603         }
 604
 605 insert:
 606         /* Update maximum data size */
 607         if ((*frag)->fr_max < fr_max)
 608                 (*frag)->fr_max = fr_max;
 609         /* This is the last segment */
 610         if (!mff)
 611                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
 612
             /* Link the new entry in offset order */
 613         if (frep == NULL)
 614                 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
 615         else
 616                 LIST_INSERT_AFTER(frep, frent, fr_next);
 617
 618         /* Check if we are completely reassembled */
 619         if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
 620                 return (NULL);
 621
 622         /* Check if we have all the data */
             /* off now accumulates the contiguous payload length seen so far */
 623         off = 0;
 624         for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
 625                 next = LIST_NEXT(frep, fr_next);
 626
 627                 off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
 628                 if (off < (*frag)->fr_max &&
 629                     (next == NULL || FR_IP_OFF(next) != off)) {
 630                         DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
 631                             off, next == NULL ? -1 : FR_IP_OFF(next),
 632                             (*frag)->fr_max));
 633                         return (NULL);
 634                 }
 635         }
 636         DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
 637         if (off < (*frag)->fr_max)
 638                 return (NULL);
 639
 640         /* We have all the data */
 641         frent = LIST_FIRST(&(*frag)->fr_queue);
 642         VERIFY(frent != NULL);
             /* Header plus payload must fit in a maximum-size IP packet */
 643         if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
 644                 DPFPRINTF(("drop: too big: %d\n", off));
 645                 pf_free_fragment(*frag);
 646                 *frag = NULL;
 647                 return (NULL);
 648         }
 649         next = LIST_NEXT(frent, fr_next);
 650
 651         /* Magic from ip_input */
             /*
              * The first entry's mbuf becomes the head of the result; every
              * following mbuf is appended with m_cat() and each pf_frent is
              * returned to the pool once its mbuf has been taken over.
              */
 652         ip = frent->fr_ip;
 653         m = frent->fr_m;
 654         m2 = m->m_next;
 655         m->m_next = NULL;
 656         m_cat(m, m2);
 657         pool_put(&pf_frent_pl, frent);
 658         pf_nfrents--;
 659         for (frent = next; frent != NULL; frent = next) {
 660                 next = LIST_NEXT(frent, fr_next);
 661
 662                 m2 = frent->fr_m;
 663                 pool_put(&pf_frent_pl, frent);
 664                 pf_nfrents--;
 665                 m_cat(m, m2);
 666         }
 667
             /* Restore the addresses recorded when the queue was created */
 668         ip->ip_src = (*frag)->fr_srcx.v4;
 669         ip->ip_dst = (*frag)->fr_dstx.v4;
 670
 671         /* Remove from fragment queue */
 672         pf_remove_fragment(*frag);
 673         *frag = NULL;
 674
             /* Re-expose the IP header that was stripped on entry */
 675         hlen = ip->ip_hl << 2;
 676         ip->ip_len = htons(off + hlen);
 677         m->m_len += hlen;
 678         m->m_data -= hlen;
 679
 680         /* some debugging cruft by sklower, below, will go away soon */
 681         /* XXX this should be done elsewhere */
             /* Recompute the packet header length over the whole chain */
 682         if (m->m_flags & M_PKTHDR) {
 683                 int plen = 0;
 684                 for (m2 = m; m2; m2 = m2->m_next)
 685                         plen += m2->m_len;
 686                 m->m_pkthdr.len = plen;
 687         }
 688
 689         DPFPRINTF(("complete: 0x%llx(%d)\n",
 690             (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len)));
 691         return (m);
 692
 693 drop_fragment:
 694         /* Oops - fail safe - drop packet */
 695         pool_put(&pf_frent_pl, frent);
 696         pf_nfrents--;
 697         m_freem(m);
 698         return (NULL);
 699 }
 700
 701 static struct mbuf *
 702 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
 703     int drop, int *nomem)
 704 {
 705         struct mbuf             *m = *m0;
 706         struct pf_frcache       *frp, *fra, *cur = NULL;
 707         int                      ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
 708         u_int16_t                off = ntohs(h->ip_off) << 3;
 709         u_int16_t                fr_max = ip_len + off;
 710         int                      hosed = 0;
 711
 712         VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
 713
 714         /* Create a new range queue for this packet */
 715         if (*frag == NULL) {
 716                 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
 717                 if (*frag == NULL) {
 718                         pf_flush_fragments();
 719                         *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
 720                         if (*frag == NULL)
 721                                 goto no_mem;
 722                 }
 723
 724                 /* Get an entry for the queue */
 725                 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
 726                 if (cur == NULL) {
 727                         pool_put(&pf_cache_pl, *frag);
 728                         *frag = NULL;
 729                         goto no_mem;
 730                 }
 731                 pf_ncache++;
 732
 733                 (*frag)->fr_flags = PFFRAG_NOBUFFER;
 734                 (*frag)->fr_max = 0;
 735                 (*frag)->fr_af = AF_INET;
 736                 (*frag)->fr_srcx.v4 = h->ip_src;
 737                 (*frag)->fr_dstx.v4 = h->ip_dst;
 738                 (*frag)->fr_p = h->ip_p;
 739                 (*frag)->fr_id = h->ip_id;
 740                 (*frag)->fr_timeout = pf_time_second();
 741
 742                 cur->fr_off = off;
 743                 cur->fr_end = fr_max;
 744                 LIST_INIT(&(*frag)->fr_cache);
 745                 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
 746
 747                 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
 748                 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
 749
 750                 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
 751                     fr_max));
 752
 753                 goto pass;
 754         }
 755
 756         /*
 757          * Find a fragment after the current one:
 758          *  - off contains the real shifted offset.
 759          */
 760         frp = NULL;
 761         LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
 762                 if (fra->fr_off > off)
 763                         break;
 764                 frp = fra;
 765         }
 766
 767         VERIFY(frp != NULL || fra != NULL);
 768
 769         if (frp != NULL) {
 770                 int     precut;
 771
 772                 precut = frp->fr_end - off;
 773                 if (precut >= ip_len) {
 774                         /* Fragment is entirely a duplicate */
 775                         DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
 776                             h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
 777                         goto drop_fragment;
 778                 }
 779                 if (precut == 0) {
 780                         /* They are adjacent.  Fixup cache entry */
 781                         DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
 782                             h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
 783                         frp->fr_end = fr_max;
 784                 } else if (precut > 0) {
 785                         /*
 786                          * The first part of this payload overlaps with a
 787                          * fragment that has already been passed.
 788                          * Need to trim off the first part of the payload.
 789                          * But to do so easily, we need to create another
 790                          * mbuf to throw the original header into.
 791                          */
 792
 793                         DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
 794                             h->ip_id, precut, frp->fr_off, frp->fr_end, off,
 795                             fr_max));
 796
 797                         off += precut;
 798                         fr_max -= precut;
 799                         /* Update the previous frag to encompass this one */
 800                         frp->fr_end = fr_max;
 801
 802                         if (!drop) {
 803                                 /*
 804                                  * XXX Optimization opportunity
 805                                  * This is a very heavy way to trim the payload.
 806                                  * we could do it much faster by diddling mbuf
 807                                  * internals but that would be even less legible
 808                                  * than this mbuf magic.  For my next trick,
 809                                  * I'll pull a rabbit out of my laptop.
 810                                  */
 811                                 *m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT);
 812                                 if (*m0 == NULL)
 813                                         goto no_mem;
 814                                 VERIFY((*m0)->m_next == NULL);
 815                                 m_adj(m, precut + (h->ip_hl << 2));
 816                                 m_cat(*m0, m);
 817                                 m = *m0;
 818                                 if (m->m_flags & M_PKTHDR) {
 819                                         int plen = 0;
 820                                         struct mbuf *t;
 821                                         for (t = m; t; t = t->m_next)
 822                                                 plen += t->m_len;
 823                                         m->m_pkthdr.len = plen;
 824                                 }
 825
 826
 827                                 h = mtod(m, struct ip *);
 828
 829
 830                                 VERIFY((int)m->m_len ==
 831                                     ntohs(h->ip_len) - precut);
 832                                 h->ip_off = htons(ntohs(h->ip_off) +
 833                                     (precut >> 3));
 834                                 h->ip_len = htons(ntohs(h->ip_len) - precut);
 835                         } else {
 836                                 hosed++;
 837                         }
 838                 } else {
 839                         /* There is a gap between fragments */
 840
 841                         DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
 842                             h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
 843                             fr_max));
 844
 845                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
 846                         if (cur == NULL)
 847                                 goto no_mem;
 848                         pf_ncache++;
 849
 850                         cur->fr_off = off;
 851                         cur->fr_end = fr_max;
 852                         LIST_INSERT_AFTER(frp, cur, fr_next);
 853                 }
 854         }
 855
 856         if (fra != NULL) {
 857                 int     aftercut;
 858                 int     merge = 0;
 859
 860                 aftercut = fr_max - fra->fr_off;
 861                 if (aftercut == 0) {
 862                         /* Adjacent fragments */
 863                         DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
 864                             h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
 865                         fra->fr_off = off;
 866                         merge = 1;
 867                 } else if (aftercut > 0) {
 868                         /* Need to chop off the tail of this fragment */
 869                         DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
 870                             h->ip_id, aftercut, off, fr_max, fra->fr_off,
 871                             fra->fr_end));
 872                         fra->fr_off = off;
 873                         fr_max -= aftercut;
 874
 875                         merge = 1;
 876
 877                         if (!drop) {
 878                                 m_adj(m, -aftercut);
 879                                 if (m->m_flags & M_PKTHDR) {
 880                                         int plen = 0;
 881                                         struct mbuf *t;
 882                                         for (t = m; t; t = t->m_next)
 883                                                 plen += t->m_len;
 884                                         m->m_pkthdr.len = plen;
 885                                 }
 886                                 h = mtod(m, struct ip *);
 887                                 VERIFY((int)m->m_len ==
 888                                     ntohs(h->ip_len) - aftercut);
 889                                 h->ip_len = htons(ntohs(h->ip_len) - aftercut);
 890                         } else {
 891                                 hosed++;
 892                         }
 893                 } else if (frp == NULL) {
 894                         /* There is a gap between fragments */
 895                         DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
 896                             h->ip_id, -aftercut, off, fr_max, fra->fr_off,
 897                             fra->fr_end));
 898
 899                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
 900                         if (cur == NULL)
 901                                 goto no_mem;
 902                         pf_ncache++;
 903
 904                         cur->fr_off = off;
 905                         cur->fr_end = fr_max;
 906                         LIST_INSERT_BEFORE(fra, cur, fr_next);
 907                 }
 908
 909
 910                 /* Need to glue together two separate fragment descriptors */
 911                 if (merge) {
 912                         if (cur && fra->fr_off <= cur->fr_end) {
 913                                 /* Need to merge in a previous 'cur' */
 914                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
 915                                     "%d-%d) %d-%d (%d-%d)\n",
 916                                     h->ip_id, cur->fr_off, cur->fr_end, off,
 917                                     fr_max, fra->fr_off, fra->fr_end));
 918                                 fra->fr_off = cur->fr_off;
 919                                 LIST_REMOVE(cur, fr_next);
 920                                 pool_put(&pf_cent_pl, cur);
 921                                 pf_ncache--;
 922                                 cur = NULL;
 923
 924                         } else if (frp && fra->fr_off <= frp->fr_end) {
 925                                 /* Need to merge in a modified 'frp' */
 926                                 VERIFY(cur == NULL);
 927                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
 928                                     "%d-%d) %d-%d (%d-%d)\n",
 929                                     h->ip_id, frp->fr_off, frp->fr_end, off,
 930                                     fr_max, fra->fr_off, fra->fr_end));
 931                                 fra->fr_off = frp->fr_off;
 932                                 LIST_REMOVE(frp, fr_next);
 933                                 pool_put(&pf_cent_pl, frp);
 934                                 pf_ncache--;
 935                                 frp = NULL;
 936
 937                         }
 938                 }
 939         }
 940
 941         if (hosed) {
 942                 /*
 943                  * We must keep tracking the overall fragment even when
 944                  * we're going to drop it anyway so that we know when to
 945                  * free the overall descriptor.  Thus we drop the frag late.
 946                  */
 947                 goto drop_fragment;
 948         }
 949
 950
 951 pass:
 952         /* Update maximum data size */
 953         if ((*frag)->fr_max < fr_max)
 954                 (*frag)->fr_max = fr_max;
 955
 956         /* This is the last segment */
 957         if (!mff)
 958                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
 959
 960         /* Check if we are completely reassembled */
 961         if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
 962             LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
 963             LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
 964                 /* Remove from fragment queue */
 965                 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
 966                     (*frag)->fr_max));
 967                 pf_free_fragment(*frag);
 968                 *frag = NULL;
 969         }
 970
 971         return (m);
 972
 973 no_mem:
 974         *nomem = 1;
 975
 976         /* Still need to pay attention to !IP_MF */
 977         if (!mff && *frag != NULL)
 978                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
 979
 980         m_freem(m);
 981         return (NULL);
 982
 983 drop_fragment:
 984
 985         /* Still need to pay attention to !IP_MF */
 986         if (!mff && *frag != NULL)
 987                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
 988
 989         if (drop) {
 990                 /* This fragment has been deemed bad.  Don't reass */
 991                 if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
 992                         DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
 993                             h->ip_id));
 994                 (*frag)->fr_flags |= PFFRAG_DROP;
 995         }
 996
 997         m_freem(m);
 998         return (NULL);
 999 }
1000
1001 #define FR_IP6_OFF(fr) \
1002         (ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
1003 #define FR_IP6_PLEN(fr) (ntohs((fr)->fr_ip6->ip6_plen))
1004 struct mbuf *
1005 pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag,
1006     struct pf_frent *frent, int mff)
1007 {
1008         struct mbuf *m, *m2;
1009         struct pf_frent *frea, *frep, *next;
1010         struct ip6_hdr *ip6;
1011         int plen, off, fr_max;
1012
1013         VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
1014         m = *m0;
1015         frep = NULL;
1016         ip6 = frent->fr_ip6;
1017         off = FR_IP6_OFF(frent);
1018         plen = FR_IP6_PLEN(frent);
1019         fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof *ip6);
1020
1021         DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u "
1022             "fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off,
1023             frent->fr_ip6f_hlen, fr_max, m->m_len));
1024
1025         /* strip off headers up to the fragment payload */
1026         m->m_data += frent->fr_ip6f_hlen;
1027         m->m_len -= frent->fr_ip6f_hlen;
1028
1029         /* Create a new reassembly queue for this packet */
1030         if (*frag == NULL) {
1031                 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
1032                 if (*frag == NULL) {
1033                         pf_flush_fragments();
1034                         *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
1035                         if (*frag == NULL)
1036                                 goto drop_fragment;
1037                 }
1038
1039                 (*frag)->fr_flags = 0;
1040                 (*frag)->fr_max = 0;
1041                 (*frag)->fr_af = AF_INET6;
1042                 (*frag)->fr_srcx.v6 = frent->fr_ip6->ip6_src;
1043                 (*frag)->fr_dstx.v6 = frent->fr_ip6->ip6_dst;
1044                 (*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
1045                 (*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
1046                 (*frag)->fr_timeout = pf_time_second();
1047                 LIST_INIT(&(*frag)->fr_queue);
1048
1049                 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
1050                 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
1051
1052                 /* We do not have a previous fragment */
1053                 frep = NULL;
1054                 goto insert;
1055         }
1056
1057         /*
1058          * Find a fragment after the current one:
1059          *  - off contains the real shifted offset.
1060          */
1061         LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
1062                 if (FR_IP6_OFF(frea) > off)
1063                         break;
1064                 frep = frea;
1065         }
1066
1067         VERIFY(frep != NULL || frea != NULL);
1068
1069         if (frep != NULL &&
1070             FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off)
1071         {
1072                 u_int16_t precut;
1073
1074                 precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
1075                     frep->fr_ip6f_hlen - off;
1076                 if (precut >= plen)
1077                         goto drop_fragment;
1078                 m_adj(frent->fr_m, precut);
1079                 DPFPRINTF(("overlap -%d\n", precut));
1080                 /* Enforce 8 byte boundaries */
1081                 frent->fr_ip6f_opt.ip6f_offlg =
1082                     htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
1083                     (precut >> 3));
1084                 off = FR_IP6_OFF(frent);
1085                 plen -= precut;
1086                 ip6->ip6_plen = htons(plen);
1087         }
1088
1089         for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
1090                 u_int16_t       aftercut;
1091
1092                 aftercut = plen + off - FR_IP6_OFF(frea);
1093                 DPFPRINTF(("adjust overlap %d\n", aftercut));
1094                 if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
1095                         frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
1096                                 aftercut);
1097                         frea->fr_ip6f_opt.ip6f_offlg =
1098                             htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
1099                             (aftercut >> 3));
1100                         m_adj(frea->fr_m, aftercut);
1101                         break;
1102                 }
1103
1104                 /* This fragment is completely overlapped, lose it */
1105                 next = LIST_NEXT(frea, fr_next);
1106                 m_freem(frea->fr_m);
1107                 LIST_REMOVE(frea, fr_next);
1108                 pool_put(&pf_frent_pl, frea);
1109                 pf_nfrents--;
1110         }
1111
1112   insert:
1113         /* Update maximum data size */
1114         if ((*frag)->fr_max < fr_max)
1115                 (*frag)->fr_max = fr_max;
1116         /* This is the last segment */
1117         if (!mff)
1118                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1119
1120         if (frep == NULL)
1121                 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
1122         else
1123                 LIST_INSERT_AFTER(frep, frent, fr_next);
1124
1125         /* Check if we are completely reassembled */
1126         if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
1127                 return (NULL);
1128
1129         /* Check if we have all the data */
1130         off = 0;
1131         for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
1132                 next = LIST_NEXT(frep, fr_next);
1133                 off += FR_IP6_PLEN(frep) - (frent->fr_ip6f_hlen - sizeof *ip6);
1134                 DPFPRINTF(("frep at %d, next %d, max %d\n",
1135                         off, next == NULL ? -1 : FR_IP6_OFF(next),
1136                         (*frag)->fr_max));
1137                 if (off < (*frag)->fr_max &&
1138                     (next == NULL || FR_IP6_OFF(next) != off)) {
1139                         DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
1140                             off, next == NULL ? -1 : FR_IP6_OFF(next),
1141                             (*frag)->fr_max));
1142                         return (NULL);
1143                 }
1144         }
1145         DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
1146         if (off < (*frag)->fr_max)
1147                 return (NULL);
1148
1149         /* We have all the data */
1150         frent = LIST_FIRST(&(*frag)->fr_queue);
1151         VERIFY(frent != NULL);
1152         if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
1153                 DPFPRINTF(("drop: too big: %d\n", off));
1154                 pf_free_fragment(*frag);
1155                 *frag = NULL;
1156                 return (NULL);
1157         }
1158
1159         ip6 = frent->fr_ip6;
1160         ip6->ip6_nxt = (*frag)->fr_p;
1161         ip6->ip6_plen = htons(off);
1162         ip6->ip6_src = (*frag)->fr_srcx.v6;
1163         ip6->ip6_dst = (*frag)->fr_dstx.v6;
1164
1165         /* Remove from fragment queue */
1166         pf_remove_fragment(*frag);
1167         *frag = NULL;
1168
1169         m = frent->fr_m;
1170         m->m_len += sizeof(struct ip6_hdr);
1171         m->m_data -= sizeof(struct ip6_hdr);
1172         memmove(m->m_data, ip6, sizeof(struct ip6_hdr));
1173
1174         next = LIST_NEXT(frent, fr_next);
1175         pool_put(&pf_frent_pl, frent);
1176         pf_nfrents--;
1177         for (frent = next; next != NULL; frent = next) {
1178                 m2 = frent->fr_m;
1179
1180                 m_cat(m, m2);
1181                 next = LIST_NEXT(frent, fr_next);
1182                 pool_put(&pf_frent_pl, frent);
1183                 pf_nfrents--;
1184         }
1185
1186         /* XXX this should be done elsewhere */
1187         if (m->m_flags & M_PKTHDR) {
1188                 int pktlen = 0;
1189                 for (m2 = m; m2; m2 = m2->m_next)
1190                         pktlen += m2->m_len;
1191                 m->m_pkthdr.len = pktlen;
1192         }
1193
1194         DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n",
1195             (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip6->ip6_plen),
1196             m->m_pkthdr.len));
1197
1198         return m;
1199
1200  drop_fragment:
1201         /* Oops - fail safe - drop packet */
1202         pool_put(&pf_frent_pl, frent);
1203         --pf_nfrents;
1204         m_freem(m);
1205         return NULL;
1206 }
1207
1208 static struct mbuf *
1209 pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh,
1210     struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem)
1211 {
1212         struct mbuf *m = *m0;
1213         u_int16_t plen, off, fr_max;
1214         struct pf_frcache *frp, *fra, *cur = NULL;
1215         int hosed = 0;
1216
1217         VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
1218         m = *m0;
1219         off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
1220         plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);
1221
1222         /*
1223          * Apple Modification: dimambro@apple.com. The hlen, being passed
1224          * into this function Includes all the headers associated with
1225          * the packet, and may include routing headers, so to get to
1226          * the data payload as stored in the original IPv6 header we need
1227          * to subtract al those headers and the IP header.
1228          *
1229          * The 'max' local variable should also contain the offset from the start
1230          * of the reassembled packet to the octet just past the end of the octets
1231          * in the current fragment where:
1232          * - 'off' is the offset from the start of the reassembled packet to the
1233          *    first octet in the fragment,
1234          * - 'plen' is the length of the "payload data length" Excluding all the
1235          *   IPv6 headers of the fragment.
1236          * - 'hlen' is computed in pf_normalize_ip6() as the offset from the start
1237          *   of the IPv6 packet to the beginning of the data.
1238          */
1239         fr_max = off + plen;
1240
1241         DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n",
1242             (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off, fr_max));
1243
1244         /* Create a new range queue for this packet */
1245         if (*frag == NULL) {
1246                 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
1247                 if (*frag == NULL) {
1248                         pf_flush_fragments();
1249                         *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
1250                         if (*frag == NULL)
1251                                 goto no_mem;
1252                 }
1253
1254                 /* Get an entry for the queue */
1255                 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1256                 if (cur == NULL) {
1257                         pool_put(&pf_cache_pl, *frag);
1258                         *frag = NULL;
1259                         goto no_mem;
1260                 }
1261                 pf_ncache++;
1262
1263                 (*frag)->fr_flags = PFFRAG_NOBUFFER;
1264                 (*frag)->fr_max = 0;
1265                 (*frag)->fr_af = AF_INET6;
1266                 (*frag)->fr_srcx.v6 = h->ip6_src;
1267                 (*frag)->fr_dstx.v6 = h->ip6_dst;
1268                 (*frag)->fr_p = fh->ip6f_nxt;
1269                 (*frag)->fr_id6 = fh->ip6f_ident;
1270                 (*frag)->fr_timeout = pf_time_second();
1271
1272                 cur->fr_off = off;
1273                 cur->fr_end = fr_max;
1274                 LIST_INIT(&(*frag)->fr_cache);
1275                 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
1276
1277                 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
1278                 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
1279
1280                 DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
1281                     off, fr_max));
1282
1283                 goto pass;
1284         }
1285
1286         /*
1287          * Find a fragment after the current one:
1288          *  - off contains the real shifted offset.
1289          */
1290         frp = NULL;
1291         LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
1292                 if (fra->fr_off > off)
1293                         break;
1294                 frp = fra;
1295         }
1296
1297         VERIFY(frp != NULL || fra != NULL);
1298
1299         if (frp != NULL) {
1300                 int precut;
1301
1302                 precut = frp->fr_end - off;
1303                 if (precut >= plen) {
1304                         /* Fragment is entirely a duplicate */
1305                         DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
1306                             ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
1307                             off, fr_max));
1308                         goto drop_fragment;
1309                 }
1310                 if (precut == 0) {
1311                         /* They are adjacent.  Fixup cache entry */
1312                         DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
1313                             ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
1314                             off, fr_max));
1315                         frp->fr_end = fr_max;
1316                 } else if (precut > 0) {
1317                         /* The first part of this payload overlaps with a
1318                          * fragment that has already been passed.
1319                          * Need to trim off the first part of the payload.
1320                          * But to do so easily, we need to create another
1321                          * mbuf to throw the original header into.
1322                          */
1323
1324                         DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
1325                             ntohl(fh->ip6f_ident), precut, frp->fr_off,
1326                             frp->fr_end, off, fr_max));
1327
1328                         off += precut;
1329                         fr_max -= precut;
1330                         /* Update the previous frag to encompass this one */
1331                         frp->fr_end = fr_max;
1332
1333                         if (!drop) {
1334                                 /* XXX Optimization opportunity
1335                                  * This is a very heavy way to trim the payload.
1336                                  * we could do it much faster by diddling mbuf
1337                                  * internals but that would be even less legible
1338                                  * than this mbuf magic.  For my next trick,
1339                                  * I'll pull a rabbit out of my laptop.
1340                                  */
1341                                 *m0 = m_copym(m, 0, hlen, M_NOWAIT);
1342                                 if (*m0 == NULL)
1343                                         goto no_mem;
1344                                 VERIFY((*m0)->m_next == NULL);
1345                                 m_adj(m, precut + hlen);
1346                                 m_cat(*m0, m);
1347                                 m = *m0;
1348                                 if (m->m_flags & M_PKTHDR) {
1349                                         int pktlen = 0;
1350                                         struct mbuf *t;
1351                                         for (t = m; t; t = t->m_next)
1352                                                 pktlen += t->m_len;
1353                                         m->m_pkthdr.len = pktlen;
1354                                 }
1355
1356                                 h = mtod(m, struct ip6_hdr *);
1357
1358                                 VERIFY((int)m->m_len ==
1359                                     ntohs(h->ip6_plen) - precut);
1360                                 fh->ip6f_offlg &= ~IP6F_OFF_MASK;
1361                                 fh->ip6f_offlg |=
1362                                     htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
1363                                     + (precut >> 3));
1364                                 h->ip6_plen = htons(ntohs(h->ip6_plen) -
1365                                     precut);
1366                         } else {
1367                                 hosed++;
1368                         }
1369                 } else {
1370                         /* There is a gap between fragments */
1371
1372                         DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
1373                             ntohl(fh->ip6f_ident), -precut, frp->fr_off,
1374                             frp->fr_end, off, fr_max));
1375
1376                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1377                         if (cur == NULL)
1378                                 goto no_mem;
1379                         pf_ncache++;
1380
1381                         cur->fr_off = off;
1382                         cur->fr_end = fr_max;
1383                         LIST_INSERT_AFTER(frp, cur, fr_next);
1384                 }
1385         }
1386
1387         if (fra != NULL) {
1388                 int     aftercut;
1389                 int     merge = 0;
1390
1391                 aftercut = fr_max - fra->fr_off;
1392                 if (aftercut == 0) {
1393                         /* Adjacent fragments */
1394                         DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
1395                             ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
1396                             fra->fr_end));
1397                         fra->fr_off = off;
1398                         merge = 1;
1399                 } else if (aftercut > 0) {
1400                         /* Need to chop off the tail of this fragment */
1401                         DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
1402                             ntohl(fh->ip6f_ident), aftercut, off, fr_max,
1403                             fra->fr_off, fra->fr_end));
1404                         fra->fr_off = off;
1405                         fr_max -= aftercut;
1406
1407                         merge = 1;
1408
1409                         if (!drop) {
1410                                 m_adj(m, -aftercut);
1411                                 if (m->m_flags & M_PKTHDR) {
1412                                         int pktlen = 0;
1413                                         struct mbuf *t;
1414                                         for (t = m; t; t = t->m_next)
1415                                                 pktlen += t->m_len;
1416                                         m->m_pkthdr.len = pktlen;
1417                                 }
1418                                 h = mtod(m, struct ip6_hdr *);
1419                                 VERIFY((int)m->m_len ==
1420                                     ntohs(h->ip6_plen) - aftercut);
1421                                 h->ip6_plen =
1422                                     htons(ntohs(h->ip6_plen) - aftercut);
1423                         } else {
1424                                 hosed++;
1425                         }
1426                 } else if (frp == NULL) {
1427                         /* There is a gap between fragments */
1428                         DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
1429                             ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
1430                             fra->fr_off, fra->fr_end));
1431
1432                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1433                         if (cur == NULL)
1434                                 goto no_mem;
1435                         pf_ncache++;
1436
1437                         cur->fr_off = off;
1438                         cur->fr_end = fr_max;
1439                         LIST_INSERT_BEFORE(fra, cur, fr_next);
1440                 }
1441
1442                 /* Need to glue together two separate fragment descriptors */
1443                 if (merge) {
1444                         if (cur && fra->fr_off <= cur->fr_end) {
1445                                 /* Need to merge in a previous 'cur' */
1446                                 DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1447                                     "%d-%d) %d-%d (%d-%d)\n",
1448                                     ntohl(fh->ip6f_ident), cur->fr_off,
1449                                     cur->fr_end, off, fr_max, fra->fr_off,
1450                                     fra->fr_end));
1451                                 fra->fr_off = cur->fr_off;
1452                                 LIST_REMOVE(cur, fr_next);
1453                                 pool_put(&pf_cent_pl, cur);
1454                                 pf_ncache--;
1455                                 cur = NULL;
1456                         } else if (frp && fra->fr_off <= frp->fr_end) {
1457                                 /* Need to merge in a modified 'frp' */
1458                                 VERIFY(cur == NULL);
1459                                 DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1460                                     "%d-%d) %d-%d (%d-%d)\n",
1461                                     ntohl(fh->ip6f_ident), frp->fr_off,
1462                                     frp->fr_end, off, fr_max, fra->fr_off,
1463                                     fra->fr_end));
1464                                 fra->fr_off = frp->fr_off;
1465                                 LIST_REMOVE(frp, fr_next);
1466                                 pool_put(&pf_cent_pl, frp);
1467                                 pf_ncache--;
1468                                 frp = NULL;
1469                         }
1470                 }
1471         }
1472
1473         if (hosed) {
1474                 /*
1475                  * We must keep tracking the overall fragment even when
1476                  * we're going to drop it anyway so that we know when to
1477                  * free the overall descriptor.  Thus we drop the frag late.
1478                  */
1479                 goto drop_fragment;
1480         }
1481
1482  pass:
1483         /* Update maximum data size */
1484         if ((*frag)->fr_max < fr_max)
1485                 (*frag)->fr_max = fr_max;
1486
1487         /* This is the last segment */
1488         if (!mff)
1489                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1490
1491         /* Check if we are completely reassembled */
1492         if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1493             LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
1494             LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
1495                 /* Remove from fragment queue */
1496                 DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
1497                     ntohl(fh->ip6f_ident), (*frag)->fr_max));
1498                 pf_free_fragment(*frag);
1499                 *frag = NULL;
1500         }
1501
1502         return (m);
1503
1504  no_mem:
1505         *nomem = 1;
1506
1507         /* Still need to pay attention to !IP_MF */
1508         if (!mff && *frag != NULL)
1509                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1510
1511         m_freem(m);
1512         return (NULL);
1513
1514  drop_fragment:
1515
1516         /* Still need to pay attention to !IP_MF */
1517         if (!mff && *frag != NULL)
1518                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
1519
1520         if (drop) {
1521                 /* This fragment has been deemed bad.  Don't reass */
1522                 if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
1523                         DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n",
1524                             ntohl(fh->ip6f_ident)));
1525                 (*frag)->fr_flags |= PFFRAG_DROP;
1526         }
1527
1528         m_freem(m);
1529         return (NULL);
1530 }
1531
1532 int
1533 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
1534     struct pf_pdesc *pd)
1535 {
1536         struct mbuf             *m = *m0;
1537         struct pf_rule          *r;
1538         struct pf_frent         *frent;
1539         struct pf_fragment      *frag = NULL;
1540         struct ip               *h = mtod(m, struct ip *);
1541         int                      mff = (ntohs(h->ip_off) & IP_MF);
1542         int                      hlen = h->ip_hl << 2;
1543         u_int16_t                fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1544         u_int16_t                fr_max;
1545         int                      ip_len;
1546         int                      ip_off;
1547         int                      asd = 0;
1548         struct pf_ruleset       *ruleset = NULL;
1549
1550         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1551         while (r != NULL) {
1552                 r->evaluations++;
1553                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1554                         r = r->skip[PF_SKIP_IFP].ptr;
1555                 else if (r->direction && r->direction != dir)
1556                         r = r->skip[PF_SKIP_DIR].ptr;
1557                 else if (r->af && r->af != AF_INET)
1558                         r = r->skip[PF_SKIP_AF].ptr;
1559                 else if (r->proto && r->proto != h->ip_p)
1560                         r = r->skip[PF_SKIP_PROTO].ptr;
1561                 else if (PF_MISMATCHAW(&r->src.addr,
1562                     (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
1563                     r->src.neg, kif))
1564                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1565                 else if (PF_MISMATCHAW(&r->dst.addr,
1566                     (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
1567                     r->dst.neg, NULL))
1568                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
1569                 else {
1570                         if (r->anchor == NULL)
1571                                 break;
1572                         else
1573                                 pf_step_into_anchor(&asd, &ruleset,
1574                                     PF_RULESET_SCRUB, &r, NULL, NULL);
1575                 }
1576                 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
1577                     PF_RULESET_SCRUB, &r, NULL, NULL))
1578                         break;
1579         }
1580
1581         if (r == NULL || r->action == PF_NOSCRUB)
1582                 return (PF_PASS);
1583         else {
1584                 r->packets[dir == PF_OUT]++;
1585                 r->bytes[dir == PF_OUT] += pd->tot_len;
1586         }
1587
1588         /* Check for illegal packets */
1589         if (hlen < (int)sizeof (struct ip))
1590                 goto drop;
1591
1592         if (hlen > ntohs(h->ip_len))
1593                 goto drop;
1594
1595         /* Clear IP_DF if the rule uses the no-df option */
1596         if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
1597                 u_int16_t ipoff = h->ip_off;
1598
1599                 h->ip_off &= htons(~IP_DF);
1600                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
1601         }
1602
1603         /* We will need other tests here */
1604         if (!fragoff && !mff)
1605                 goto no_fragment;
1606
1607         /*
1608          * We're dealing with a fragment now. Don't allow fragments
1609          * with IP_DF to enter the cache. If the flag was cleared by
1610          * no-df above, fine. Otherwise drop it.
1611          */
1612         if (h->ip_off & htons(IP_DF)) {
1613                 DPFPRINTF(("IP_DF\n"));
1614                 goto bad;
1615         }
1616
1617         ip_len = ntohs(h->ip_len) - hlen;
1618         ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1619
1620         /* All fragments are 8 byte aligned */
1621         if (mff && (ip_len & 0x7)) {
1622                 DPFPRINTF(("mff and %d\n", ip_len));
1623                 goto bad;
1624         }
1625
1626         /* Respect maximum length */
1627         if (fragoff + ip_len > IP_MAXPACKET) {
1628                 DPFPRINTF(("max packet %d\n", fragoff + ip_len));
1629                 goto bad;
1630         }
1631         fr_max = fragoff + ip_len;
1632
1633         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
1634                 /* Fully buffer all of the fragments */
1635
1636                 frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
1637                 /* Check if we saw the last fragment already */
1638                 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1639                     fr_max > frag->fr_max)
1640                         goto bad;
1641
1642                 /* Get an entry for the fragment queue */
1643                 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
1644                 if (frent == NULL) {
1645                         REASON_SET(reason, PFRES_MEMORY);
1646                         return (PF_DROP);
1647                 }
1648                 pf_nfrents++;
1649                 frent->fr_ip = h;
1650                 frent->fr_m = m;
1651
1652                 /* Might return a completely reassembled mbuf, or NULL */
1653                 DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
1654                     fragoff, fr_max));
1655                 *m0 = m = pf_reassemble(m0, &frag, frent, mff);
1656
1657                 if (m == NULL)
1658                         return (PF_DROP);
1659
1660                 VERIFY(m->m_flags & M_PKTHDR);
1661
1662                 /* use mtag from concatenated mbuf chain */
1663                 pd->pf_mtag = pf_find_mtag(m);
1664 #if DIAGNOSTIC
1665                 if (pd->pf_mtag == NULL) {
1666                         printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
1667                         if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1668                                 m_freem(m);
1669                                 *m0 = NULL;
1670                                 goto no_mem;
1671                         }
1672                 }
1673 #endif
1674                 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1675                         goto drop;
1676
1677                 h = mtod(m, struct ip *);
1678         } else {
1679                 /* non-buffering fragment cache (drops or masks overlaps) */
1680                 int     nomem = 0;
1681
1682                 if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
1683                         /*
1684                          * Already passed the fragment cache in the
1685                          * input direction.  If we continued, it would
1686                          * appear to be a dup and would be dropped.
1687                          */
1688                         goto fragment_pass;
1689                 }
1690
1691                 frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);
1692
1693                 /* Check if we saw the last fragment already */
1694                 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1695                     fr_max > frag->fr_max) {
1696                         if (r->rule_flag & PFRULE_FRAGDROP)
1697                                 frag->fr_flags |= PFFRAG_DROP;
1698                         goto bad;
1699                 }
1700
1701                 *m0 = m = pf_fragcache(m0, h, &frag, mff,
1702                     (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
1703                 if (m == NULL) {
1704                         if (nomem)
1705                                 goto no_mem;
1706                         goto drop;
1707                 }
1708
1709                 VERIFY(m->m_flags & M_PKTHDR);
1710
1711                 /* use mtag from copied and trimmed mbuf chain */
1712                 pd->pf_mtag = pf_find_mtag(m);
1713 #if DIAGNOSTIC
1714                 if (pd->pf_mtag == NULL) {
1715                         printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
1716                         if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1717                                 m_freem(m);
1718                                 *m0 = NULL;
1719                                 goto no_mem;
1720                         }
1721                 }
1722 #endif
1723                 if (dir == PF_IN)
1724                         pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
1725
1726                 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1727                         goto drop;
1728                 goto fragment_pass;
1729         }
1730
1731 no_fragment:
1732         /* At this point, only IP_DF is allowed in ip_off */
1733         if (h->ip_off & ~htons(IP_DF)) {
1734                 u_int16_t ipoff = h->ip_off;
1735
1736                 h->ip_off &= htons(IP_DF);
1737                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
1738         }
1739
1740         /* Enforce a minimum ttl, may cause endless packet loops */
1741         if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1742                 u_int16_t ip_ttl = h->ip_ttl;
1743
1744                 h->ip_ttl = r->min_ttl;
1745                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
1746         }
1747         if (r->rule_flag & PFRULE_RANDOMID) {
1748                 u_int16_t oip_id = h->ip_id;
1749
1750                 h->ip_id = ip_randomid();
1751                 h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, 0);
1752         }
1753         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1754                 pd->flags |= PFDESC_IP_REAS;
1755
1756         return (PF_PASS);
1757
1758 fragment_pass:
1759         /* Enforce a minimum ttl, may cause endless packet loops */
1760         if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1761                 u_int16_t ip_ttl = h->ip_ttl;
1762
1763                 h->ip_ttl = r->min_ttl;
1764                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
1765         }
1766         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1767                 pd->flags |= PFDESC_IP_REAS;
1768         return (PF_PASS);
1769
1770 no_mem:
1771         REASON_SET(reason, PFRES_MEMORY);
1772         if (r != NULL && r->log)
1773                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
1774                     NULL, NULL, pd);
1775         return (PF_DROP);
1776
1777 drop:
1778         REASON_SET(reason, PFRES_NORM);
1779         if (r != NULL && r->log)
1780                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
1781                     NULL, NULL, pd);
1782         return (PF_DROP);
1783
1784 bad:
1785         DPFPRINTF(("dropping bad IPv4 fragment\n"));
1786
1787         /* Free associated fragments */
1788         if (frag != NULL)
1789                 pf_free_fragment(frag);
1790
1791         REASON_SET(reason, PFRES_FRAG);
1792         if (r != NULL && r->log)
1793                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1794
1795         return (PF_DROP);
1796 }
1797
1798 #if INET6
1799 int
1800 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
1801     u_short *reason, struct pf_pdesc *pd)
1802 {
1803         struct mbuf             *m = *m0;
1804         struct pf_rule          *r;
1805         struct ip6_hdr          *h = mtod(m, struct ip6_hdr *);
1806         int                      off;
1807         struct ip6_ext           ext;
1808 /* adi XXX */
1809 #if 0
1810         struct ip6_opt           opt;
1811         struct ip6_opt_jumbo     jumbo;
1812         int                      optend;
1813         int                      ooff;
1814 #endif
1815         struct ip6_frag          frag;
1816         u_int32_t                jumbolen = 0, plen;
1817         u_int16_t                fragoff = 0;
1818         u_int8_t                 proto;
1819         int                      terminal;
1820         struct pf_frent         *frent;
1821         struct pf_fragment      *pff = NULL;
1822         int                      mff = 0, rh_cnt = 0;
1823         u_int16_t                fr_max;
1824         int                      asd = 0;
1825         struct pf_ruleset       *ruleset = NULL;
1826
1827         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1828         while (r != NULL) {
1829                 r->evaluations++;
1830                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1831                         r = r->skip[PF_SKIP_IFP].ptr;
1832                 else if (r->direction && r->direction != dir)
1833                         r = r->skip[PF_SKIP_DIR].ptr;
1834                 else if (r->af && r->af != AF_INET6)
1835                         r = r->skip[PF_SKIP_AF].ptr;
1836 #if 0 /* header chain! */
1837                 else if (r->proto && r->proto != h->ip6_nxt)
1838                         r = r->skip[PF_SKIP_PROTO].ptr;
1839 #endif
1840                 else if (PF_MISMATCHAW(&r->src.addr,
1841                     (struct pf_addr *)&h->ip6_src, AF_INET6,
1842                     r->src.neg, kif))
1843                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1844                 else if (PF_MISMATCHAW(&r->dst.addr,
1845                     (struct pf_addr *)&h->ip6_dst, AF_INET6,
1846                     r->dst.neg, NULL))
1847                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
1848                 else {
1849                         if (r->anchor == NULL)
1850                                 break;
1851                         else
1852                                 pf_step_into_anchor(&asd, &ruleset,
1853                                     PF_RULESET_SCRUB, &r, NULL, NULL);
1854                 }
1855                 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
1856                     PF_RULESET_SCRUB, &r, NULL, NULL))
1857                         break;
1858         }
1859
1860         if (r == NULL || r->action == PF_NOSCRUB)
1861                 return (PF_PASS);
1862         else {
1863                 r->packets[dir == PF_OUT]++;
1864                 r->bytes[dir == PF_OUT] += pd->tot_len;
1865         }
1866
1867         /* Check for illegal packets */
1868         if ((int)(sizeof (struct ip6_hdr) + IPV6_MAXPACKET) < m->m_pkthdr.len)
1869                 goto drop;
1870
1871         off = sizeof (struct ip6_hdr);
1872         proto = h->ip6_nxt;
1873         terminal = 0;
1874         do {
1875                 pd->proto = proto;
1876                 switch (proto) {
1877                 case IPPROTO_FRAGMENT:
1878                         goto fragment;
1879                         break;
1880                 case IPPROTO_AH:
1881                 case IPPROTO_ROUTING:
1882                 case IPPROTO_DSTOPTS:
1883                         if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
1884                             NULL, AF_INET6))
1885                                 goto shortpkt;
1886                         /*
1887                          * <jhw@apple.com>
1888                          * Multiple routing headers not allowed.
1889                          * Routing header type zero considered harmful.
1890                          */
1891                         if (proto == IPPROTO_ROUTING) {
1892                                 const struct ip6_rthdr *rh =
1893                                     (const struct ip6_rthdr *)&ext;
1894                                 if (rh_cnt++)
1895                                         goto drop;
1896                                 if (rh->ip6r_type == IPV6_RTHDR_TYPE_0)
1897                                         goto drop;
1898                         }
1899                         else
1900                         if (proto == IPPROTO_AH)
1901                                 off += (ext.ip6e_len + 2) * 4;
1902                         else
1903                                 off += (ext.ip6e_len + 1) * 8;
1904                         proto = ext.ip6e_nxt;
1905                         break;
1906                 case IPPROTO_HOPOPTS:
1907 /* adi XXX */
1908 #if 0
1909                         if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
1910                             NULL, AF_INET6))
1911                                 goto shortpkt;
1912                         optend = off + (ext.ip6e_len + 1) * 8;
1913                         ooff = off + sizeof (ext);
1914                         do {
1915                                 if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
1916                                     sizeof (opt.ip6o_type), NULL, NULL,
1917                                     AF_INET6))
1918                                         goto shortpkt;
1919                                 if (opt.ip6o_type == IP6OPT_PAD1) {
1920                                         ooff++;
1921                                         continue;
1922                                 }
1923                                 if (!pf_pull_hdr(m, ooff, &opt, sizeof (opt),
1924                                     NULL, NULL, AF_INET6))
1925                                         goto shortpkt;
1926                                 if (ooff + sizeof (opt) + opt.ip6o_len > optend)
1927                                         goto drop;
1928                                 switch (opt.ip6o_type) {
1929                                 case IP6OPT_JUMBO:
1930                                         if (h->ip6_plen != 0)
1931                                                 goto drop;
1932                                         if (!pf_pull_hdr(m, ooff, &jumbo,
1933                                             sizeof (jumbo), NULL, NULL,
1934                                             AF_INET6))
1935                                                 goto shortpkt;
1936                                         memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
1937                                             sizeof (jumbolen));
1938                                         jumbolen = ntohl(jumbolen);
1939                                         if (jumbolen <= IPV6_MAXPACKET)
1940                                                 goto drop;
1941                                         if (sizeof (struct ip6_hdr) +
1942                                             jumbolen != m->m_pkthdr.len)
1943                                                 goto drop;
1944                                         break;
1945                                 default:
1946                                         break;
1947                                 }
1948                                 ooff += sizeof (opt) + opt.ip6o_len;
1949                         } while (ooff < optend);
1950
1951                         off = optend;
1952                         proto = ext.ip6e_nxt;
1953                         break;
1954 #endif
1955                 default:
1956                         terminal = 1;
1957                         break;
1958                 }
1959         } while (!terminal);
1960
1961         /* jumbo payload option must be present, or plen > 0 */
1962         if (ntohs(h->ip6_plen) == 0)
1963                 plen = jumbolen;
1964         else
1965                 plen = ntohs(h->ip6_plen);
1966         if (plen == 0)
1967                 goto drop;
1968         if ((int)(sizeof (struct ip6_hdr) + plen) > m->m_pkthdr.len)
1969                 goto shortpkt;
1970
1971         /* Enforce a minimum ttl, may cause endless packet loops */
1972         if (r->min_ttl && h->ip6_hlim < r->min_ttl)
1973                 h->ip6_hlim = r->min_ttl;
1974
1975         return (PF_PASS);
1976
1977 fragment:
1978         if (ntohs(h->ip6_plen) == 0 || jumbolen)
1979                 goto drop;
1980         plen = ntohs(h->ip6_plen);
1981
1982         if (!pf_pull_hdr(m, off, &frag, sizeof (frag), NULL, NULL, AF_INET6))
1983                 goto shortpkt;
1984         fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
1985         pd->proto = frag.ip6f_nxt;
1986         mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
1987         off += sizeof frag;
1988         if (fragoff + (plen - off) > IPV6_MAXPACKET)
1989                goto badfrag;
1990
1991         fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
1992         DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u "
1993             "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off,
1994             fragoff, fr_max));
1995
1996         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
1997                 /* Fully buffer all of the fragments */
1998                 pd->flags |= PFDESC_IP_REAS;
1999
2000                 pff = pf_find_fragment_by_ipv6_header(h, &frag,
2001                    &pf_frag_tree);
2002
2003                 /* Check if we saw the last fragment already */
2004                 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2005                     fr_max > pff->fr_max)
2006                         goto badfrag;
2007
2008                 /* Get an entry for the fragment queue */
2009                 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
2010                 if (frent == NULL) {
2011                         REASON_SET(reason, PFRES_MEMORY);
2012                         return (PF_DROP);
2013                 }
2014                 pf_nfrents++;
2015                 frent->fr_ip6 = h;
2016                 frent->fr_m = m;
2017                 frent->fr_ip6f_opt = frag;
2018                 frent->fr_ip6f_hlen = off;
2019
2020                 /* Might return a completely reassembled mbuf, or NULL */
2021                 DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
2022                      ntohl(frag.ip6f_ident), fragoff, fr_max));
2023                 *m0 = m = pf_reassemble6(m0, &pff, frent, mff);
2024
2025                 if (m == NULL)
2026                         return (PF_DROP);
2027
2028                 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2029                         goto drop;
2030
2031                 h = mtod(m, struct ip6_hdr *);
2032         }
2033         else if (dir == PF_IN || !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
2034                 /* non-buffering fragment cache (overlaps: see RFC 5722) */
2035                 int nomem = 0;
2036
2037                 pff = pf_find_fragment_by_ipv6_header(h, &frag,
2038                     &pf_cache_tree);
2039
2040                 /* Check if we saw the last fragment already */
2041                 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2042                     fr_max > pff->fr_max) {
2043                        if (r->rule_flag & PFRULE_FRAGDROP)
2044                                 pff->fr_flags |= PFFRAG_DROP;
2045                        goto badfrag;
2046                 }
2047
2048                 *m0 = m = pf_frag6cache(m0, h, &frag, &pff, off, mff,
2049                      (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
2050                 if (m == NULL) {
2051                         if (nomem)
2052                                 goto no_mem;
2053                         goto drop;
2054                 }
2055
2056                 if (dir == PF_IN)
2057                         pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
2058
2059                 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2060                         goto drop;
2061         }
2062
2063         /* Enforce a minimum ttl, may cause endless packet loops */
2064         if (r->min_ttl && h->ip6_hlim < r->min_ttl)
2065                 h->ip6_hlim = r->min_ttl;
2066         return (PF_PASS);
2067
2068   no_mem:
2069         REASON_SET(reason, PFRES_MEMORY);
2070         goto dropout;
2071
2072   shortpkt:
2073         REASON_SET(reason, PFRES_SHORT);
2074         goto dropout;
2075
2076   drop:
2077         REASON_SET(reason, PFRES_NORM);
2078         goto dropout;
2079
2080   badfrag:
2081         DPFPRINTF(("dropping bad IPv6 fragment\n"));
2082         REASON_SET(reason, PFRES_FRAG);
2083         goto dropout;
2084
2085   dropout:
2086         if (pff != NULL)
2087                 pf_free_fragment(pff);
2088         if (r != NULL && r->log)
2089                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
2090         return (PF_DROP);
2091 }
2092 #endif /* INET6 */
2093
2094 int
2095 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
2096     int off, void *h, struct pf_pdesc *pd)
2097 {
2098 #pragma unused(ipoff, h)
2099         struct pf_rule  *r, *rm = NULL;
2100         struct tcphdr   *th = pd->hdr.tcp;
2101         int              rewrite = 0;
2102         int              asd = 0;
2103         u_short          reason;
2104         u_int8_t         flags;
2105         sa_family_t      af = pd->af;
2106         struct pf_ruleset *ruleset = NULL;
2107         union pf_state_xport sxport, dxport;
2108
2109         sxport.port = th->th_sport;
2110         dxport.port = th->th_dport;
2111
2112         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2113         while (r != NULL) {
2114                 r->evaluations++;
2115                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
2116                         r = r->skip[PF_SKIP_IFP].ptr;
2117                 else if (r->direction && r->direction != dir)
2118                         r = r->skip[PF_SKIP_DIR].ptr;
2119                 else if (r->af && r->af != af)
2120                         r = r->skip[PF_SKIP_AF].ptr;
2121                 else if (r->proto && r->proto != pd->proto)
2122                         r = r->skip[PF_SKIP_PROTO].ptr;
2123                 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
2124                     r->src.neg, kif))
2125                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2126                 else if (r->src.xport.range.op &&
2127                     !pf_match_xport(r->src.xport.range.op, r->proto_variant,
2128                     &r->src.xport, &sxport))
2129                         r = r->skip[PF_SKIP_SRC_PORT].ptr;
2130                 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
2131                     r->dst.neg, NULL))
2132                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
2133                 else if (r->dst.xport.range.op &&
2134                     !pf_match_xport(r->dst.xport.range.op, r->proto_variant,
2135                     &r->dst.xport, &dxport))
2136                         r = r->skip[PF_SKIP_DST_PORT].ptr;
2137                 else if (r->os_fingerprint != PF_OSFP_ANY &&
2138                     !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, th),
2139                     r->os_fingerprint))
2140                         r = TAILQ_NEXT(r, entries);
2141                 else {
2142                         if (r->anchor == NULL) {
2143                                 rm = r;
2144                                 break;
2145                         } else {
2146                                 pf_step_into_anchor(&asd, &ruleset,
2147                                     PF_RULESET_SCRUB, &r, NULL, NULL);
2148                         }
2149                 }
2150                 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2151                     PF_RULESET_SCRUB, &r, NULL, NULL))
2152                         break;
2153         }
2154
2155         if (rm == NULL || rm->action == PF_NOSCRUB)
2156                 return (PF_PASS);
2157         else {
2158                 r->packets[dir == PF_OUT]++;
2159                 r->bytes[dir == PF_OUT] += pd->tot_len;
2160         }
2161
2162         if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
2163                 pd->flags |= PFDESC_TCP_NORM;
2164
2165         flags = th->th_flags;
2166         if (flags & TH_SYN) {
2167                 /* Illegal packet */
2168                 if (flags & TH_RST)
2169                         goto tcp_drop;
2170
2171                 if (flags & TH_FIN)
2172                         flags &= ~TH_FIN;
2173         } else {
2174                 /* Illegal packet */
2175                 if (!(flags & (TH_ACK|TH_RST)))
2176                         goto tcp_drop;
2177         }
2178
2179         if (!(flags & TH_ACK)) {
2180                 /* These flags are only valid if ACK is set */
2181                 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
2182                         goto tcp_drop;
2183         }
2184
2185         /* Check for illegal header length */
2186         if (th->th_off < (sizeof (struct tcphdr) >> 2))
2187                 goto tcp_drop;
2188
2189         /* If flags changed, or reserved data set, then adjust */
2190         if (flags != th->th_flags || th->th_x2 != 0) {
2191                 u_int16_t       ov, nv;
2192
2193                 ov = *(u_int16_t *)(&th->th_ack + 1);
2194                 th->th_flags = flags;
2195                 th->th_x2 = 0;
2196                 nv = *(u_int16_t *)(&th->th_ack + 1);
2197
2198                 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
2199                 rewrite = 1;
2200         }
2201
2202         /* Remove urgent pointer, if TH_URG is not set */
2203         if (!(flags & TH_URG) && th->th_urp) {
2204                 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
2205                 th->th_urp = 0;
2206                 rewrite = 1;
2207         }
2208
2209         /* copy back packet headers if we sanitized */
2210         /* Process options */
2211         if (r->max_mss) {
2212                 int rv = pf_normalize_tcpopt(r, dir, kif, pd, m, th, off,
2213                     &rewrite);
2214                 if (rv == PF_DROP)
2215                         return rv;
2216                 m = pd->mp;
2217         }
2218
2219         if (rewrite) {
2220                 struct mbuf *mw = pf_lazy_makewritable(pd, m,
2221                     off + sizeof (*th));
2222                 if (!mw) {
2223                         REASON_SET(&reason, PFRES_MEMORY);
2224                         if (r->log)
2225                                 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason,
2226                                     r, 0, 0, pd);
2227                         return PF_DROP;
2228                 }
2229
2230                 m_copyback(mw, off, sizeof (*th), th);
2231         }
2232
2233         return (PF_PASS);
2234
2235 tcp_drop:
2236         REASON_SET(&reason, PFRES_NORM);
2237         if (rm != NULL && r->log)
2238                 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
2239         return (PF_DROP);
2240 }
2241
2242 int
2243 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
2244     struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
2245 {
2246 #pragma unused(dst)
2247         u_int32_t tsval, tsecr;
2248         u_int8_t hdr[60];
2249         u_int8_t *opt;
2250
2251         VERIFY(src->scrub == NULL);
2252
2253         src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
2254         if (src->scrub == NULL)
2255                 return (1);
2256         bzero(src->scrub, sizeof (*src->scrub));
2257
2258         switch (pd->af) {
2259 #if INET
2260         case AF_INET: {
2261                 struct ip *h = mtod(m, struct ip *);
2262                 src->scrub->pfss_ttl = h->ip_ttl;
2263                 break;
2264         }
2265 #endif /* INET */
2266 #if INET6
2267         case AF_INET6: {
2268                 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
2269                 src->scrub->pfss_ttl = h->ip6_hlim;
2270                 break;
2271         }
2272 #endif /* INET6 */
2273         }
2274
2275
2276         /*
2277          * All normalizations below are only begun if we see the start of
2278          * the connections.  They must all set an enabled bit in pfss_flags
2279          */
2280         if ((th->th_flags & TH_SYN) == 0)
2281                 return (0);
2282
2283
2284         if (th->th_off > (sizeof (struct tcphdr) >> 2) && src->scrub &&
2285             pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
2286                 /* Diddle with TCP options */
2287                 int hlen;
2288                 opt = hdr + sizeof (struct tcphdr);
2289                 hlen = (th->th_off << 2) - sizeof (struct tcphdr);
2290                 while (hlen >= TCPOLEN_TIMESTAMP) {
2291                         switch (*opt) {
2292                         case TCPOPT_EOL:        /* FALLTHROUGH */
2293                         case TCPOPT_NOP:
2294                                 opt++;
2295                                 hlen--;
2296                                 break;
2297                         case TCPOPT_TIMESTAMP:
2298                                 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2299                                         src->scrub->pfss_flags |=
2300                                             PFSS_TIMESTAMP;
2301                                         src->scrub->pfss_ts_mod =
2302                                             htonl(random());
2303
2304                                         /* note PFSS_PAWS not set yet */
2305                                         memcpy(&tsval, &opt[2],
2306                                             sizeof (u_int32_t));
2307                                         memcpy(&tsecr, &opt[6],
2308                                             sizeof (u_int32_t));
2309                                         src->scrub->pfss_tsval0 = ntohl(tsval);
2310                                         src->scrub->pfss_tsval = ntohl(tsval);
2311                                         src->scrub->pfss_tsecr = ntohl(tsecr);
2312                                         getmicrouptime(&src->scrub->pfss_last);
2313                                 }
2314                                 /* FALLTHROUGH */
2315                         default:
2316                                 hlen -= MAX(opt[1], 2);
2317                                 opt += MAX(opt[1], 2);
2318                                 break;
2319                         }
2320                 }
2321         }
2322
2323         return (0);
2324 }
2325
2326 void
2327 pf_normalize_tcp_cleanup(struct pf_state *state)
2328 {
2329         if (state->src.scrub)
2330                 pool_put(&pf_state_scrub_pl, state->src.scrub);
2331         if (state->dst.scrub)
2332                 pool_put(&pf_state_scrub_pl, state->dst.scrub);
2333
2334         /* Someday... flush the TCP segment reassembly descriptors. */
2335 }
2336
/*
 * Per-packet TCP normalization for a segment that belongs to an existing
 * state entry.  `src` is the peer that sent the segment, `dst` the other
 * peer; at least one of them must have scrub state.
 *
 * In order, the function:
 *  - raises the packet's IP TTL / IPv6 hop limit to the largest value seen
 *    from `src` (only when src->scrub exists);
 *  - when either peer has PFSS_TIMESTAMP set, walks the TCP options,
 *    drops a segment carrying more than one timestamp option, modulates
 *    the timestamp value / echo and copies the options back into the mbuf;
 *  - clears PFSS_PAWS (setting PFSS_PAWS_IDLED) on a peer idle for longer
 *    than TS_MAX_IDLE, and on `src` when the state is older than
 *    TS_MAX_CONN;
 *  - when both peers have PFSS_PAWS set, validates the timestamps, or, if
 *    no timestamp was found, drops data packets from a peer flagged
 *    PFSS_DATA_TS;
 *  - records whether the first data packet of `src` carried a timestamp
 *    (PFSS_DATA_TS / PFSS_DATA_NOTS);
 *  - updates the stored PAWS values of `src`.
 *
 * *writeback is set to the offset just past the TCP options when they were
 * rewritten, and is left untouched otherwise.
 *
 * Returns 0 when the segment is acceptable and PF_DROP, with *reason set,
 * when it is rejected or the mbuf could not be made writable.
 */
2337 int
2338 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
2339     u_short *reason, struct tcphdr *th, struct pf_state *state,
2340     struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
2341 {
2342         struct timeval uptime;
2343         u_int32_t tsval, tsecr;
2344         u_int tsval_from_last;
2345         u_int8_t hdr[60];
2346         u_int8_t *opt;
2347         int copyback = 0;
2348         int got_ts = 0;
2349
2350         VERIFY(src->scrub || dst->scrub);
2351
2352         /*
2353          * Raise the TTL / hop limit to the largest value seen from this peer
2354          * so it cannot be lowered mid-connection.  Negates a common technique
2355          * to evade an intrusion detection system and confuse firewall state code.
2356          */
2357         switch (pd->af) {
2358 #if INET
2359         case AF_INET: {
2360                 if (src->scrub) {
2361                         struct ip *h = mtod(m, struct ip *);
2362                         if (h->ip_ttl > src->scrub->pfss_ttl)
2363                                 src->scrub->pfss_ttl = h->ip_ttl;
2364                         h->ip_ttl = src->scrub->pfss_ttl;
2365                 }
2366                 break;
2367         }
2368 #endif /* INET */
2369 #if INET6
2370         case AF_INET6: {
2371                 if (src->scrub) {
2372                         struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
2373                         if (h->ip6_hlim > src->scrub->pfss_ttl)
2374                                 src->scrub->pfss_ttl = h->ip6_hlim;
2375                         h->ip6_hlim = src->scrub->pfss_ttl;
2376                 }
2377                 break;
2378         }
2379 #endif /* INET6 */
2380         }
2381
2382         if (th->th_off > (sizeof (struct tcphdr) >> 2) &&
2383             ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
2384             (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
2385             pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
2386                 /* Diddle with TCP options */
2387                 int hlen;
2388                 opt = hdr + sizeof (struct tcphdr);
2389                 hlen = (th->th_off << 2) - sizeof (struct tcphdr);
                     /*
                      * Keep scanning only while a complete timestamp option
                      * could still fit in the remaining option bytes.
                      */
2390                 while (hlen >= TCPOLEN_TIMESTAMP) {
2391                         switch (*opt) {
2392                         case TCPOPT_EOL:        /* FALLTHROUGH */
2393                         case TCPOPT_NOP:
2394                                 opt++;
2395                                 hlen--;
2396                                 break;
2397                         case TCPOPT_TIMESTAMP:
2398                                 /*
2399                                  * Modulate the timestamps.  Can be used for
2400                                  * NAT detection, OS uptime determination or
2401                                  * reboot detection.
2402                                  */
2403
2404                                 if (got_ts) {
2405                                         /* Huh?  Multiple timestamps!? */
2406                                         if (pf_status.debug >= PF_DEBUG_MISC) {
2407                                                 DPFPRINTF(("multiple TS??"));
2408                                                 pf_print_state(state);
2409                                                 printf("\n");
2410                                         }
2411                                         REASON_SET(reason, PFRES_TS);
2412                                         return (PF_DROP);
2413                                 }
2414                                 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2415                                         memcpy(&tsval, &opt[2],
2416                                             sizeof (u_int32_t));
                                             /*
                                              * NOTE(review): tsval is converted
                                              * to host order only inside this
                                              * branch; otherwise it keeps the
                                              * byte order copied from the
                                              * packet -- confirm later users
                                              * tolerate that.
                                              */
2417                                         if (tsval && src->scrub &&
2418                                             (src->scrub->pfss_flags &
2419                                             PFSS_TIMESTAMP)) {
2420                                                 tsval = ntohl(tsval);
2421                                                 pf_change_a(&opt[2],
2422                                                     &th->th_sum,
2423                                                     htonl(tsval +
2424                                                     src->scrub->pfss_ts_mod),
2425                                                     0);
2426                                                 copyback = 1;
2427                                         }
2428
2429                                         /* Modulate TS reply iff valid (!0) */
2430                                         memcpy(&tsecr, &opt[6],
2431                                             sizeof (u_int32_t));
                                             /*
                                              * After this branch tsecr holds the
                                              * echo with the other peer's
                                              * modulation subtracted, in host
                                              * order.  NOTE(review): when the
                                              * branch is skipped tsecr keeps the
                                              * packet's byte order -- confirm.
                                              */
2432                                         if (tsecr && dst->scrub &&
2433                                             (dst->scrub->pfss_flags &
2434                                             PFSS_TIMESTAMP)) {
2435                                                 tsecr = ntohl(tsecr)
2436                                                     - dst->scrub->pfss_ts_mod;
2437                                                 pf_change_a(&opt[6],
2438                                                     &th->th_sum, htonl(tsecr),
2439                                                     0);
2440                                                 copyback = 1;
2441                                         }
2442                                         got_ts = 1;
2443                                 }
2444                                 /* FALLTHROUGH */
2445                         default:
                                     /*
                                      * Step over the option using its length
                                      * byte, but always advance by at least 2.
                                      */
2446                                 hlen -= MAX(opt[1], 2);
2447                                 opt += MAX(opt[1], 2);
2448                                 break;
2449                         }
2450                 }
2451                 if (copyback) {
2452                         /* Copy back the options; the caller copies back the header */
2453                         int optoff = off + sizeof (*th);
2454                         int optlen = (th->th_off << 2) - sizeof (*th);
2455                         m = pf_lazy_makewritable(pd, m, optoff + optlen);
2456                         if (!m) {
2457                                 REASON_SET(reason, PFRES_MEMORY);
2458                                 return PF_DROP;
2459                         }
2460                         *writeback = optoff + optlen;
2461                         m_copyback(m, optoff, optlen, hdr + sizeof (*th));
2462                 }
2463         }
2464
2465
2466         /*
2467          * Must invalidate PAWS checks on connections idle for too long.
2468          * The fastest allowed timestamp clock is 1ms.  That turns out to
2469          * be about 24 days before it wraps.  XXX Right now our lowerbound
2470          * TS echo check only works for the first 12 days of a connection
2471          * when the TS has exhausted half its 32bit space
2472          */
             /* Both limits are expressed in seconds: 24 days and 12 days. */
2473 #define TS_MAX_IDLE     (24*24*60*60)
2474 #define TS_MAX_CONN     (12*24*60*60)   /* XXX remove when better tsecr check */
2475
2476         getmicrouptime(&uptime);
2477         if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
2478             (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
2479             pf_time_second() - state->creation > TS_MAX_CONN))  {
2480                 if (pf_status.debug >= PF_DEBUG_MISC) {
2481                         DPFPRINTF(("src idled out of PAWS\n"));
2482                         pf_print_state(state);
2483                         printf("\n");
2484                 }
2485                 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
2486                     | PFSS_PAWS_IDLED;
2487         }
2488         if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
2489             uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
2490                 if (pf_status.debug >= PF_DEBUG_MISC) {
2491                         DPFPRINTF(("dst idled out of PAWS\n"));
2492                         pf_print_state(state);
2493                         printf("\n");
2494                 }
2495                 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
2496                     | PFSS_PAWS_IDLED;
2497         }
2498
2499         if (got_ts && src->scrub && dst->scrub &&
2500             (src->scrub->pfss_flags & PFSS_PAWS) &&
2501             (dst->scrub->pfss_flags & PFSS_PAWS)) {
2502                 /*
2503                  * Validate that the timestamps are "in-window".
2504                  * RFC1323 describes TCP Timestamp options that allow
2505                  * measurement of RTT (round trip time) and PAWS
2506                  * (protection against wrapped sequence numbers).  PAWS
2507                  * gives us a set of rules for rejecting packets on
2508                  * long fat pipes (packets that were somehow delayed
2509                  * in transit longer than the time it took to send the
2510                  * full TCP sequence space of 4Gb).  We can use these
2511                  * rules and infer a few others that will let us treat
2512                  * the 32bit timestamp and the 32bit echoed timestamp
2513                  * as sequence numbers to prevent a blind attacker from
2514                  * inserting packets into a connection.
2515                  *
2516                  * RFC1323 tells us:
2517                  *  - The timestamp on this packet must be greater than
2518                  *    or equal to the last value echoed by the other
2519                  *    endpoint.  The RFC says those will be discarded
2520                  *    since it is a dup that has already been acked.
2521                  *    This gives us a lowerbound on the timestamp.
2522                  *        timestamp >= other last echoed timestamp
2523                  *  - The timestamp will be less than or equal to
2524                  *    the last timestamp plus the time between the
2525                  *    last packet and now.  The RFC defines the max
2526                  *    clock rate as 1ms.  We will allow clocks to be
2527                  *    up to 10% fast and will allow a total difference
2528                  *    of 30 seconds due to a route change.  And this
2529                  *    gives us an upperbound on the timestamp.
2530                  *        timestamp <= last timestamp + max ticks
2531                  *    We have to be careful here.  Windows will send an
2532                  *    initial timestamp of zero and then initialize it
2533                  *    to a random value after the 3whs; presumably to
2534                  *    avoid a DoS by having to call an expensive RNG
2535                  *    during a SYN flood.  Proof MS has at least one
2536                  *    good security geek.
2537                  *
2538                  *  - The TCP timestamp option must also echo the other
2539                  *    endpoints timestamp.  The timestamp echoed is the
2540                  *    one carried on the earliest unacknowledged segment
2541                  *    on the left edge of the sequence window.  The RFC
2542                  *    states that the host will reject any echoed
2543                  *    timestamps that were larger than any ever sent.
2544                  *    This gives us an upperbound on the TS echo.
2545                  *        tsecr <= largest_tsval
2546                  *  - The lowerbound on the TS echo is a little more
2547                  *    tricky to determine.  The other endpoint's echoed
2548                  *    values will not decrease.  But there may be
2549                  *    network conditions that re-order packets and
2550                  *    cause our view of them to decrease.  For now the
2551                  *    only lowerbound we can safely determine is that
2552                  *    the TS echo will never be less than the original
2553                  *    TS.  XXX There is probably a better lowerbound.
2554                  *    Remove TS_MAX_CONN with better lowerbound check.
2555                  *        tsecr >= other original TS
2556                  *
2557                  * It is also important to note that the fastest
2558                  * timestamp clock of 1ms will wrap its 32bit space in
2559                  * 24 days.  So we just disable TS checking after 24
2560                  * days of idle time.  We actually must use a 12d
2561                  * connection limit until we can come up with a better
2562                  * lowerbound to the TS echo check.
2563                  */
2564                 struct timeval delta_ts;
2565                 int ts_fudge;
2566
2567
2568                 /*
2569                  * PFTM_TS_DIFF is how many seconds of leeway to allow
2570                  * a host's timestamp.  This can happen if the previous
2571                  * packet got delayed in transit for much longer than
2572                  * this packet.
2573                  */
2574                 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
2575                         ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
2576
2577
2578                 /* Calculate max ticks since the last timestamp */
2579 #define TS_MAXFREQ      1100            /* RFC max TS freq of 1Khz + 10% skew */
2580 #define TS_MICROSECS    1000000         /* microseconds per second */
2581                 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
2582                 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
2583                 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
2584
2585
                     /*
                      * The bounds are only enforced once both peers are at
                      * least ESTABLISHED; the echo bounds are skipped when
                      * tsecr is zero.
                      */
2586                 if ((src->state >= TCPS_ESTABLISHED &&
2587                     dst->state >= TCPS_ESTABLISHED) &&
2588                     (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
2589                     SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
2590                     (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
2591                     SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
2592                         /*
2593                          * Bad RFC1323 implementation or an insertion attack.
2594                          *
2595                          * - Solaris 2.6 and 2.7 are known to send another ACK
2596                          *   after the FIN,FIN|ACK,ACK closing that carries
2597                          *   an old timestamp.
2598                          */
2599
2600                         DPFPRINTF(("Timestamp failed %c%c%c%c\n",
2601                             SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
2602                             SEQ_GT(tsval, src->scrub->pfss_tsval +
2603                             tsval_from_last) ? '1' : ' ',
2604                             SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
2605                             SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
2606                         DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
2607                             "idle: %lus %ums\n",
2608                             tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
2609                             delta_ts.tv_usec / 1000));
2610                         DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
2611                             src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2612                         DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u\n",
2613                             dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
2614                             dst->scrub->pfss_tsval0));
2615                         if (pf_status.debug >= PF_DEBUG_MISC) {
2616                                 pf_print_state(state);
2617                                 pf_print_flags(th->th_flags);
2618                                 printf("\n");
2619                         }
2620                         REASON_SET(reason, PFRES_TS);
2621                         return (PF_DROP);
2622                 }
2623
2624                 /* XXX I'd really like to require tsecr but it's optional */
2625
2626         } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
2627             ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
2628             || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
2629             src->scrub && dst->scrub &&
2630             (src->scrub->pfss_flags & PFSS_PAWS) &&
2631             (dst->scrub->pfss_flags & PFSS_PAWS)) {
2632                 /*
2633                  * Didn't send a timestamp.  Timestamps aren't really useful
2634                  * when:
2635                  *  - connection opening or closing (often not even sent).
2636                  *    but we must not let an attacker put a FIN on a
2637                  *    data packet to sneak it through our ESTABLISHED check.
2638                  *  - on a TCP reset.  RFC suggests not even looking at TS.
2639                  *  - on an empty ACK.  The TS will not be echoed so it will
2640                  *    probably not help keep the RTT calculation in sync and
2641                  *    there isn't as much danger when the sequence numbers
2642                  *    got wrapped.  So some stacks don't include TS on empty
2643                  *    ACKs :-(
2644                  *
2645                  * To minimize the disruption to mostly RFC1323 conformant
2646                  * stacks, we will only require timestamps on data packets.
2647                  *
2648                  * And what do ya know, we cannot require timestamps on data
2649                  * packets.  There appear to be devices that do legitimate
2650                  * TCP connection hijacking.  There are HTTP devices that allow
2651                  * a 3whs (with timestamps) and then buffer the HTTP request.
2652                  * If the intermediate device has the HTTP response cache, it
2653                  * will spoof the response but not bother timestamping its
2654                  * packets.  So we can look for the presence of a timestamp in
2655                  * the first data packet and if there, require it in all future
2656                  * packets.
2657                  */
2658
2659                 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
2660                         /*
2661                          * Hey!  Someone tried to sneak a packet in.  Or the
2662                          * stack changed its RFC1323 behavior?!?!
2663                          */
2664                         if (pf_status.debug >= PF_DEBUG_MISC) {
2665                                 DPFPRINTF(("Did not receive expected RFC1323 "
2666                                     "timestamp\n"));
2667                                 pf_print_state(state);
2668                                 pf_print_flags(th->th_flags);
2669                                 printf("\n");
2670                         }
2671                         REASON_SET(reason, PFRES_TS);
2672                         return (PF_DROP);
2673                 }
2674         }
2675
2676
2677         /*
2678          * We will note if a host sends his data packets with or without
2679          * timestamps.  And require all data packets to contain a timestamp
2680          * if the first does.  PAWS implicitly requires that all data packets be
2681          * timestamped.  But I think there are middle-man devices that hijack
2682          * TCP streams immediately after the 3whs and don't timestamp their
2683          * packets (seen in a WWW accelerator or cache).
2684          */
2685         if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
2686             (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
2687                 if (got_ts)
2688                         src->scrub->pfss_flags |= PFSS_DATA_TS;
2689                 else {
2690                         src->scrub->pfss_flags |= PFSS_DATA_NOTS;
2691                         if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2692                             (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
2693                                 /* Don't warn if other host rejected RFC1323 */
2694                                 DPFPRINTF(("Broken RFC1323 stack did not "
2695                                     "timestamp data packet. Disabled PAWS "
2696                                     "security.\n"));
2697                                 pf_print_state(state);
2698                                 pf_print_flags(th->th_flags);
2699                                 printf("\n");
2700                         }
2701                 }
2702         }
2703
2704
2705         /*
2706          * Update PAWS values
2707          */
             /* Only while PFSS_TIMESTAMP is set and PFSS_PAWS_IDLED is not. */
2708         if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
2709             (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
2710                 getmicrouptime(&src->scrub->pfss_last);
2711                 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
2712                     (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2713                         src->scrub->pfss_tsval = tsval;
2714
2715                 if (tsecr) {
2716                         if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
2717                             (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2718                                 src->scrub->pfss_tsecr = tsecr;
2719
2720                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
2721                             (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
2722                             src->scrub->pfss_tsval0 == 0)) {
2723                                 /* tsval0 MUST be the lowest timestamp */
2724                                 src->scrub->pfss_tsval0 = tsval;
2725                         }
2726
2727                         /* Only fully initialized after a TS gets echoed */
2728                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
2729                                 src->scrub->pfss_flags |= PFSS_PAWS;
2730                 }
2731         }
2732
2733         /* I have a dream....  TCP segment reassembly.... */
2734         return (0);
2735 }
2736
2737 static int
2738 pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
2739     struct pf_pdesc *pd, struct mbuf *m, struct tcphdr *th, int off,
2740     int *rewrptr)
2741 {
2742 #pragma unused(dir, kif)
2743         sa_family_t af = pd->af;
2744         u_int16_t       *mss;
2745         int             thoff;
2746         int             opt, cnt, optlen = 0;
2747         int             rewrite = 0;
2748         u_char          opts[MAX_TCPOPTLEN];
2749         u_char          *optp = opts;
2750
2751         thoff = th->th_off << 2;
2752         cnt = thoff - sizeof (struct tcphdr);
2753
2754         if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt,
2755             NULL, NULL, af))
2756                 return PF_DROP;
2757
2758         for (; cnt > 0; cnt -= optlen, optp += optlen) {
2759                 opt = optp[0];
2760                 if (opt == TCPOPT_EOL)
2761                         break;
2762                 if (opt == TCPOPT_NOP)
2763                         optlen = 1;
2764                 else {
2765                         if (cnt < 2)
2766                                 break;
2767                         optlen = optp[1];
2768                         if (optlen < 2 || optlen > cnt)
2769                                 break;
2770                 }
2771                 switch (opt) {
2772                 case TCPOPT_MAXSEG:
2773                         mss = (u_int16_t *)(void *)(optp + 2);
2774                         if ((ntohs(*mss)) > r->max_mss) {
2775                                 /*
2776                                  * <jhw@apple.com>
2777                                  *  Only do the TCP checksum fixup if delayed
2778                                  * checksum calculation will not be performed.
2779                                  */
2780                                 if (m->m_pkthdr.rcvif ||
2781                                     !(m->m_pkthdr.csum_flags & CSUM_TCP))
2782                                         th->th_sum = pf_cksum_fixup(th->th_sum,
2783                                             *mss, htons(r->max_mss), 0);
2784                                 *mss = htons(r->max_mss);
2785                                 rewrite = 1;
2786                         }
2787                         break;
2788                 default:
2789                         break;
2790                 }
2791         }
2792
2793         if (rewrite) {
2794                 struct mbuf *mw;
2795                 u_short reason;
2796
2797                 mw = pf_lazy_makewritable(pd, pd->mp,
2798                     off + sizeof (*th) + thoff);
2799                 if (!mw) {
2800                         REASON_SET(&reason, PFRES_MEMORY);
2801                         if (r->log)
2802                                 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason,
2803                                     r, 0, 0, pd);
2804                         return PF_DROP;
2805                 }
2806
2807                 *rewrptr = 1;
2808                 m_copyback(mw, off + sizeof (*th), thoff - sizeof (*th), opts);
2809         }
2810
2811         return PF_PASS;
2812 }