/*
 * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* $apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */

/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/random.h>
#include <sys/mcache.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/route.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#if INET6
#include <netinet/ip6.h>
#endif /* INET6 */

#include <net/pfvar.h>

struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;
	struct mbuf *fr_m;
#define fr_ip	fr_u.fru_ipv4
#define fr_ip6	fr_u.fru_ipv6
	union {
		struct ip	*fru_ipv4;
		struct ip6_hdr	*fru_ipv6;
	} fr_u;
	struct ip6_frag	fr_ip6f_opt;
	int		fr_ip6f_hlen;
};

struct pf_frcache {
	LIST_ENTRY(pf_frcache) fr_next;
	uint16_t	fr_off;
	uint16_t	fr_end;
};

#define PFFRAG_SEENLAST	0x0001	/* Seen the last fragment for this */
#define PFFRAG_NOBUFFER	0x0002	/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004	/* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
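/*
 * Note on the two code paths keyed off this macro: a descriptor created
 * by pf_reassemble()/pf_reassemble6() leaves PFFRAG_NOBUFFER clear, so
 * BUFFER_FRAGMENTS() is true and whole fragments are held on fr_queue;
 * one created by pf_fragcache()/pf_frag6cache() sets PFFRAG_NOBUFFER and
 * only records, on fr_cache, the byte ranges already passed along.
 */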

struct pf_fragment {
	RB_ENTRY(pf_fragment) fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	struct pf_addr	fr_srcx;
	struct pf_addr	fr_dstx;
	u_int8_t	fr_p;		/* protocol of this fragment */
	u_int8_t	fr_flags;	/* status flags */
	u_int16_t	fr_max;		/* fragment data max */
#define fr_id	fr_uid.fru_id4
#define fr_id6	fr_uid.fru_id6
	union {
		u_int16_t	fru_id4;
		u_int32_t	fru_id6;
	} fr_uid;
	int		fr_af;
	u_int32_t	fr_timeout;
#define fr_queue	fr_u.fru_queue
#define fr_cache	fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;	/* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
	} fr_u;
};

static TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
static TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;

static __inline int pf_frag_compare(struct pf_fragment *,
    struct pf_fragment *);
static RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
    pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *,
    struct ip6_frag *);
static void pf_ip2key(struct pf_fragment *, struct ip *);
static void pf_remove_fragment(struct pf_fragment *);
static void pf_flush_fragments(void);
static void pf_free_fragment(struct pf_fragment *);
static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *,
    struct pf_frag_tree *);
static __inline struct pf_fragment *
pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *);
static __inline struct pf_fragment *
pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *,
    struct pf_frag_tree *);
static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_fragcache(struct mbuf **, struct ip *,
    struct pf_fragment **, int, int, int *);
static struct mbuf *pf_reassemble6(struct mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr *,
    struct ip6_frag *, struct pf_fragment **, int, int, int, int *);
static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *,
    struct pf_pdesc *, struct mbuf *, struct tcphdr *, int, int *);

#define DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		printf("%s: ", __func__);		\
		printf x ;				\
	}						\
} while (0)
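/*
 * Usage sketch: the doubled parentheses at the call site let this macro
 * forward an argument list without variadic macro support, e.g.
 *
 *	DPFPRINTF(("overlap -%d\n", precut));
 *
 * expands to printf("overlap -%d\n", precut) prefixed by the function
 * name, and fires only when pf_status.debug >= PF_DEBUG_MISC.
 */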

/* Globals */
struct pool	 pf_frent_pl, pf_frag_pl;
static struct pool	 pf_cache_pl, pf_cent_pl;
struct pool	 pf_state_scrub_pl;

static int	 pf_nfrents, pf_ncache;

void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof (struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof (struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof (struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof (struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof (struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);

	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}

#if 0
void
pf_normalize_destroy(void)
{
	pool_destroy(&pf_state_scrub_pl);
	pool_destroy(&pf_cent_pl);
	pool_destroy(&pf_cache_pl);
	pool_destroy(&pf_frag_pl);
	pool_destroy(&pf_frent_pl);
}
#endif

int
pf_normalize_isempty(void)
{
	return (TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue));
}

static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int	diff;

	if ((diff = a->fr_af - b->fr_af))
		return (diff);
	else if ((diff = a->fr_p - b->fr_p))
		return (diff);
	else {
		struct pf_addr *sa = &a->fr_srcx;
		struct pf_addr *sb = &b->fr_srcx;
		struct pf_addr *da = &a->fr_dstx;
		struct pf_addr *db = &b->fr_dstx;

		switch (a->fr_af) {
#ifdef INET
		case AF_INET:
			if ((diff = a->fr_id - b->fr_id))
				return (diff);
			else if (sa->v4.s_addr < sb->v4.s_addr)
				return (-1);
			else if (sa->v4.s_addr > sb->v4.s_addr)
				return (1);
			else if (da->v4.s_addr < db->v4.s_addr)
				return (-1);
			else if (da->v4.s_addr > db->v4.s_addr)
				return (1);
			break;
#endif
#ifdef INET6
		case AF_INET6:
			if ((diff = a->fr_id6 - b->fr_id6))
				return (diff);
			else if (sa->addr32[3] < sb->addr32[3])
				return (-1);
			else if (sa->addr32[3] > sb->addr32[3])
				return (1);
			else if (sa->addr32[2] < sb->addr32[2])
				return (-1);
			else if (sa->addr32[2] > sb->addr32[2])
				return (1);
			else if (sa->addr32[1] < sb->addr32[1])
				return (-1);
			else if (sa->addr32[1] > sb->addr32[1])
				return (1);
			else if (sa->addr32[0] < sb->addr32[0])
				return (-1);
			else if (sa->addr32[0] > sb->addr32[0])
				return (1);
			else if (da->addr32[3] < db->addr32[3])
				return (-1);
			else if (da->addr32[3] > db->addr32[3])
				return (1);
			else if (da->addr32[2] < db->addr32[2])
				return (-1);
			else if (da->addr32[2] > db->addr32[2])
				return (1);
			else if (da->addr32[1] < db->addr32[1])
				return (-1);
			else if (da->addr32[1] > db->addr32[1])
				return (1);
			else if (da->addr32[0] < db->addr32[0])
				return (-1);
			else if (da->addr32[0] > db->addr32[0])
				return (1);
			break;
#endif
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported!");
			break;
		}
	}
	return (0);
}

void
pf_purge_expired_fragments(void)
{
	struct pf_fragment *frag;
	u_int32_t expire = pf_time_second() -
	    pf_default_rule.timeout[PFTM_FRAG];

	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		VERIFY(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(%p) from queue.\n",
			    ntohs(frag->fr_id), frag));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(%p) from queue.\n",
			    ntohl(frag->fr_id6), frag));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		VERIFY(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(%p) from cache.\n",
			    ntohs(frag->fr_id), frag));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(%p) from cache.\n",
			    ntohl(frag->fr_id6), frag));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
		VERIFY(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}

/*
 * Try to flush old fragments to make space for new ones
 */
static void
pf_flush_fragments(void)
{
	struct pf_fragment *frag;
	int goal;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(("trying to free > %d frents\n",
	    pf_nfrents - goal));
	while (goal < pf_nfrents) {
		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}

	goal = pf_ncache * 9 / 10;
	DPFPRINTF(("trying to free > %d cache entries\n",
	    pf_ncache - goal));
	while (goal < pf_ncache) {
		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}
}

/* Frees the fragments and all associated entries */
static void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent *frent;
	struct pf_frcache *frcache;

	/* Free all fragments */
	if (BUFFER_FRAGMENTS(frag)) {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			m_freem(frent->fr_m);
			pool_put(&pf_frent_pl, frent);
			pf_nfrents--;
		}
	} else {
		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
		    frcache = LIST_FIRST(&frag->fr_cache)) {
			LIST_REMOVE(frcache, fr_next);

			VERIFY(LIST_EMPTY(&frag->fr_cache) ||
			    LIST_FIRST(&frag->fr_cache)->fr_off >
			    frcache->fr_end);

			pool_put(&pf_cent_pl, frcache);
			pf_ncache--;
		}
	}

	pf_remove_fragment(frag);
}

static void
pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6,
    struct ip6_frag *fh)
{
	key->fr_p = fh->ip6f_nxt;
	key->fr_id6 = fh->ip6f_ident;
	key->fr_af = AF_INET6;
	key->fr_srcx.v6 = ip6->ip6_src;
	key->fr_dstx.v6 = ip6->ip6_dst;
}

static void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
	key->fr_p = ip->ip_p;
	key->fr_id = ip->ip_id;
	key->fr_af = AF_INET;
	key->fr_srcx.v4.s_addr = ip->ip_src.s_addr;
	key->fr_dstx.v4.s_addr = ip->ip_dst.s_addr;
}

static struct pf_fragment *
pf_find_fragment_by_key(struct pf_fragment *key, struct pf_frag_tree *tree)
{
	struct pf_fragment *frag;

	frag = RB_FIND(pf_frag_tree, tree, key);
	if (frag != NULL) {
		/* XXX Are we sure we want to update the timeout? */
		frag->fr_timeout = pf_time_second();
		if (BUFFER_FRAGMENTS(frag)) {
			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
		} else {
			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
		}
	}

	return (frag);
}

static __inline struct pf_fragment *
pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree)
{
	struct pf_fragment key;
	pf_ip2key(&key, ip);
	return pf_find_fragment_by_key(&key, tree);
}

static __inline struct pf_fragment *
pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh,
    struct pf_frag_tree *tree)
{
	struct pf_fragment key;
	pf_ip6hdr2key(&key, ip6, fh);
	return pf_find_fragment_by_key(&key, tree);
}

/* Removes a fragment from the fragment queue and frees the fragment */
static void
pf_remove_fragment(struct pf_fragment *frag)
{
	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
		pool_put(&pf_frag_pl, frag);
	} else {
		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
		pool_put(&pf_cache_pl, frag);
	}
}

#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
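/*
 * Worked example: for a fragment carrying ip_off = htons(0x2004)
 * (IP_MF set, offset field 4), FR_IP_OFF() masks off the flag bits
 * and shifts left by 3, yielding a byte offset of 4 * 8 = 32.
 */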
static struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *m = *m0, *m2;
	struct pf_frent *frea, *next;
	struct pf_frent *frep = NULL;
	struct ip *ip = frent->fr_ip;
	int hlen = ip->ip_hl << 2;
	u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t fr_max = ip_len + off;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4 = frent->fr_ip->ip_src;
		(*frag)->fr_dstx.v4 = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = pf_time_second();
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;
	}

	VERIFY(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off) {
		u_int16_t precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}
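	/*
	 * Worked example (hypothetical values): if the previous fragment
	 * frep covers bytes 0-63 (FR_IP_OFF 0, 64 payload bytes) and this
	 * fragment starts at off 56 with ip_len 32, then
	 * precut = 0 + 64 - 56 = 8; the first 8 payload bytes are trimmed,
	 * ip_off advances by one 8-byte unit (8 >> 3), off becomes 64 and
	 * ip_len becomes 24.
	 */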

	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next) {
		u_int16_t aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4) {
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off)) {
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_srcx.v4;
	ip->ip_dst = (*frag)->fr_dstx.v4;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}

static struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
	struct mbuf *m = *m0;
	struct pf_frcache *frp, *fra, *cur = NULL;
	int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	u_int16_t off = ntohs(h->ip_off) << 3;
	u_int16_t fr_max = ip_len + off;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4 = h->ip_src;
		(*frag)->fr_dstx.v4 = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = pf_time_second();

		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
		    fr_max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	VERIFY(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/*
			 * The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/*
				 * XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				VERIFY((*m0)->m_next == NULL);
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}

				h = mtod(m, struct ip *);

				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - precut);
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut);
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("fragcache[%d]: dropping overall "
			    "fragment\n", h->ip_id));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}

#define FR_IP6_OFF(fr) \
	(ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
#define FR_IP6_PLEN(fr)	(ntohs((fr)->fr_ip6->ip6_plen))
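/*
 * Worked example: the low 3 bits of ip6f_offlg are flag bits, so the
 * masked field is the byte offset directly. A fragment whose data starts
 * at byte 1280 of the reassembled payload carries an offset of 160
 * eight-byte units, i.e. a masked field value of 160 << 3 = 1280, which
 * is what FR_IP6_OFF() returns.
 */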
struct mbuf *
pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *m, *m2;
	struct pf_frent *frea, *frep, *next;
	struct ip6_hdr *ip6;
	int plen, off, fr_max;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
	m = *m0;
	frep = NULL;
	ip6 = frent->fr_ip6;
	off = FR_IP6_OFF(frent);
	plen = FR_IP6_PLEN(frent);
	fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof *ip6);

	DPFPRINTF(("%p IPv6 frag plen %u off %u fr_ip6f_hlen %u "
	    "fr_max %u m_len %u\n", m, plen, off, frent->fr_ip6f_hlen,
	    fr_max, m->m_len));

	/* strip off headers up to the fragment payload */
	m->m_data += frent->fr_ip6f_hlen;
	m->m_len -= frent->fr_ip6f_hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6 = frent->fr_ip6->ip6_src;
		(*frag)->fr_dstx.v6 = frent->fr_ip6->ip6_dst;
		(*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
		(*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP6_OFF(frea) > off)
			break;
		frep = frea;
	}

	VERIFY(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off) {
		u_int16_t precut;

		precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
		    frep->fr_ip6f_hlen - off;
		if (precut >= plen)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		frent->fr_ip6f_opt.ip6f_offlg =
		    htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
		    (precut >> 3));
		off = FR_IP6_OFF(frent);
		plen -= precut;
		ip6->ip6_plen = htons(plen);
	}

	for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
		u_int16_t aftercut;

		aftercut = plen + off - FR_IP6_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
			frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
			    aftercut);
			frea->fr_ip6f_opt.ip6f_offlg =
			    htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);
		off += FR_IP6_PLEN(frep) - (frep->fr_ip6f_hlen - sizeof *ip6);
		DPFPRINTF(("frep at %d, next %d, max %d\n",
		    off, next == NULL ? -1 : FR_IP6_OFF(next),
		    (*frag)->fr_max));
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP6_OFF(next) != off)) {
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP6_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}

	ip6 = frent->fr_ip6;
	ip6->ip6_nxt = (*frag)->fr_p;
	ip6->ip6_plen = htons(off);
	ip6->ip6_src = (*frag)->fr_srcx.v6;
	ip6->ip6_dst = (*frag)->fr_dstx.v6;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	m = frent->fr_m;
	m->m_len += sizeof (struct ip6_hdr);
	m->m_data -= sizeof (struct ip6_hdr);
	memmove(m->m_data, ip6, sizeof (struct ip6_hdr));

	next = LIST_NEXT(frent, fr_next);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; next != NULL; frent = next) {
		m2 = frent->fr_m;

		m_cat(m, m2);
		next = LIST_NEXT(frent, fr_next);
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
	}

	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int pktlen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			pktlen += m2->m_len;
		m->m_pkthdr.len = pktlen;
	}

	DPFPRINTF(("complete: %p ip6_plen %d m_pkthdr.len %d\n",
	    m, ntohs(ip6->ip6_plen), m->m_pkthdr.len));

	return (m);

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}

static struct mbuf *
pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh,
    struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem)
{
	struct mbuf *m = *m0;
	u_int16_t plen, off, fr_max;
	struct pf_frcache *frp, *fra, *cur = NULL;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
	m = *m0;
	off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
	plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);

	/*
	 * Apple Modification: dimambro@apple.com. The hlen passed into
	 * this function includes all the headers associated with the
	 * packet, and may include routing headers, so to get to the data
	 * payload as stored in the original IPv6 header we need to
	 * subtract all those headers and the IP header.
	 *
	 * The 'fr_max' local variable should contain the offset from the
	 * start of the reassembled packet to the octet just past the end
	 * of the octets in the current fragment, where:
	 *  - 'off' is the offset from the start of the reassembled packet
	 *    to the first octet in the fragment,
	 *  - 'plen' is the "payload data length", excluding all the IPv6
	 *    headers of the fragment,
	 *  - 'hlen' is computed in pf_normalize_ip6() as the offset from
	 *    the start of the IPv6 packet to the beginning of the data.
	 */
	fr_max = off + plen;
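	/*
	 * Worked example (hypothetical values): for a first fragment with
	 * a 40-byte IPv6 header plus an 8-byte fragment header, hlen is 48
	 * and ip6_plen 1456, so plen = 1456 - 8 = 1448; with off = 0 the
	 * range cached below is 0-1448.
	 */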

	DPFPRINTF(("%p plen %u off %u fr_max %u\n", m,
	    plen, off, fr_max));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6 = h->ip6_src;
		(*frag)->fr_dstx.v6 = h->ip6_dst;
		(*frag)->fr_p = fh->ip6f_nxt;
		(*frag)->fr_id6 = fh->ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();

		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
		    off, fr_max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	VERIFY(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int precut;

		precut = frp->fr_end - off;
		if (precut >= plen) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/*
			 * The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/*
				 * XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym(m, 0, hlen, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				VERIFY((*m0)->m_next == NULL);
				m_adj(m, precut + hlen);
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						pktlen += t->m_len;
					m->m_pkthdr.len = pktlen;
				}

				h = mtod(m, struct ip6_hdr *);

				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - precut);
				fh->ip6f_offlg &= ~IP6F_OFF_MASK;
				fh->ip6f_offlg |=
				    htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
				    + (precut >> 3));
				h->ip6_plen = htons(ntohs(h->ip6_plen) -
				    precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), -precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						pktlen += t->m_len;
					m->m_pkthdr.len = pktlen;
				}
				h = mtod(m, struct ip6_hdr *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - aftercut);
				h->ip6_plen =
				    htons(ntohs(h->ip6_plen) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), cur->fr_off,
				    cur->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), frp->fr_off,
				    frp->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max)
		(*frag)->fr_max = fr_max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
		    ntohl(fh->ip6f_ident), (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("frag6cache[%u]: dropping overall "
			    "fragment\n", ntohl(fh->ip6f_ident)));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}

int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf *m = *m0;
	struct pf_rule *r;
	struct pf_frent *frent;
	struct pf_fragment *frag = NULL;
	struct ip *h = mtod(m, struct ip *);
	int mff = (ntohs(h->ip_off) & IP_MF);
	int hlen = h->ip_hl << 2;
	u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t fr_max;
	int ip_len;
	int ip_off;
	int asd = 0;
	struct pf_ruleset *ruleset = NULL;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else {
			if (r->anchor == NULL)
				break;
			else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_SCRUB, &r, NULL, NULL);
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_SCRUB, &r, NULL, NULL))
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (hlen < (int)sizeof (struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ipoff = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/*
	 * We're dealing with a fragment now.  Don't allow fragments
	 * with IP_DF to enter the cache.  If the flag was cleared by
	 * no-df above, fine.  Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	fr_max = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
		    fragoff, fr_max));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		/* use mtag from concatenated mbuf chain */
		pd->pf_mtag = pf_find_mtag(m);
#ifdef DIAGNOSTIC
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				*m0 = NULL;
				goto no_mem;
			}
		}
#endif
		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int nomem = 0;

		if (dir == PF_OUT &&
		    (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
			/*
			 * Already passed the fragment cache in the
			 * input direction.  If we continued, it would
			 * appear to be a dup and would be dropped.
			 */
			goto fragment_pass;
		}

		frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}

		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		/* use mtag from copied and trimmed mbuf chain */
		pd->pf_mtag = pf_find_mtag(m);
#ifdef DIAGNOSTIC
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				*m0 = NULL;
				goto no_mem;
			}
		}
#endif
		if (dir == PF_IN)
			pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;
		goto fragment_pass;
	}

no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ipoff = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
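	/*
	 * Checksum note (a sketch of the usual RFC 1624 incremental
	 * update, which pf_cksum_fixup() is assumed to implement here):
	 * when a 16-bit word of the header changes from 'old' to 'new',
	 * the ones-complement sum can be patched as cksum' = cksum + old
	 * - new (with end-around carry) instead of recomputing it over
	 * the whole header, which is why the call sites above and below
	 * pass only the old and new values.
	 */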
1736 #if RANDOM_IP_ID
1737 if (r->rule_flag & PFRULE_RANDOMID) {
1738 u_int16_t ip_id = h->ip_id;
1739
1740 h->ip_id = ip_randomid();
1741 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
1742 }
1743 #endif /* RANDOM_IP_ID */
1744 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1745 pd->flags |= PFDESC_IP_REAS;
1746
1747 return (PF_PASS);
1748
1749 fragment_pass:
1750 /* Enforce a minimum ttl, may cause endless packet loops */
1751 if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1752 u_int16_t ip_ttl = h->ip_ttl;
1753
1754 h->ip_ttl = r->min_ttl;
1755 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
1756 }
1757 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1758 pd->flags |= PFDESC_IP_REAS;
1759 return (PF_PASS);
1760
1761 no_mem:
1762 REASON_SET(reason, PFRES_MEMORY);
1763 if (r != NULL && r->log)
1764 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
1765 NULL, NULL, pd);
1766 return (PF_DROP);
1767
1768 drop:
1769 REASON_SET(reason, PFRES_NORM);
1770 if (r != NULL && r->log)
1771 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
1772 NULL, NULL, pd);
1773 return (PF_DROP);
1774
1775 bad:
1776 DPFPRINTF(("dropping bad IPv4 fragment\n"));
1777
1778 /* Free associated fragments */
1779 if (frag != NULL)
1780 pf_free_fragment(frag);
1781
1782 REASON_SET(reason, PFRES_FRAG);
1783 if (r != NULL && r->log)
1784 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1785
1786 return (PF_DROP);
1787 }
1788
1789 #if INET6
1790 int
1791 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
1792 u_short *reason, struct pf_pdesc *pd)
1793 {
1794 struct mbuf *m = *m0;
1795 struct pf_rule *r;
1796 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1797 int off;
1798 struct ip6_ext ext;
1799 /* adi XXX */
1800 #if 0
1801 struct ip6_opt opt;
1802 struct ip6_opt_jumbo jumbo;
1803 int optend;
1804 int ooff;
1805 #endif
1806 struct ip6_frag frag;
1807 u_int32_t jumbolen = 0, plen;
1808 u_int16_t fragoff = 0;
1809 u_int8_t proto;
1810 int terminal;
1811 struct pf_frent *frent;
1812 struct pf_fragment *pff = NULL;
1813 int mff = 0, rh_cnt = 0;
1814 u_int16_t fr_max;
1815 int asd = 0;
1816 struct pf_ruleset *ruleset = NULL;
1817
1818 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1819 while (r != NULL) {
1820 r->evaluations++;
1821 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1822 r = r->skip[PF_SKIP_IFP].ptr;
1823 else if (r->direction && r->direction != dir)
1824 r = r->skip[PF_SKIP_DIR].ptr;
1825 else if (r->af && r->af != AF_INET6)
1826 r = r->skip[PF_SKIP_AF].ptr;
1827 #if 0 /* header chain! */
1828 else if (r->proto && r->proto != h->ip6_nxt)
1829 r = r->skip[PF_SKIP_PROTO].ptr;
1830 #endif
1831 else if (PF_MISMATCHAW(&r->src.addr,
1832 (struct pf_addr *)&h->ip6_src, AF_INET6,
1833 r->src.neg, kif))
1834 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1835 else if (PF_MISMATCHAW(&r->dst.addr,
1836 (struct pf_addr *)&h->ip6_dst, AF_INET6,
1837 r->dst.neg, NULL))
1838 r = r->skip[PF_SKIP_DST_ADDR].ptr;
1839 else {
1840 if (r->anchor == NULL)
1841 break;
1842 else
1843 pf_step_into_anchor(&asd, &ruleset,
1844 PF_RULESET_SCRUB, &r, NULL, NULL);
1845 }
1846 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
1847 PF_RULESET_SCRUB, &r, NULL, NULL))
1848 break;
1849 }
1850
1851 if (r == NULL || r->action == PF_NOSCRUB)
1852 return (PF_PASS);
1853 else {
1854 r->packets[dir == PF_OUT]++;
1855 r->bytes[dir == PF_OUT] += pd->tot_len;
1856 }
1857
1858 /* Check for illegal packets */
1859 if ((int)(sizeof (struct ip6_hdr) + IPV6_MAXPACKET) < m->m_pkthdr.len)
1860 goto drop;
1861
1862 off = sizeof (struct ip6_hdr);
1863 proto = h->ip6_nxt;
1864 terminal = 0;
1865 do {
1866 pd->proto = proto;
1867 switch (proto) {
1868 case IPPROTO_FRAGMENT:
1869 goto fragment;
1870 break;
1871 case IPPROTO_AH:
1872 case IPPROTO_ROUTING:
1873 case IPPROTO_DSTOPTS:
1874 if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
1875 NULL, AF_INET6))
1876 goto shortpkt;
1877 /*
1878 * <jhw@apple.com>
1879 * Multiple routing headers not allowed.
1880 * Routing header type zero considered harmful.
1881 */
1882 if (proto == IPPROTO_ROUTING) {
1883 const struct ip6_rthdr *rh =
1884 (const struct ip6_rthdr *)&ext;
1885 if (rh_cnt++)
1886 goto drop;
1887 if (rh->ip6r_type == IPV6_RTHDR_TYPE_0)
1888 goto drop;
1889 }
1890 else
1891 if (proto == IPPROTO_AH)
1892 off += (ext.ip6e_len + 2) * 4;
1893 else
1894 off += (ext.ip6e_len + 1) * 8;
1895 proto = ext.ip6e_nxt;
1896 break;
1897 case IPPROTO_HOPOPTS:
1898 /* adi XXX */
1899 #if 0
1900 if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
1901 NULL, AF_INET6))
1902 goto shortpkt;
1903 optend = off + (ext.ip6e_len + 1) * 8;
1904 ooff = off + sizeof (ext);
1905 do {
1906 if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
1907 sizeof (opt.ip6o_type), NULL, NULL,
1908 AF_INET6))
1909 goto shortpkt;
1910 if (opt.ip6o_type == IP6OPT_PAD1) {
1911 ooff++;
1912 continue;
1913 }
1914 if (!pf_pull_hdr(m, ooff, &opt, sizeof (opt),
1915 NULL, NULL, AF_INET6))
1916 goto shortpkt;
1917 if (ooff + sizeof (opt) + opt.ip6o_len > optend)
1918 goto drop;
1919 switch (opt.ip6o_type) {
1920 case IP6OPT_JUMBO:
1921 if (h->ip6_plen != 0)
1922 goto drop;
1923 if (!pf_pull_hdr(m, ooff, &jumbo,
1924 sizeof (jumbo), NULL, NULL,
1925 AF_INET6))
1926 goto shortpkt;
1927 memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
1928 sizeof (jumbolen));
1929 jumbolen = ntohl(jumbolen);
1930 if (jumbolen <= IPV6_MAXPACKET)
1931 goto drop;
1932 if (sizeof (struct ip6_hdr) +
1933 jumbolen != m->m_pkthdr.len)
1934 goto drop;
1935 break;
1936 default:
1937 break;
1938 }
1939 ooff += sizeof (opt) + opt.ip6o_len;
1940 } while (ooff < optend);
1941
1942 off = optend;
1943 proto = ext.ip6e_nxt;
1944 break;
1945 #endif
1946 default:
1947 terminal = 1;
1948 break;
1949 }
1950 } while (!terminal);
1951
1952 /* jumbo payload option must be present, or plen > 0 */
1953 if (ntohs(h->ip6_plen) == 0)
1954 plen = jumbolen;
1955 else
1956 plen = ntohs(h->ip6_plen);
1957 if (plen == 0)
1958 goto drop;
1959 if ((int)(sizeof (struct ip6_hdr) + plen) > m->m_pkthdr.len)
1960 goto shortpkt;
1961
1962 	/* Enforce a minimum ttl; note this may cause endless packet loops */
1963 if (r->min_ttl && h->ip6_hlim < r->min_ttl)
1964 h->ip6_hlim = r->min_ttl;
1965
1966 return (PF_PASS);
1967
1968 fragment:
1969 if (ntohs(h->ip6_plen) == 0 || jumbolen)
1970 goto drop;
1971 plen = ntohs(h->ip6_plen);
1972
1973 if (!pf_pull_hdr(m, off, &frag, sizeof (frag), NULL, NULL, AF_INET6))
1974 goto shortpkt;
1975 fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
1976 pd->proto = frag.ip6f_nxt;
1977 mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
1978 off += sizeof frag;
1979 if (fragoff + (plen - off) > IPV6_MAXPACKET)
1980 goto badfrag;
1981
1982 fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
1983 DPFPRINTF(("%p IPv6 frag plen %u mff %d off %u fragoff %u fr_max %u\n", m,
1984 plen, mff, off, fragoff, fr_max));
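	/*
	 * Worked example of the fr_max arithmetic above (illustrative
	 * numbers): for a first fragment whose only extension header is
	 * the fragment header, off = 40 + 8 = 48 and, say, plen = 1456
	 * (8 bytes of fragment header plus 1448 bytes of data), giving
	 *
	 *	fr_max = 0 + 1456 - (48 - 40) = 1448
	 *
	 * i.e. fr_max is where this fragment's data ends within the
	 * reassembled payload.
	 */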
1985
1986 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
1987 /* Fully buffer all of the fragments */
1988 pd->flags |= PFDESC_IP_REAS;
1989
1990 pff = pf_find_fragment_by_ipv6_header(h, &frag,
1991 &pf_frag_tree);
1992
1993 /* Check if we saw the last fragment already */
1994 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
1995 fr_max > pff->fr_max)
1996 goto badfrag;
1997
1998 /* Get an entry for the fragment queue */
1999 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
2000 if (frent == NULL) {
2001 REASON_SET(reason, PFRES_MEMORY);
2002 return (PF_DROP);
2003 }
2004 pf_nfrents++;
2005 frent->fr_ip6 = h;
2006 frent->fr_m = m;
2007 frent->fr_ip6f_opt = frag;
2008 frent->fr_ip6f_hlen = off;
2009
2010 /* Might return a completely reassembled mbuf, or NULL */
2011 DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
2012 ntohl(frag.ip6f_ident), fragoff, fr_max));
2013 *m0 = m = pf_reassemble6(m0, &pff, frent, mff);
2014
2015 if (m == NULL)
2016 return (PF_DROP);
2017
2018 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2019 goto drop;
2020
2021 h = mtod(m, struct ip6_hdr *);
2022 }
2023 else if (dir == PF_IN || !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
2024 /* non-buffering fragment cache (overlaps: see RFC 5722) */
2025 int nomem = 0;
2026
2027 pff = pf_find_fragment_by_ipv6_header(h, &frag,
2028 &pf_cache_tree);
2029
2030 /* Check if we saw the last fragment already */
2031 if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2032 fr_max > pff->fr_max) {
2033 if (r->rule_flag & PFRULE_FRAGDROP)
2034 pff->fr_flags |= PFFRAG_DROP;
2035 goto badfrag;
2036 }
2037
2038 *m0 = m = pf_frag6cache(m0, h, &frag, &pff, off, mff,
2039 (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
2040 if (m == NULL) {
2041 if (nomem)
2042 goto no_mem;
2043 goto drop;
2044 }
2045
2046 if (dir == PF_IN)
2047 pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
2048
2049 if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2050 goto drop;
2051 }
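	/*
	 * To summarize the two branches above: without fragcrop/fragdrop
	 * the fragments are fully buffered and reassembled by
	 * pf_reassemble6(), so the filter only ever inspects complete
	 * packets; with either flag set, fragments are forwarded
	 * individually and only their offsets are tracked by
	 * pf_frag6cache(), which is enough to catch the overlapping
	 * fragments that RFC 5722 requires us to drop, without holding
	 * on to the payload.
	 */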
2052
2053 	/* Enforce a minimum ttl; note this may cause endless packet loops */
2054 if (r->min_ttl && h->ip6_hlim < r->min_ttl)
2055 h->ip6_hlim = r->min_ttl;
2056 return (PF_PASS);
2057
2058 no_mem:
2059 REASON_SET(reason, PFRES_MEMORY);
2060 goto dropout;
2061
2062 shortpkt:
2063 REASON_SET(reason, PFRES_SHORT);
2064 goto dropout;
2065
2066 drop:
2067 REASON_SET(reason, PFRES_NORM);
2068 goto dropout;
2069
2070 badfrag:
2071 DPFPRINTF(("dropping bad IPv6 fragment\n"));
2072 REASON_SET(reason, PFRES_FRAG);
2073 goto dropout;
2074
2075 dropout:
2076 if (pff != NULL)
2077 pf_free_fragment(pff);
2078 if (r != NULL && r->log)
2079 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
2080 return (PF_DROP);
2081 }
2082 #endif /* INET6 */
2083
2084 int
2085 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
2086 int off, void *h, struct pf_pdesc *pd)
2087 {
2088 #pragma unused(ipoff, h)
2089 struct pf_rule *r, *rm = NULL;
2090 struct tcphdr *th = pd->hdr.tcp;
2091 int rewrite = 0;
2092 int asd = 0;
2093 u_short reason;
2094 u_int8_t flags;
2095 sa_family_t af = pd->af;
2096 struct pf_ruleset *ruleset = NULL;
2097 union pf_state_xport sxport, dxport;
2098
2099 sxport.port = th->th_sport;
2100 dxport.port = th->th_dport;
2101
2102 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2103 while (r != NULL) {
2104 r->evaluations++;
2105 if (pfi_kif_match(r->kif, kif) == r->ifnot)
2106 r = r->skip[PF_SKIP_IFP].ptr;
2107 else if (r->direction && r->direction != dir)
2108 r = r->skip[PF_SKIP_DIR].ptr;
2109 else if (r->af && r->af != af)
2110 r = r->skip[PF_SKIP_AF].ptr;
2111 else if (r->proto && r->proto != pd->proto)
2112 r = r->skip[PF_SKIP_PROTO].ptr;
2113 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
2114 r->src.neg, kif))
2115 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2116 else if (r->src.xport.range.op &&
2117 !pf_match_xport(r->src.xport.range.op, r->proto_variant,
2118 &r->src.xport, &sxport))
2119 r = r->skip[PF_SKIP_SRC_PORT].ptr;
2120 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
2121 r->dst.neg, NULL))
2122 r = r->skip[PF_SKIP_DST_ADDR].ptr;
2123 else if (r->dst.xport.range.op &&
2124 !pf_match_xport(r->dst.xport.range.op, r->proto_variant,
2125 &r->dst.xport, &dxport))
2126 r = r->skip[PF_SKIP_DST_PORT].ptr;
2127 else if (r->os_fingerprint != PF_OSFP_ANY &&
2128 !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, th),
2129 r->os_fingerprint))
2130 r = TAILQ_NEXT(r, entries);
2131 else {
2132 if (r->anchor == NULL) {
2133 rm = r;
2134 break;
2135 } else {
2136 pf_step_into_anchor(&asd, &ruleset,
2137 PF_RULESET_SCRUB, &r, NULL, NULL);
2138 }
2139 }
2140 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2141 PF_RULESET_SCRUB, &r, NULL, NULL))
2142 break;
2143 }
2144
2145 if (rm == NULL || rm->action == PF_NOSCRUB)
2146 return (PF_PASS);
2147 else {
2148 r->packets[dir == PF_OUT]++;
2149 r->bytes[dir == PF_OUT] += pd->tot_len;
2150 }
2151
2152 if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
2153 pd->flags |= PFDESC_TCP_NORM;
2154
2155 flags = th->th_flags;
2156 if (flags & TH_SYN) {
2157 /* Illegal packet */
2158 if (flags & TH_RST)
2159 goto tcp_drop;
2160
2161 if (flags & TH_FIN)
2162 flags &= ~TH_FIN;
2163 } else {
2164 /* Illegal packet */
2165 if (!(flags & (TH_ACK|TH_RST)))
2166 goto tcp_drop;
2167 }
2168
2169 if (!(flags & TH_ACK)) {
2170 /* These flags are only valid if ACK is set */
2171 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
2172 goto tcp_drop;
2173 }
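	/*
	 * The flag checks above, summarized (illustrative):
	 *
	 *	SYN|RST                  -> drop (contradictory)
	 *	SYN|FIN                  -> FIN cleared, passes as SYN
	 *	none of SYN/ACK/RST      -> drop (e.g. NULL or FIN scans)
	 *	FIN, PSH or URG, no ACK  -> drop
	 */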
2174
2175 /* Check for illegal header length */
2176 if (th->th_off < (sizeof (struct tcphdr) >> 2))
2177 goto tcp_drop;
2178
2179 /* If flags changed, or reserved data set, then adjust */
2180 if (flags != th->th_flags || th->th_x2 != 0) {
2181 u_int16_t ov, nv;
2182
2183 ov = *(u_int16_t *)(&th->th_ack + 1);
2184 th->th_flags = flags;
2185 th->th_x2 = 0;
2186 nv = *(u_int16_t *)(&th->th_ack + 1);
2187
2188 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
2189 rewrite = 1;
2190 }
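	/*
	 * For reference: th_off/th_x2 and th_flags occupy the 16-bit
	 * word immediately following th_ack, so ov/nv above snapshot
	 * that word before and after the edit and pf_cksum_fixup()
	 * patches the checksum incrementally (in the style of RFC 1624)
	 * instead of recomputing it over the whole segment.
	 */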
2191
2192 /* Remove urgent pointer, if TH_URG is not set */
2193 if (!(flags & TH_URG) && th->th_urp) {
2194 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
2195 th->th_urp = 0;
2196 rewrite = 1;
2197 }
2198
2199 	/* Process options; if anything was sanitized, the header is
2200 	 * copied back to the mbuf below */
2201 if (r->max_mss) {
2202 int rv = pf_normalize_tcpopt(r, dir, kif, pd, m, th, off,
2203 &rewrite);
2204 if (rv == PF_DROP)
2205 return rv;
2206 m = pd->mp;
2207 }
2208
2209 if (rewrite) {
2210 struct mbuf *mw = pf_lazy_makewritable(pd, m,
2211 off + sizeof (*th));
2212 if (!mw) {
2213 REASON_SET(&reason, PFRES_MEMORY);
2214 if (r->log)
2215 				PFLOG_PACKET(kif, h, m, af, dir, reason,
2216 r, 0, 0, pd);
2217 return PF_DROP;
2218 }
2219
2220 m_copyback(mw, off, sizeof (*th), th);
2221 }
2222
2223 return (PF_PASS);
2224
2225 tcp_drop:
2226 REASON_SET(&reason, PFRES_NORM);
2227 if (rm != NULL && r->log)
2228 		PFLOG_PACKET(kif, h, m, af, dir, reason, r, NULL, NULL, pd);
2229 return (PF_DROP);
2230 }
2231
2232 int
2233 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
2234 struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
2235 {
2236 #pragma unused(dst)
2237 u_int32_t tsval, tsecr;
2238 u_int8_t hdr[60];
2239 u_int8_t *opt;
2240
2241 VERIFY(src->scrub == NULL);
2242
2243 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
2244 if (src->scrub == NULL)
2245 return (1);
2246 bzero(src->scrub, sizeof (*src->scrub));
2247
2248 switch (pd->af) {
2249 #if INET
2250 case AF_INET: {
2251 struct ip *h = mtod(m, struct ip *);
2252 src->scrub->pfss_ttl = h->ip_ttl;
2253 break;
2254 }
2255 #endif /* INET */
2256 #if INET6
2257 case AF_INET6: {
2258 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
2259 src->scrub->pfss_ttl = h->ip6_hlim;
2260 break;
2261 }
2262 #endif /* INET6 */
2263 }
2264
2265
2266 /*
2267 	 * All normalizations below are only begun if we see the start of
2268 	 * the connection. They must all set an enabled bit in pfss_flags.
2269 */
2270 if ((th->th_flags & TH_SYN) == 0)
2271 return (0);
2272
2273
2274 if (th->th_off > (sizeof (struct tcphdr) >> 2) && src->scrub &&
2275 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
2276 /* Diddle with TCP options */
2277 int hlen;
2278 opt = hdr + sizeof (struct tcphdr);
2279 hlen = (th->th_off << 2) - sizeof (struct tcphdr);
2280 while (hlen >= TCPOLEN_TIMESTAMP) {
2281 switch (*opt) {
2282 case TCPOPT_EOL: /* FALLTHROUGH */
2283 case TCPOPT_NOP:
2284 opt++;
2285 hlen--;
2286 break;
2287 case TCPOPT_TIMESTAMP:
2288 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2289 src->scrub->pfss_flags |=
2290 PFSS_TIMESTAMP;
2291 src->scrub->pfss_ts_mod =
2292 htonl(random());
2293
2294 /* note PFSS_PAWS not set yet */
2295 memcpy(&tsval, &opt[2],
2296 sizeof (u_int32_t));
2297 memcpy(&tsecr, &opt[6],
2298 sizeof (u_int32_t));
2299 src->scrub->pfss_tsval0 = ntohl(tsval);
2300 src->scrub->pfss_tsval = ntohl(tsval);
2301 src->scrub->pfss_tsecr = ntohl(tsecr);
2302 getmicrouptime(&src->scrub->pfss_last);
2303 }
2304 /* FALLTHROUGH */
2305 default:
2306 hlen -= MAX(opt[1], 2);
2307 opt += MAX(opt[1], 2);
2308 break;
2309 }
2310 }
2311 }
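	/*
	 * If a timestamp option was seen on this SYN, the scrub state now
	 * carries a random per-connection offset (pfss_ts_mod) and the
	 * peer's initial tsval.  Sketch of the modulation later applied by
	 * pf_normalize_tcp_stateful() to every packet from this peer:
	 *
	 *	tsval' = htonl(ntohl(tsval) + pfss_ts_mod)
	 *
	 * with the matching subtraction applied to the tsecr echoed in
	 * the other direction, hiding the host's raw timestamp clock.
	 */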
2312
2313 return (0);
2314 }
2315
2316 void
2317 pf_normalize_tcp_cleanup(struct pf_state *state)
2318 {
2319 if (state->src.scrub)
2320 pool_put(&pf_state_scrub_pl, state->src.scrub);
2321 if (state->dst.scrub)
2322 pool_put(&pf_state_scrub_pl, state->dst.scrub);
2323
2324 /* Someday... flush the TCP segment reassembly descriptors. */
2325 }
2326
2327 int
2328 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
2329 u_short *reason, struct tcphdr *th, struct pf_state *state,
2330 struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
2331 {
2332 struct timeval uptime;
2333 u_int32_t tsval, tsecr;
2334 u_int tsval_from_last;
2335 u_int8_t hdr[60];
2336 u_int8_t *opt;
2337 int copyback = 0;
2338 int got_ts = 0;
2339
2340 VERIFY(src->scrub || dst->scrub);
2341
2342 /*
2343 * Enforce the minimum TTL seen for this connection. Negate a common
2344 * technique to evade an intrusion detection system and confuse
2345 * firewall state code.
2346 */
2347 switch (pd->af) {
2348 #if INET
2349 case AF_INET: {
2350 if (src->scrub) {
2351 struct ip *h = mtod(m, struct ip *);
2352 if (h->ip_ttl > src->scrub->pfss_ttl)
2353 src->scrub->pfss_ttl = h->ip_ttl;
2354 h->ip_ttl = src->scrub->pfss_ttl;
2355 }
2356 break;
2357 }
2358 #endif /* INET */
2359 #if INET6
2360 case AF_INET6: {
2361 if (src->scrub) {
2362 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
2363 if (h->ip6_hlim > src->scrub->pfss_ttl)
2364 src->scrub->pfss_ttl = h->ip6_hlim;
2365 h->ip6_hlim = src->scrub->pfss_ttl;
2366 }
2367 break;
2368 }
2369 #endif /* INET6 */
2370 }
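	/*
	 * Illustration of the clamping above: pfss_ttl tracks the highest
	 * TTL/hop limit seen from this peer and every packet is raised to
	 * it.  If a host normally sends with TTL 64 and an attacker
	 * injects a segment with TTL 2 (hoping it reaches the firewall or
	 * an IDS but expires before the real endpoint), the segment is
	 * rewritten to TTL 64 and the evasion fails.
	 */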
2371
2372 if (th->th_off > (sizeof (struct tcphdr) >> 2) &&
2373 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
2374 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
2375 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
2376 /* Diddle with TCP options */
2377 int hlen;
2378 opt = hdr + sizeof (struct tcphdr);
2379 hlen = (th->th_off << 2) - sizeof (struct tcphdr);
2380 while (hlen >= TCPOLEN_TIMESTAMP) {
2381 switch (*opt) {
2382 case TCPOPT_EOL: /* FALLTHROUGH */
2383 case TCPOPT_NOP:
2384 opt++;
2385 hlen--;
2386 break;
2387 case TCPOPT_TIMESTAMP:
2388 /*
2389 * Modulate the timestamps. Can be used for
2390 * NAT detection, OS uptime determination or
2391 * reboot detection.
2392 */
2393
2394 if (got_ts) {
2395 /* Huh? Multiple timestamps!? */
2396 if (pf_status.debug >= PF_DEBUG_MISC) {
2397 DPFPRINTF(("multiple TS??"));
2398 pf_print_state(state);
2399 printf("\n");
2400 }
2401 REASON_SET(reason, PFRES_TS);
2402 return (PF_DROP);
2403 }
2404 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2405 memcpy(&tsval, &opt[2],
2406 sizeof (u_int32_t));
2407 if (tsval && src->scrub &&
2408 (src->scrub->pfss_flags &
2409 PFSS_TIMESTAMP)) {
2410 tsval = ntohl(tsval);
2411 pf_change_a(&opt[2],
2412 &th->th_sum,
2413 htonl(tsval +
2414 src->scrub->pfss_ts_mod),
2415 0);
2416 copyback = 1;
2417 }
2418
2419 /* Modulate TS reply iff valid (!0) */
2420 memcpy(&tsecr, &opt[6],
2421 sizeof (u_int32_t));
2422 if (tsecr && dst->scrub &&
2423 (dst->scrub->pfss_flags &
2424 PFSS_TIMESTAMP)) {
2425 tsecr = ntohl(tsecr)
2426 - dst->scrub->pfss_ts_mod;
2427 pf_change_a(&opt[6],
2428 &th->th_sum, htonl(tsecr),
2429 0);
2430 copyback = 1;
2431 }
2432 got_ts = 1;
2433 }
2434 /* FALLTHROUGH */
2435 default:
2436 hlen -= MAX(opt[1], 2);
2437 opt += MAX(opt[1], 2);
2438 break;
2439 }
2440 }
2441 if (copyback) {
2442 			/* Copy back the options; the caller copies back the header */
2443 int optoff = off + sizeof (*th);
2444 int optlen = (th->th_off << 2) - sizeof (*th);
2445 m = pf_lazy_makewritable(pd, m, optoff + optlen);
2446 if (!m) {
2447 REASON_SET(reason, PFRES_MEMORY);
2448 return PF_DROP;
2449 }
2450 *writeback = optoff + optlen;
2451 m_copyback(m, optoff, optlen, hdr + sizeof (*th));
2452 }
2453 }
2454
2455
2456 /*
2457 * Must invalidate PAWS checks on connections idle for too long.
2458 * The fastest allowed timestamp clock is 1ms. That turns out to
2459 * be about 24 days before it wraps. XXX Right now our lowerbound
2460 * TS echo check only works for the first 12 days of a connection
2461 * when the TS has exhausted half its 32bit space
2462 */
2463 #define TS_MAX_IDLE (24*24*60*60)
2464 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */
2465
2466 getmicrouptime(&uptime);
2467 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
2468 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
2469 pf_time_second() - state->creation > TS_MAX_CONN)) {
2470 if (pf_status.debug >= PF_DEBUG_MISC) {
2471 DPFPRINTF(("src idled out of PAWS\n"));
2472 pf_print_state(state);
2473 printf("\n");
2474 }
2475 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
2476 | PFSS_PAWS_IDLED;
2477 }
2478 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
2479 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
2480 if (pf_status.debug >= PF_DEBUG_MISC) {
2481 DPFPRINTF(("dst idled out of PAWS\n"));
2482 pf_print_state(state);
2483 printf("\n");
2484 }
2485 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
2486 | PFSS_PAWS_IDLED;
2487 }
2488
2489 if (got_ts && src->scrub && dst->scrub &&
2490 (src->scrub->pfss_flags & PFSS_PAWS) &&
2491 (dst->scrub->pfss_flags & PFSS_PAWS)) {
2492 /*
2493 * Validate that the timestamps are "in-window".
2494 * RFC1323 describes TCP Timestamp options that allow
2495 * measurement of RTT (round trip time) and PAWS
2496 * (protection against wrapped sequence numbers). PAWS
2497 * gives us a set of rules for rejecting packets on
2498 * long fat pipes (packets that were somehow delayed
2499 * in transit longer than the time it took to send the
2500 	 * full TCP sequence space of 4GB). We can use these
2501 * rules and infer a few others that will let us treat
2502 * the 32bit timestamp and the 32bit echoed timestamp
2503 * as sequence numbers to prevent a blind attacker from
2504 * inserting packets into a connection.
2505 *
2506 * RFC1323 tells us:
2507 * - The timestamp on this packet must be greater than
2508 * or equal to the last value echoed by the other
2509 * endpoint. The RFC says those will be discarded
2510 * since it is a dup that has already been acked.
2511 * This gives us a lowerbound on the timestamp.
2512 * timestamp >= other last echoed timestamp
2513 * - The timestamp will be less than or equal to
2514 * the last timestamp plus the time between the
2515 * last packet and now. The RFC defines the max
2516 * clock rate as 1ms. We will allow clocks to be
2517 * up to 10% fast and will allow a total difference
2518 	 * of 30 seconds due to a route change. And this
2519 * gives us an upperbound on the timestamp.
2520 * timestamp <= last timestamp + max ticks
2521 * We have to be careful here. Windows will send an
2522 * initial timestamp of zero and then initialize it
2523 * to a random value after the 3whs; presumably to
2524 * avoid a DoS by having to call an expensive RNG
2525 * during a SYN flood. Proof MS has at least one
2526 * good security geek.
2527 *
2528 * - The TCP timestamp option must also echo the other
2529 	 * endpoint's timestamp. The timestamp echoed is the
2530 * one carried on the earliest unacknowledged segment
2531 * on the left edge of the sequence window. The RFC
2532 * states that the host will reject any echoed
2533 * timestamps that were larger than any ever sent.
2534 * This gives us an upperbound on the TS echo.
2535 	 * tsecr <= largest_tsval
2536 * - The lowerbound on the TS echo is a little more
2537 * tricky to determine. The other endpoint's echoed
2538 * values will not decrease. But there may be
2539 * network conditions that re-order packets and
2540 * cause our view of them to decrease. For now the
2541 * only lowerbound we can safely determine is that
2542 * the TS echo will never be less than the original
2543 * TS. XXX There is probably a better lowerbound.
2544 * Remove TS_MAX_CONN with better lowerbound check.
2545 	 * tsecr >= other original TS
2546 *
2547 * It is also important to note that the fastest
2548 * timestamp clock of 1ms will wrap its 32bit space in
2549 * 24 days. So we just disable TS checking after 24
2550 * days of idle time. We actually must use a 12d
2551 * connection limit until we can come up with a better
2552 * lowerbound to the TS echo check.
2553 */
2554 struct timeval delta_ts;
2555 int ts_fudge;
2556
2557
2558 /*
2559 	 * PFTM_TS_DIFF is how many seconds of leeway to allow in
2560 	 * a host's timestamp, covering the case where the previous
2561 	 * packet got delayed in transit for much longer than
2562 	 * this packet.
2563 */
2564 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
2565 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
2566
2567
2568 /* Calculate max ticks since the last timestamp */
2569 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */
2570 #define TS_MICROSECS 1000000 /* microseconds per second */
2571 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
2572 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
2573 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
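		/*
		 * Worked example (illustrative numbers): with ts_fudge at
		 * its usual default of 30 seconds and a packet arriving
		 * 2.5 seconds after the previous one,
		 *
		 *	tsval_from_last  = (2 + 30) * 1100            = 35200
		 *	tsval_from_last += 500000 / (1000000 / 1100)  +=   550
		 *
		 * so this peer's timestamp may advance by at most ~35750
		 * ticks and still be accepted as in-window.
		 */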
2574
2575
2576 if ((src->state >= TCPS_ESTABLISHED &&
2577 dst->state >= TCPS_ESTABLISHED) &&
2578 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
2579 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
2580 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
2581 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
2582 /*
2583 * Bad RFC1323 implementation or an insertion attack.
2584 *
2585 * - Solaris 2.6 and 2.7 are known to send another ACK
2586 * after the FIN,FIN|ACK,ACK closing that carries
2587 * an old timestamp.
2588 */
2589
2590 DPFPRINTF(("Timestamp failed %c%c%c%c\n",
2591 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
2592 SEQ_GT(tsval, src->scrub->pfss_tsval +
2593 tsval_from_last) ? '1' : ' ',
2594 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
2595 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
2596 DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u "
2597 "idle: %lus %ums\n",
2598 tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
2599 delta_ts.tv_usec / 1000));
2600 DPFPRINTF((" src->tsval: %u tsecr: %u\n",
2601 src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2602 DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u\n",
2603 dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
2604 dst->scrub->pfss_tsval0));
2605 if (pf_status.debug >= PF_DEBUG_MISC) {
2606 pf_print_state(state);
2607 pf_print_flags(th->th_flags);
2608 printf("\n");
2609 }
2610 REASON_SET(reason, PFRES_TS);
2611 return (PF_DROP);
2612 }
2613
2614 /* XXX I'd really like to require tsecr but it's optional */
2615
2616 } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
2617 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
2618 || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
2619 src->scrub && dst->scrub &&
2620 (src->scrub->pfss_flags & PFSS_PAWS) &&
2621 (dst->scrub->pfss_flags & PFSS_PAWS)) {
2622 /*
2623 * Didn't send a timestamp. Timestamps aren't really useful
2624 * when:
2625 * - connection opening or closing (often not even sent).
2626 	 * but we must not let an attacker put a FIN on a
2627 * data packet to sneak it through our ESTABLISHED check.
2628 * - on a TCP reset. RFC suggests not even looking at TS.
2629 * - on an empty ACK. The TS will not be echoed so it will
2630 * probably not help keep the RTT calculation in sync and
2631 * there isn't as much danger when the sequence numbers
2632 * got wrapped. So some stacks don't include TS on empty
2633 * ACKs :-(
2634 *
2635 * To minimize the disruption to mostly RFC1323 conformant
2636 * stacks, we will only require timestamps on data packets.
2637 *
2638 * And what do ya know, we cannot require timestamps on data
2639 * packets. There appear to be devices that do legitimate
2640 * TCP connection hijacking. There are HTTP devices that allow
2641 * a 3whs (with timestamps) and then buffer the HTTP request.
2642 * If the intermediate device has the HTTP response cache, it
2643 * will spoof the response but not bother timestamping its
2644 * packets. So we can look for the presence of a timestamp in
2645 * the first data packet and if there, require it in all future
2646 * packets.
2647 */
2648
2649 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
2650 /*
2651 * Hey! Someone tried to sneak a packet in. Or the
2652 * stack changed its RFC1323 behavior?!?!
2653 */
2654 if (pf_status.debug >= PF_DEBUG_MISC) {
2655 DPFPRINTF(("Did not receive expected RFC1323 "
2656 "timestamp\n"));
2657 pf_print_state(state);
2658 pf_print_flags(th->th_flags);
2659 printf("\n");
2660 }
2661 REASON_SET(reason, PFRES_TS);
2662 return (PF_DROP);
2663 }
2664 }
2665
2666
2667 /*
2668 	 * We will note whether a host sends its data packets with or without
2669 	 * timestamps, and require all data packets to contain a timestamp
2670 * if the first does. PAWS implicitly requires that all data packets be
2671 * timestamped. But I think there are middle-man devices that hijack
2672 * TCP streams immediately after the 3whs and don't timestamp their
2673 * packets (seen in a WWW accelerator or cache).
2674 */
2675 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
2676 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
2677 if (got_ts)
2678 src->scrub->pfss_flags |= PFSS_DATA_TS;
2679 else {
2680 src->scrub->pfss_flags |= PFSS_DATA_NOTS;
2681 if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2682 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
2683 /* Don't warn if other host rejected RFC1323 */
2684 DPFPRINTF(("Broken RFC1323 stack did not "
2685 "timestamp data packet. Disabled PAWS "
2686 "security.\n"));
2687 pf_print_state(state);
2688 pf_print_flags(th->th_flags);
2689 printf("\n");
2690 }
2691 }
2692 }
2693
2694
2695 /*
2696 * Update PAWS values
2697 */
2698 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
2699 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
2700 getmicrouptime(&src->scrub->pfss_last);
2701 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
2702 (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2703 src->scrub->pfss_tsval = tsval;
2704
2705 if (tsecr) {
2706 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
2707 (src->scrub->pfss_flags & PFSS_PAWS) == 0)
2708 src->scrub->pfss_tsecr = tsecr;
2709
2710 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
2711 (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
2712 src->scrub->pfss_tsval0 == 0)) {
2713 /* tsval0 MUST be the lowest timestamp */
2714 src->scrub->pfss_tsval0 = tsval;
2715 }
2716
2717 /* Only fully initialized after a TS gets echoed */
2718 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
2719 src->scrub->pfss_flags |= PFSS_PAWS;
2720 }
2721 }
2722
2723 /* I have a dream.... TCP segment reassembly.... */
2724 return (0);
2725 }
2726
2727 static int
2728 pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
2729 struct pf_pdesc *pd, struct mbuf *m, struct tcphdr *th, int off,
2730 int *rewrptr)
2731 {
2732 #pragma unused(dir, kif)
2733 sa_family_t af = pd->af;
2734 u_int16_t *mss;
2735 int thoff;
2736 int opt, cnt, optlen = 0;
2737 int rewrite = 0;
2738 u_char opts[MAX_TCPOPTLEN];
2739 u_char *optp = opts;
2740
2741 thoff = th->th_off << 2;
2742 cnt = thoff - sizeof (struct tcphdr);
2743
2744 if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt,
2745 NULL, NULL, af))
2746 return PF_DROP;
2747
2748 for (; cnt > 0; cnt -= optlen, optp += optlen) {
2749 opt = optp[0];
2750 if (opt == TCPOPT_EOL)
2751 break;
2752 if (opt == TCPOPT_NOP)
2753 optlen = 1;
2754 else {
2755 if (cnt < 2)
2756 break;
2757 optlen = optp[1];
2758 if (optlen < 2 || optlen > cnt)
2759 break;
2760 }
2761 switch (opt) {
2762 case TCPOPT_MAXSEG:
2763 mss = (u_int16_t *)(void *)(optp + 2);
2764 if ((ntohs(*mss)) > r->max_mss) {
2765 /*
2766 * <jhw@apple.com>
2767 * Only do the TCP checksum fixup if delayed
2768 * checksum calculation will not be performed.
2769 */
2770 if (m->m_pkthdr.rcvif ||
2771 !(m->m_pkthdr.csum_flags & CSUM_TCP))
2772 th->th_sum = pf_cksum_fixup(th->th_sum,
2773 *mss, htons(r->max_mss), 0);
2774 *mss = htons(r->max_mss);
2775 rewrite = 1;
2776 }
2777 break;
2778 default:
2779 break;
2780 }
2781 }
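	/*
	 * Example of the clamp above (hypothetical values): with a rule
	 * of max_mss = 1200 and a SYN advertising MSS 1460, *mss is
	 * rewritten to htons(1200) and the option block is flagged for
	 * copyback below.  The incremental checksum fixup is skipped only
	 * when it would be wasted work, i.e. on a locally-originated mbuf
	 * (no rcvif) whose TCP checksum will be computed later anyway
	 * (CSUM_TCP still set).
	 */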
2782
2783 if (rewrite) {
2784 struct mbuf *mw;
2785 u_short reason;
2786
2787 mw = pf_lazy_makewritable(pd, pd->mp,
2788 off + sizeof (*th) + thoff);
2789 if (!mw) {
2790 REASON_SET(&reason, PFRES_MEMORY);
2791 if (r->log)
2792 			PFLOG_PACKET(kif, NULL, m, af, dir, reason,
2793 r, 0, 0, pd);
2794 return PF_DROP;
2795 }
2796
2797 *rewrptr = 1;
2798 m_copyback(mw, off + sizeof (*th), thoff - sizeof (*th), opts);
2799 }
2800
2801 return PF_PASS;
2802 }
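/*
 * For context: on pf-based systems the normalizations in this file are
 * typically driven by "scrub" rules, e.g.
 *
 *	scrub in all fragment reassemble min-ttl 64 max-mss 1440
 *
 * which maps onto the code above as full IP fragment reassembly (rather
 * than the crop/drop variants), r->min_ttl enforcement, and the MSS clamp
 * in pf_normalize_tcpopt(); "scrub ... reassemble tcp" likewise sets
 * PFRULE_REASSEMBLE_TCP and enables the stateful timestamp checks in
 * pf_normalize_tcp_stateful().
 */

#if 0
/*
 * Illustrative sketch only (not compiled): the incremental checksum
 * update used throughout this file via pf_cksum_fixup().  Rewriting a
 * 16-bit field of a checksummed header from ov to nv can patch the
 * checksum in ones-complement arithmetic (cf. RFC 1624) without
 * touching the rest of the segment:
 */
static u_int16_t
cksum_fixup_sketch(u_int16_t cksum, u_int16_t ov, u_int16_t nv)
{
	u_int32_t l = cksum + ov - nv;	/* ones-complement difference */

	l = (l >> 16) + (l & 0xffff);	/* fold the carry back in */
	l &= 0xffff;
	return ((u_int16_t)l);
}
#endif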