1 /* $KAME: frag6.c,v 1.23 2000/02/28 16:18:11 itojun Exp $ */
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/malloc.h>
36 #include <sys/domain.h>
37 #include <sys/protosw.h>
38 #include <sys/socket.h>
39 #include <sys/errno.h>
41 #include <sys/kernel.h>
42 #include <sys/syslog.h>
43 #include <kern/queue.h>
46 #include <net/route.h>
48 #include <netinet/in.h>
49 #include <netinet/in_var.h>
50 #include <netinet/ip6.h>
51 #include <netinet6/ip6_var.h>
52 #if !(defined(__FreeBSD__) && __FreeBSD__ >= 3) && !defined(__OpenBSD__) && !(defined(__bsdi__) && _BSDI_VERSION >= 199802) && !defined(__APPLE__)
53 #include <netinet6/in6_pcb.h>
55 #include <netinet/icmp6.h>
57 #include <net/net_osdep.h>
60 * Define it to get correct per-interface statistics.
61 * Doing so requires an extra routing table lookup per fragment,
62 * which may or may not be a performance hit.
64 #define IN6_IFSTAT_STRICT
/*
 * Prototypes for the file-local (static) helpers.
 * frag6_enq and frag6_deq operate on fragment entries (struct ip6asfrag);
 * frag6_insque, frag6_remque and frag6_freef operate on per-packet
 * reassembly queues (struct ip6q).
 */
66 static void frag6_enq
__P((struct ip6asfrag
*, struct ip6asfrag
*));
67 static void frag6_deq
__P((struct ip6asfrag
*));
68 static void frag6_insque
__P((struct ip6q
*, struct ip6q
*));
69 static void frag6_remque
__P((struct ip6q
*));
70 static void frag6_freef
__P((struct ip6q
*));
/*
 * Flag set to 1 before the timer code walks the reassembly queues and
 * cleared to 0 afterwards; frag6_input also clears it on its exit paths,
 * and the drain code tests it before freeing anything.
 */
72 int frag6_doing_reass
;
/*
 * Count of reassembly queues currently held: incremented when a queue is
 * created for a first-arriving fragment, decremented when a queue is freed
 * or reassembly finishes, and compared against ip6_maxfragpackets.
 */
73 u_int frag6_nfragpackets
;
/*
 * List head of the reassembly queues; it is initialised so that its next
 * and prev links point at itself, and list walks stop when they reach it.
 */
74 struct ip6q ip6q
; /* ip6 reassemble queue */
77 #if !defined(M_FTABLE) && (defined(__FreeBSD__) && __FreeBSD__ >= 3)
78 MALLOC_DEFINE(M_FTABLE
, "fragment", "fragment reassembly header");
81 #ifndef offsetof /* XXX */
82 #define offsetof(type, member) ((size_t)(&((type *)0)->member))
86 * Initialise reassembly queue and fragment identifier.
94 * In many cases random() does NOT return a random number here,
95 * because initialization during bootstrap occurs in a fixed order.
98 ip6q
.ip6q_next
= ip6q
.ip6q_prev
= &ip6q
;
99 ip6_id
= random() ^ tv
.tv_usec
;
103 * In RFC2460, the fragmentation and reassembly rules do not agree with each
104 * other in terms of Next Header field handling in the Fragment header.
105 * While the sender will use the same value for all of the fragmented packets,
106 * the receiver is advised not to check the consistency.
108 * fragment rule (p20):
109 * (2) A Fragment header containing:
110 * The Next Header value that identifies the first header of
111 * the Fragmentable Part of the original packet.
112 * -> next header field is same for all fragments
114 * reassembly rule (p21):
115 * The Next Header field of the last header of the Unfragmentable
116 * Part is obtained from the Next Header field of the first
117 * fragment's Fragment header.
118 * -> should grab it from the first fragment only
120 * The following note also contradicts the fragment rule - no one is going to
121 * send different fragments with different next header fields.
123 * additional note (p22):
124 * The Next Header values in the Fragment headers of different
125 * fragments of the same original packet may differ. Only the value
126 * from the Offset zero fragment packet is used for reassembly.
127 * -> should grab it from the first fragment only
129 * There is no explicit reason given in the RFC. Historical reason maybe?
135 frag6_input(mp
, offp
, proto
)
139 struct mbuf
*m
= *mp
, *t
;
141 struct ip6_frag
*ip6f
;
143 struct ip6asfrag
*af6
, *ip6af
, *af6dwn
;
144 int offset
= *offp
, nxt
, i
, next
;
146 int fragoff
, frgpartlen
; /* must be larger than u_int16_t */
147 struct ifnet
*dstifp
;
148 #ifdef IN6_IFSTAT_STRICT
149 static struct route_in6 ro
;
150 struct sockaddr_in6
*dst
;
153 ip6
= mtod(m
, struct ip6_hdr
*);
154 #ifndef PULLDOWN_TEST
155 IP6_EXTHDR_CHECK(m
, offset
, sizeof(struct ip6_frag
), IPPROTO_DONE
);
156 ip6f
= (struct ip6_frag
*)((caddr_t
)ip6
+ offset
);
158 IP6_EXTHDR_GET(ip6f
, struct ip6_frag
*, m
, offset
, sizeof(*ip6f
));
164 #ifdef IN6_IFSTAT_STRICT
165 /* find the destination interface of the packet. */
166 dst
= (struct sockaddr_in6
*)&ro
.ro_dst
;
168 && ((ro
.ro_rt
->rt_flags
& RTF_UP
) == 0
169 || !IN6_ARE_ADDR_EQUAL(&dst
->sin6_addr
, &ip6
->ip6_dst
))) {
171 ro
.ro_rt
= (struct rtentry
*)0;
173 if (ro
.ro_rt
== NULL
) {
174 bzero(dst
, sizeof(*dst
));
175 dst
->sin6_family
= AF_INET6
;
176 dst
->sin6_len
= sizeof(struct sockaddr_in6
);
177 dst
->sin6_addr
= ip6
->ip6_dst
;
180 rtalloc((struct route
*)&ro
);
182 rtcalloc((struct route
*)&ro
);
184 if (ro
.ro_rt
!= NULL
&& ro
.ro_rt
->rt_ifa
!= NULL
)
185 dstifp
= ((struct in6_ifaddr
*)ro
.ro_rt
->rt_ifa
)->ia_ifp
;
187 /* we are violating the spec, this is not the destination interface */
188 if ((m
->m_flags
& M_PKTHDR
) != 0)
189 dstifp
= m
->m_pkthdr
.rcvif
;
192 /* jumbo payload can't contain a fragment header */
193 if (ip6
->ip6_plen
== 0) {
194 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
, offset
);
195 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
200 * check whether the fragment packet's fragment length is
201 * a multiple of 8 octets.
202 * sizeof(struct ip6_frag) == 8
203 * sizeof(struct ip6_hdr) == 40
205 if ((ip6f
->ip6f_offlg
& IP6F_MORE_FRAG
) &&
206 (((ntohs(ip6
->ip6_plen
) - offset
) & 0x7) != 0)) {
207 icmp6_error(m
, ICMP6_PARAM_PROB
,
208 ICMP6_PARAMPROB_HEADER
,
209 offsetof(struct ip6_hdr
, ip6_plen
));
210 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
214 ip6stat
.ip6s_fragments
++;
215 in6_ifstat_inc(dstifp
, ifs6_reass_reqd
);
217 /* offset now points to data portion */
218 offset
+= sizeof(struct ip6_frag
);
220 for (q6
= ip6q
.ip6q_next
; q6
!= &ip6q
; q6
= q6
->ip6q_next
)
221 if (ip6f
->ip6f_ident
== q6
->ip6q_ident
&&
222 IN6_ARE_ADDR_EQUAL(&ip6
->ip6_src
, &q6
->ip6q_src
) &&
223 IN6_ARE_ADDR_EQUAL(&ip6
->ip6_dst
, &q6
->ip6q_dst
))
228 * the first fragment to arrive, create a reassembly queue.
231 frag6_nfragpackets
++;
234 * Enforce an upper bound on the number of fragmented packets
235 * for which we attempt reassembly:
236 * If ip6_maxfragpackets is 0, never accept fragments.
237 * If ip6_maxfragpackets is -1, accept all fragments without limitation.
239 if (frag6_nfragpackets
>= (u_int
)ip6_maxfragpackets
) {
240 ip6stat
.ip6s_fragoverflow
++;
241 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
242 frag6_freef(ip6q
.ip6q_prev
);
244 q6
= (struct ip6q
*)_MALLOC(sizeof(struct ip6q
), M_FTABLE
,
248 bzero(q6
, sizeof(*q6
));
250 frag6_insque(q6
, &ip6q
);
252 /* ip6q_nxt will be filled afterwards, from 1st fragment */
253 q6
->ip6q_down
= q6
->ip6q_up
= (struct ip6asfrag
*)q6
;
255 q6
->ip6q_nxtp
= (u_char
*)nxtp
;
257 q6
->ip6q_ident
= ip6f
->ip6f_ident
;
258 q6
->ip6q_arrive
= 0; /* Is it used anywhere? */
259 q6
->ip6q_ttl
= IPV6_FRAGTTL
;
260 q6
->ip6q_src
= ip6
->ip6_src
;
261 q6
->ip6q_dst
= ip6
->ip6_dst
;
262 q6
->ip6q_unfrglen
= -1; /* The 1st fragment has not arrived. */
266 * If it's the 1st fragment, record the length of the
267 * unfragmentable part and the next header of the fragment header.
269 fragoff
= ntohs(ip6f
->ip6f_offlg
& IP6F_OFF_MASK
);
271 q6
->ip6q_unfrglen
= offset
- sizeof(struct ip6_hdr
)
272 - sizeof(struct ip6_frag
);
273 q6
->ip6q_nxt
= ip6f
->ip6f_nxt
;
277 * Check that the reassembled packet would not exceed 65535 bytes
279 * If it would exceed, discard the fragment and return an ICMP error.
281 frgpartlen
= sizeof(struct ip6_hdr
) + ntohs(ip6
->ip6_plen
) - offset
;
282 if (q6
->ip6q_unfrglen
>= 0) {
283 /* The 1st fragment has already arrived. */
284 if (q6
->ip6q_unfrglen
+ fragoff
+ frgpartlen
> IPV6_MAXPACKET
) {
285 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
286 offset
- sizeof(struct ip6_frag
) +
287 offsetof(struct ip6_frag
, ip6f_offlg
));
288 return(IPPROTO_DONE
);
291 else if (fragoff
+ frgpartlen
> IPV6_MAXPACKET
) {
292 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
293 offset
- sizeof(struct ip6_frag
) +
294 offsetof(struct ip6_frag
, ip6f_offlg
));
295 return(IPPROTO_DONE
);
298 * If it's the first fragment, do the above check for each
299 * fragment already stored in the reassembly queue.
302 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
304 af6dwn
= af6
->ip6af_down
;
306 if (q6
->ip6q_unfrglen
+ af6
->ip6af_off
+ af6
->ip6af_frglen
>
308 struct mbuf
*merr
= IP6_REASS_MBUF(af6
);
309 struct ip6_hdr
*ip6err
;
310 int erroff
= af6
->ip6af_offset
;
312 /* dequeue the fragment. */
314 _FREE(af6
, M_FTABLE
);
316 /* adjust pointer. */
317 ip6err
= mtod(merr
, struct ip6_hdr
*);
320 * Restore source and destination addresses
321 * in the erroneous IPv6 header.
323 ip6err
->ip6_src
= q6
->ip6q_src
;
324 ip6err
->ip6_dst
= q6
->ip6q_dst
;
326 icmp6_error(merr
, ICMP6_PARAM_PROB
,
327 ICMP6_PARAMPROB_HEADER
,
328 erroff
- sizeof(struct ip6_frag
) +
329 offsetof(struct ip6_frag
, ip6f_offlg
));
334 ip6af
= (struct ip6asfrag
*)_MALLOC(sizeof(struct ip6asfrag
), M_FTABLE
,
338 bzero(ip6af
, sizeof(*ip6af
));
339 ip6af
->ip6af_head
= ip6
->ip6_flow
;
340 ip6af
->ip6af_len
= ip6
->ip6_plen
;
341 ip6af
->ip6af_nxt
= ip6
->ip6_nxt
;
342 ip6af
->ip6af_hlim
= ip6
->ip6_hlim
;
343 ip6af
->ip6af_mff
= ip6f
->ip6f_offlg
& IP6F_MORE_FRAG
;
344 ip6af
->ip6af_off
= fragoff
;
345 ip6af
->ip6af_frglen
= frgpartlen
;
346 ip6af
->ip6af_offset
= offset
;
347 IP6_REASS_MBUF(ip6af
) = m
;
350 af6
= (struct ip6asfrag
*)q6
;
355 * Find a segment which begins after this one does.
357 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
358 af6
= af6
->ip6af_down
)
359 if (af6
->ip6af_off
> ip6af
->ip6af_off
)
364 * If there is a preceding segment, it may provide some of
365 * our data already. If so, drop the data from the incoming
366 * segment. If it provides all of our data, drop us.
368 if (af6
->ip6af_up
!= (struct ip6asfrag
*)q6
) {
369 i
= af6
->ip6af_up
->ip6af_off
+ af6
->ip6af_up
->ip6af_frglen
372 if (i
>= ip6af
->ip6af_frglen
)
374 m_adj(IP6_REASS_MBUF(ip6af
), i
);
375 ip6af
->ip6af_off
+= i
;
376 ip6af
->ip6af_frglen
-= i
;
381 * While we overlap succeeding segments trim them or,
382 * if they are completely covered, dequeue them.
384 while (af6
!= (struct ip6asfrag
*)q6
&&
385 ip6af
->ip6af_off
+ ip6af
->ip6af_frglen
> af6
->ip6af_off
) {
386 i
= (ip6af
->ip6af_off
+ ip6af
->ip6af_frglen
) - af6
->ip6af_off
;
387 if (i
< af6
->ip6af_frglen
) {
388 af6
->ip6af_frglen
-= i
;
390 m_adj(IP6_REASS_MBUF(af6
), i
);
393 af6
= af6
->ip6af_down
;
394 m_freem(IP6_REASS_MBUF(af6
->ip6af_up
));
395 frag6_deq(af6
->ip6af_up
);
399 * If the incoming fragment overlaps some existing fragments in
400 * the reassembly queue, drop it, since it is dangerous to override
401 * existing fragments from a security point of view.
403 if (af6
->ip6af_up
!= (struct ip6asfrag
*)q6
) {
404 i
= af6
->ip6af_up
->ip6af_off
+ af6
->ip6af_up
->ip6af_frglen
407 log(LOG_ERR
, "%d bytes of a fragment from %s "
408 "overlaps the previous fragment\n",
409 i
, ip6_sprintf(&q6
->ip6q_src
));
413 if (af6
!= (struct ip6asfrag
*)q6
) {
414 i
= (ip6af
->ip6af_off
+ ip6af
->ip6af_frglen
) - af6
->ip6af_off
;
416 log(LOG_ERR
, "%d bytes of a fragment from %s "
417 "overlaps the succeeding fragment",
418 i
, ip6_sprintf(&q6
->ip6q_src
));
427 * Stick new segment in its place;
428 * check for complete reassembly.
429 * Move to front of packet queue, as we are
430 * the most recently active fragmented packet.
432 frag6_enq(ip6af
, af6
->ip6af_up
);
434 if (q6
!= ip6q
.ip6q_next
) {
436 frag6_insque(q6
, &ip6q
);
440 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
441 af6
= af6
->ip6af_down
) {
442 if (af6
->ip6af_off
!= next
) {
443 frag6_doing_reass
= 0;
446 next
+= af6
->ip6af_frglen
;
448 if (af6
->ip6af_up
->ip6af_mff
) {
449 frag6_doing_reass
= 0;
454 * Reassembly is complete; concatenate fragments.
456 ip6af
= q6
->ip6q_down
;
457 t
= m
= IP6_REASS_MBUF(ip6af
);
458 af6
= ip6af
->ip6af_down
;
460 while (af6
!= (struct ip6asfrag
*)q6
) {
461 af6dwn
= af6
->ip6af_down
;
465 t
->m_next
= IP6_REASS_MBUF(af6
);
466 m_adj(t
->m_next
, af6
->ip6af_offset
);
467 _FREE(af6
, M_FTABLE
);
471 /* adjust offset to point where the original next header starts */
472 offset
= ip6af
->ip6af_offset
- sizeof(struct ip6_frag
);
473 _FREE(ip6af
, M_FTABLE
);
474 ip6
= mtod(m
, struct ip6_hdr
*);
475 ip6
->ip6_plen
= htons((u_short
)next
+ offset
- sizeof(struct ip6_hdr
));
476 ip6
->ip6_src
= q6
->ip6q_src
;
477 ip6
->ip6_dst
= q6
->ip6q_dst
;
480 *q6
->ip6q_nxtp
= (u_char
)(nxt
& 0xff);
484 * Delete the frag6 header at as little cost as possible.
486 if (offset
< m
->m_len
) {
487 ovbcopy((caddr_t
)ip6
, (caddr_t
)ip6
+ sizeof(struct ip6_frag
),
489 m
->m_data
+= sizeof(struct ip6_frag
);
490 m
->m_len
-= sizeof(struct ip6_frag
);
492 /* this comes with no copy if the boundary is on cluster */
493 if ((t
= m_split(m
, offset
, M_DONTWAIT
)) == NULL
) {
496 frag6_nfragpackets
--;
499 m_adj(t
, sizeof(struct ip6_frag
));
504 * Store NXT to the original.
507 char *prvnxtp
= ip6_get_prevhdr(m
, offset
); /* XXX */
513 frag6_nfragpackets
--;
515 if (m
->m_flags
& M_PKTHDR
) { /* Isn't it always true? */
517 for (t
= m
; t
; t
= t
->m_next
)
519 m
->m_pkthdr
.len
= plen
;
522 ip6stat
.ip6s_reassembled
++;
523 in6_ifstat_inc(dstifp
, ifs6_reass_ok
);
526 * Tell the launch routine the next header
532 frag6_doing_reass
= 0;
536 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
537 ip6stat
.ip6s_fragdropped
++;
543 * Free a fragment reassembly header and all
544 * associated datagrams.
/*
 * Body of the queue-freeing routine (frag6_freef per the prototype above;
 * the function head is not present in this listing).
 * Walks the fragment entries chained off q6.  The entry whose fragment
 * offset is 0 has its IPv6 source/destination restored from the queue and
 * is handed to icmp6_error() as a reassembly time-exceeded report; entries
 * are released with _FREE and the global queue count is decremented.
 * NOTE(review): several lines of this function are missing from the
 * listing (loop step, handling of non-first fragments, unlinking of q6) -
 * confirm the details against the full source.
 */
550 struct ip6asfrag
*af6
, *down6
;
/* Iterate until the walk comes back to the queue header itself. */
552 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
/* mbuf chain holding this fragment's packet data */
554 struct mbuf
*m
= IP6_REASS_MBUF(af6
);
/* Remember the successor now; af6 is released with _FREE further down. */
556 down6
= af6
->ip6af_down
;
560 * Return ICMP time exceeded error for the 1st fragment.
561 * Just free other fragments.
/* A fragment offset of 0 identifies the first fragment of the packet. */
563 if (af6
->ip6af_off
== 0) {
567 ip6
= mtod(m
, struct ip6_hdr
*);
569 /* restore source and destination addresses */
570 ip6
->ip6_src
= q6
->ip6q_src
;
571 ip6
->ip6_dst
= q6
->ip6q_dst
;
573 icmp6_error(m
, ICMP6_TIME_EXCEEDED
,
574 ICMP6_TIME_EXCEED_REASSEMBLY
, 0);
/* Release the per-fragment bookkeeping entry. */
577 _FREE(af6
, M_FTABLE
);
/* One fewer packet is awaiting reassembly. */
582 frag6_nfragpackets
--;
586 * Put an ip fragment on a reassembly chain.
587 * Like insque, but pointers in middle of structure.
/*
 * Body of the fragment-insert helper (frag6_enq per the prototype above;
 * the function head is not present in this listing).
 * af6 is the entry being inserted, up6 the entry it is placed after.
 * NOTE(review): no assignment to af6->ip6af_up is visible in this
 * listing - confirm against the full source that the back link is set.
 */
591 struct ip6asfrag
*af6
, *up6
;
/* af6 takes over up6's current successor. */
594 af6
->ip6af_down
= up6
->ip6af_down
;
/* That successor now points back up at af6. */
595 up6
->ip6af_down
->ip6af_up
= af6
;
/* Finally, af6 becomes up6's successor. */
596 up6
->ip6af_down
= af6
;
600 * To frag6_enq as remque is to insque.
/*
 * Body of the fragment-unlink helper (frag6_deq per the prototype above;
 * the function head is not present in this listing).
 * Removes af6 from its up/down chain by joining its two neighbours;
 * af6's own links are not modified here.
 */
604 struct ip6asfrag
*af6
;
/* The predecessor skips over af6 to af6's successor. */
606 af6
->ip6af_up
->ip6af_down
= af6
->ip6af_down
;
/* The successor points back at af6's predecessor. */
607 af6
->ip6af_down
->ip6af_up
= af6
->ip6af_up
;
/*
 * Insert the reassembly queue `new` into the doubly linked queue list
 * immediately after `old`.
 *
 * new: queue being linked in; its prev/next links are overwritten.
 * old: queue already on the list; `new` becomes its successor.
 */
611 frag6_insque(new, old
)
612 struct ip6q
*new, *old
;
/* Set up new's own links first, while old->ip6q_next is still intact. */
614 new->ip6q_prev
= old
;
615 new->ip6q_next
= old
->ip6q_next
;
/* Then splice new between old and old's former successor. */
616 old
->ip6q_next
->ip6q_prev
= new;
617 old
->ip6q_next
= new;
/*
 * Body of the queue-unlink helper (frag6_remque per the prototype above;
 * the function head is not present in this listing).
 * Takes p6 off the doubly linked queue list by joining its neighbours;
 * p6's own prev/next links are left unchanged.
 */
/* The predecessor's forward link skips p6. */
624 p6
->ip6q_prev
->ip6q_next
= p6
->ip6q_next
;
/* The successor's backward link skips p6. */
625 p6
->ip6q_next
->ip6q_prev
= p6
->ip6q_prev
;
629 * IP timer processing;
630 * if a timer expires on a reassembly
644 extern struct route_in6 ip6_forward_rt
;
647 frag6_doing_reass
= 1;
650 while (q6
!= &ip6q
) {
653 if (q6
->ip6q_prev
->ip6q_ttl
== 0) {
654 ip6stat
.ip6s_fragtimeout
++;
655 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
656 frag6_freef(q6
->ip6q_prev
);
660 * If we are over the maximum number of fragments
661 * (due to the limit being lowered), drain off
662 * enough to get down to the new limit.
664 while (frag6_nfragpackets
> (u_int
)ip6_maxfragpackets
) {
665 ip6stat
.ip6s_fragoverflow
++;
666 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
667 frag6_freef(ip6q
.ip6q_prev
);
669 frag6_doing_reass
= 0;
673 * Routing changes might produce a better route than we last used;
674 * make sure we notice eventually, even if forwarding only for one
675 * destination and the cache is never replaced.
677 if (ip6_forward_rt
.ro_rt
) {
678 RTFREE(ip6_forward_rt
.ro_rt
);
679 ip6_forward_rt
.ro_rt
= 0;
681 if (ipsrcchk_rt
.ro_rt
) {
682 RTFREE(ipsrcchk_rt
.ro_rt
);
683 ipsrcchk_rt
.ro_rt
= 0;
691 * Drain off all datagram fragments.
696 if (frag6_doing_reass
)
698 while (ip6q
.ip6q_next
!= &ip6q
) {
699 ip6stat
.ip6s_fragdropped
++;
700 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
701 frag6_freef(ip6q
.ip6q_next
);