1 /* $FreeBSD: src/sys/net/if_stf.c,v 1.1.2.6 2001/07/24 19:10:18 brooks Exp $ */
2 /* $KAME: if_stf.c,v 1.62 2001/06/07 22:32:16 itojun Exp $ */
5 * Copyright (C) 2000 WIDE Project.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * 6to4 interface, based on RFC3056.
36 * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting.
37 * There is no address mapping defined from IPv6 multicast address to IPv4
38 * address. Therefore, we do not have IFF_MULTICAST on the interface.
40 * Due to the lack of address mapping for link-local addresses, we cannot
41 * throw packets toward link-local addresses (fe80::x). Also, we cannot throw
42 * packets to link-local multicast addresses (ff02::x).
44 * Here are interesting symptoms due to the lack of link-local address:
46 * Unicast routing exchange:
47 * - RIPng: Impossible. Uses link-local multicast packet toward ff02::9,
48 * and link-local addresses as nexthop.
49 * - OSPFv6: Impossible. OSPFv6 assumes that there's link-local address
50 * assigned to the link, and makes use of them. Also, HELLO packets use
51 * link-local multicast addresses (ff02::5 and ff02::6).
52 * - BGP4+: Maybe. You can only use global address as nexthop, and global
53 * address as TCP endpoint address.
55 * Multicast routing protocols:
56 * - PIM: Hello packet cannot be used to discover adjacent PIM routers.
57 * Adjacent PIM routers must be configured manually (is it really a spec-wise
58 * correct thing to do?).
61 * - Redirects cannot be used due to the lack of link-local address.
63 * stf interface does not have, and will not need, a link-local address.
64 * It seems to have no real benefit and does not help the above symptoms much.
65 * Even if we assign link-locals to the interface, we cannot really
66 * use link-local unicast/multicast on top of 6to4 cloud (since there's no
67 * encapsulation defined for link-local address), and the above analysis does
68 * not change. RFC3056 does not mandate the assignment of link-local address
71 * 6to4 interface has security issues. Refer to
72 * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt
73 * for details. The code tries to filter out some malicious packets.
74 * Note that there is no way to be 100% secure.
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/socket.h>
80 #include <sys/sockio.h>
82 #include <sys/errno.h>
83 #include <sys/protosw.h>
84 #include <sys/kernel.h>
85 #include <sys/syslog.h>
86 #include <machine/cpu.h>
88 #include <sys/malloc.h>
91 #include <net/route.h>
92 #include <net/netisr.h>
93 #include <net/if_types.h>
94 #include <net/if_stf.h>
96 #include <netinet/in.h>
97 #include <netinet/in_systm.h>
98 #include <netinet/ip.h>
99 #include <netinet/ip_var.h>
100 #include <netinet/in_var.h>
102 #include <netinet/ip6.h>
103 #include <netinet6/ip6_var.h>
104 #include <netinet6/in6_var.h>
105 #include <netinet/ip_ecn.h>
107 #include <netinet/ip_encap.h>
108 #include <net/dlil.h>
111 #include <net/net_osdep.h>
115 #define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002)
116 #define GET_V4(x) ((struct in_addr *)(&(x)->s6_addr16[1]))
119 struct ifnet sc_if
; /* common area */
121 struct if_proto
*stf_proto
; /* dlil protocol attached */
124 struct route __sc_ro4
;
125 struct route_in6 __sc_ro6
; /* just for safety */
127 #define sc_ro __sc_ro46.__sc_ro4
128 const struct encaptab
*encap_cookie
;
131 static struct stf_softc
*stf
;
134 void stfattach
__P((void));
135 int stf_pre_output
__P((struct ifnet
*, register struct mbuf
**, struct sockaddr
*,
136 caddr_t
, char *, char *, u_long
));
137 static u_long stf_dl_tag
=0;
141 static MALLOC_DEFINE(M_STF
, "stf", "6to4 Tunnel Interface");
143 static int ip_stf_ttl
= 40;
145 extern struct domain inetdomain
;
146 struct protosw in_stf_protosw
=
147 { SOCK_RAW
, &inetdomain
, IPPROTO_IPV6
, PR_ATOMIC
|PR_ADDR
,
148 in_stf_input
, 0, 0, rip_ctloutput
,
155 static int stf_encapcheck
__P((const struct mbuf
*, int, int, void *));
156 static struct in6_ifaddr
*stf_getsrcifa6
__P((struct ifnet
*));
157 int stf_pre_output
__P((struct ifnet
*, register struct mbuf
**, struct sockaddr
*,
158 caddr_t
, char *, char *, u_long
));
159 static int stf_checkaddr4
__P((struct stf_softc
*, struct in_addr
*,
161 static int stf_checkaddr6
__P((struct stf_softc
*, struct in6_addr
*,
163 static void stf_rtrequest
__P((int, struct rtentry
*, struct sockaddr
*));
164 int stf_ioctl
__P((struct ifnet
*, u_long
, void *));
168 int stf_add_if(struct ifnet
*ifp
)
176 int stf_del_if(struct ifnet
*ifp
)
182 int stf_add_proto(struct ddesc_head_str
*desc_head
, struct if_proto
*proto
, u_long dl_tag
)
184 /* Only one protocol may be attached at a time */
185 struct stf_softc
* stf
= (struct stf_softc
*)proto
->ifp
;
186 if (stf
->stf_proto
== NULL
)
187 stf
->stf_proto
= proto
;
189 printf("stf_add_proto: stf already has a proto\n");
197 int stf_del_proto(struct if_proto
*proto
, u_long dl_tag
)
199 if (((struct stf_softc
*)proto
->ifp
)->stf_proto
== proto
)
200 ((struct stf_softc
*)proto
->ifp
)->stf_proto
= NULL
;
212 int stf_attach_inet6(struct ifnet
*ifp
, u_long
*dl_tag
)
214 struct dlil_proto_reg_str reg
;
215 struct dlil_demux_desc desc
;
219 if (stf_dl_tag
!= 0) {
220 *dl_tag
= stf_dl_tag
;
224 TAILQ_INIT(®
.demux_desc_head
);
225 desc
.type
= DLIL_DESC_RAW
;
226 desc
.variants
.bitmask
.proto_id_length
= 0;
227 desc
.variants
.bitmask
.proto_id
= 0;
228 desc
.variants
.bitmask
.proto_id_mask
= 0;
229 desc
.native_type
= (char *) &native
;
230 TAILQ_INSERT_TAIL(®
.demux_desc_head
, &desc
, next
);
231 reg
.interface_family
= ifp
->if_family
;
232 reg
.unit_number
= ifp
->if_unit
;
234 reg
.pre_output
= stf_pre_output
;
238 reg
.default_proto
= 0;
239 reg
.protocol_family
= PF_INET6
;
241 stat
= dlil_attach_protocol(®
, &stf_dl_tag
);
242 *dl_tag
= stf_dl_tag
;
247 int stf_detach_inet6(struct ifnet
*ifp
, u_long dl_tag
)
251 stat
= dlil_find_dltag(ifp
->if_family
, ifp
->if_unit
, AF_INET6
, &dl_tag
);
253 stat
= dlil_detach_protocol(dl_tag
);
255 printf("WARNING: stf_detach can't detach IP AF_INET6 from interface\n");
261 void stf_reg_if_mods()
263 struct dlil_ifmod_reg_str stf_ifmod
;
264 struct dlil_protomod_reg_str stf_protoreg
;
267 bzero(&stf_ifmod
, sizeof(stf_ifmod
));
268 stf_ifmod
.add_if
= stf_add_if
;
269 stf_ifmod
.del_if
= stf_del_if
;
270 stf_ifmod
.add_proto
= stf_add_proto
;
271 stf_ifmod
.del_proto
= stf_del_proto
;
272 stf_ifmod
.ifmod_ioctl
= 0;
273 stf_ifmod
.shutdown
= stf_shutdown
;
276 if (dlil_reg_if_modules(APPLE_IF_FAM_STF
, &stf_ifmod
))
277 panic("Couldn't register stf modules\n");
279 /* Register protocol registration functions */
281 bzero(&stf_protoreg
, sizeof(stf_protoreg
));
282 stf_protoreg
.attach_proto
= stf_attach_inet6
;
283 stf_protoreg
.detach_proto
= stf_detach_inet6
;
285 if ( error
= dlil_reg_proto_module(AF_INET6
, APPLE_IF_FAM_STF
, &stf_protoreg
) != 0)
286 kprintf("dlil_reg_proto_module failed for AF_INET6 error=%d\n", error
);
293 struct stf_softc
*sc
;
298 const struct encaptab
*p
;
300 stf_reg_if_mods(); /* DLIL modules */
302 sc
= _MALLOC(sizeof(struct stf_softc
), M_DEVBUF
, M_WAITOK
);
304 printf("stf softc attach failed\n" );
308 bzero(sc
, sizeof(*sc
));
309 sc
->sc_if
.if_name
= "stf";
310 sc
->sc_if
.if_unit
= 0;
312 p
= encap_attach_func(AF_INET
, IPPROTO_IPV6
, stf_encapcheck
,
313 &in_stf_protosw
, sc
);
315 printf("%s: attach failed\n", if_name(&sc
->sc_if
));
319 sc
->encap_cookie
= p
;
320 sc
->sc_if
.if_mtu
= IPV6_MMTU
;
321 sc
->sc_if
.if_flags
= 0;
322 sc
->sc_if
.if_ioctl
= stf_ioctl
;
323 sc
->sc_if
.if_output
= NULL
; /* processing done in pre_output */
324 sc
->sc_if
.if_type
= IFT_STF
;
325 sc
->sc_if
.if_family
= APPLE_IF_FAM_STF
;
327 /* turn off ingress filter */
328 sc
->sc_if
.if_flags
|= IFF_LINK2
;
330 sc
->sc_if
.if_snd
.ifq_maxlen
= IFQ_MAXLEN
;
332 if (error
= dlil_if_attach(&sc
->sc_if
))
333 printf("stfattach: can't dlil_if_attach error=%d\n");
335 bpfattach(&sc
->sc_if
, DLT_NULL
, sizeof(u_int
));
341 stf_encapcheck(m
, off
, proto
, arg
)
342 const struct mbuf
*m
;
348 struct in6_ifaddr
*ia6
;
349 struct stf_softc
*sc
;
352 sc
= (struct stf_softc
*)arg
;
356 if ((sc
->sc_if
.if_flags
& IFF_UP
) == 0)
359 /* IFF_LINK0 means "no decapsulation" */
360 if ((sc
->sc_if
.if_flags
& IFF_LINK0
) != 0)
363 if (proto
!= IPPROTO_IPV6
)
366 /* LINTED const cast */
367 m_copydata((struct mbuf
*)m
, 0, sizeof(ip
), (caddr_t
)&ip
);
372 ia6
= stf_getsrcifa6(&sc
->sc_if
);
377 * check if IPv4 dst matches the IPv4 address derived from the
378 * local 6to4 address.
379 * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:...
381 if (bcmp(GET_V4(&ia6
->ia_addr
.sin6_addr
), &ip
.ip_dst
,
382 sizeof(ip
.ip_dst
)) != 0)
386 * check if IPv4 src matches the IPv4 address derived from the
387 * local 6to4 address masked by prefixmask.
388 * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24
389 * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24
391 bzero(&a
, sizeof(a
));
392 a
.s_addr
= GET_V4(&ia6
->ia_addr
.sin6_addr
)->s_addr
;
393 a
.s_addr
&= GET_V4(&ia6
->ia_prefixmask
.sin6_addr
)->s_addr
;
395 b
.s_addr
&= GET_V4(&ia6
->ia_prefixmask
.sin6_addr
)->s_addr
;
396 if (a
.s_addr
!= b
.s_addr
)
399 /* stf interface makes single side match only */
403 static struct in6_ifaddr
*
408 struct in_ifaddr
*ia4
;
409 struct sockaddr_in6
*sin6
;
412 for (ia
= ifp
->if_addrlist
.tqh_first
;
414 ia
= ia
->ifa_list
.tqe_next
)
416 if (ia
->ifa_addr
== NULL
)
418 if (ia
->ifa_addr
->sa_family
!= AF_INET6
)
420 sin6
= (struct sockaddr_in6
*)ia
->ifa_addr
;
421 if (!IN6_IS_ADDR_6TO4(&sin6
->sin6_addr
))
424 bcopy(GET_V4(&sin6
->sin6_addr
), &in
, sizeof(in
));
425 for (ia4
= TAILQ_FIRST(&in_ifaddrhead
);
427 ia4
= TAILQ_NEXT(ia4
, ia_link
))
429 if (ia4
->ia_addr
.sin_addr
.s_addr
== in
.s_addr
)
435 return (struct in6_ifaddr
*)ia
;
442 stf_pre_output(ifp
, m0
, dst
, rt
, frame_type
, address
, dl_tag
)
444 register struct mbuf
**m0
;
445 struct sockaddr
*dst
;
451 register struct mbuf
*m
= *m0
;
452 struct stf_softc
*sc
;
453 struct sockaddr_in6
*dst6
;
455 struct sockaddr_in
*dst4
;
459 struct in6_ifaddr
*ia6
;
462 sc
= (struct stf_softc
*)ifp
;
463 dst6
= (struct sockaddr_in6
*)dst
;
466 if ((ifp
->if_flags
& IFF_UP
) == 0) {
467 printf("stf: IFF_DOWN\n");
472 * If we don't have an ip4 address that match my inner ip6 address,
473 * we shouldn't generate output. Without this check, we'll end up
474 * using wrong IPv4 source.
476 ia6
= stf_getsrcifa6(ifp
);
481 if (m
->m_len
< sizeof(*ip6
)) {
482 m
= m_pullup(m
, sizeof(*ip6
));
486 ip6
= mtod(m
, struct ip6_hdr
*);
487 tos
= (ntohl(ip6
->ip6_flow
) >> 20) & 0xff;
490 * Pickup the right outer dst addr from the list of candidates.
491 * ip6_dst has priority as it may be able to give us shorter IPv4 hops.
493 if (IN6_IS_ADDR_6TO4(&ip6
->ip6_dst
))
494 in4
= GET_V4(&ip6
->ip6_dst
);
495 else if (IN6_IS_ADDR_6TO4(&dst6
->sin6_addr
))
496 in4
= GET_V4(&dst6
->sin6_addr
);
503 * We need to prepend the address family as
504 * a four byte field. Cons up a dummy header
505 * to pacify bpf. This is safe because bpf
506 * will only read from the mbuf (i.e., it won't
507 * try to free it or keep a pointer to it).
510 u_int32_t af
= AF_INET6
;
514 m0
.m_data
= (char *)&af
;
519 M_PREPEND(m
, sizeof(struct ip
), M_DONTWAIT
);
520 if (m
&& m
->m_len
< sizeof(struct ip
))
521 m
= m_pullup(m
, sizeof(struct ip
));
524 ip
= mtod(m
, struct ip
*);
526 bzero(ip
, sizeof(*ip
));
528 bcopy(GET_V4(&((struct sockaddr_in6
*)&ia6
->ia_addr
)->sin6_addr
),
529 &ip
->ip_src
, sizeof(ip
->ip_src
));
530 bcopy(in4
, &ip
->ip_dst
, sizeof(ip
->ip_dst
));
531 ip
->ip_p
= IPPROTO_IPV6
;
532 ip
->ip_ttl
= ip_stf_ttl
;
533 ip
->ip_len
= m
->m_pkthdr
.len
; /*host order*/
534 if (ifp
->if_flags
& IFF_LINK1
)
535 ip_ecn_ingress(ECN_ALLOWED
, &ip
->ip_tos
, &tos
);
537 ip_ecn_ingress(ECN_NOCARE
, &ip
->ip_tos
, &tos
);
539 dst4
= (struct sockaddr_in
*)&sc
->sc_ro
.ro_dst
;
540 if (dst4
->sin_family
!= AF_INET
||
541 bcmp(&dst4
->sin_addr
, &ip
->ip_dst
, sizeof(ip
->ip_dst
)) != 0) {
542 /* cache route doesn't match */
543 dst4
->sin_family
= AF_INET
;
544 dst4
->sin_len
= sizeof(struct sockaddr_in
);
545 bcopy(&ip
->ip_dst
, &dst4
->sin_addr
, sizeof(dst4
->sin_addr
));
546 if (sc
->sc_ro
.ro_rt
) {
547 RTFREE(sc
->sc_ro
.ro_rt
);
548 sc
->sc_ro
.ro_rt
= NULL
;
552 if (sc
->sc_ro
.ro_rt
== NULL
) {
554 if (sc
->sc_ro
.ro_rt
== NULL
) {
559 error
= ip_output(m
, NULL
, &sc
->sc_ro
, 0, NULL
);
565 stf_checkaddr4(sc
, in
, inifp
)
566 struct stf_softc
*sc
;
568 struct ifnet
*inifp
; /* incoming interface */
570 struct in_ifaddr
*ia4
;
573 * reject packets with the following address:
574 * 224.0.0.0/4 0.0.0.0/8 127.0.0.0/8 255.0.0.0/8
576 if (IN_MULTICAST(ntohl(in
->s_addr
)))
578 switch ((ntohl(in
->s_addr
) & 0xff000000) >> 24) {
579 case 0: case 127: case 255:
584 * reject packets with broadcast
586 for (ia4
= TAILQ_FIRST(&in_ifaddrhead
);
588 ia4
= TAILQ_NEXT(ia4
, ia_link
))
590 if ((ia4
->ia_ifa
.ifa_ifp
->if_flags
& IFF_BROADCAST
) == 0)
592 if (in
->s_addr
== ia4
->ia_broadaddr
.sin_addr
.s_addr
)
597 * perform ingress filter
599 if (sc
&& (sc
->sc_if
.if_flags
& IFF_LINK2
) == 0 && inifp
) {
600 struct sockaddr_in sin
;
603 bzero(&sin
, sizeof(sin
));
604 sin
.sin_family
= AF_INET
;
605 sin
.sin_len
= sizeof(struct sockaddr_in
);
607 rt
= rtalloc1((struct sockaddr
*)&sin
, 0, 0UL);
608 if (!rt
|| rt
->rt_ifp
!= inifp
) {
610 log(LOG_WARNING
, "%s: packet from 0x%x dropped "
611 "due to ingress filter\n", if_name(&sc
->sc_if
),
612 (u_int32_t
)ntohl(sin
.sin_addr
.s_addr
));
625 stf_checkaddr6(sc
, in6
, inifp
)
626 struct stf_softc
*sc
;
627 struct in6_addr
*in6
;
628 struct ifnet
*inifp
; /* incoming interface */
631 * check 6to4 addresses
633 if (IN6_IS_ADDR_6TO4(in6
))
634 return stf_checkaddr4(sc
, GET_V4(in6
), inifp
);
637 * reject anything that look suspicious. the test is implemented
638 * in ip6_input too, but we check here as well to
639 * (1) reject bad packets earlier, and
640 * (2) to be safe against future ip6_input change.
642 if (IN6_IS_ADDR_V4COMPAT(in6
) || IN6_IS_ADDR_V4MAPPED(in6
))
653 struct stf_softc
*sc
;
658 struct ifqueue
*ifq
= NULL
;
661 ip
= mtod(m
, struct ip
*);
665 if (proto
!= IPPROTO_IPV6
) {
670 ip
= mtod(m
, struct ip
*);
672 sc
= (struct stf_softc
*)encap_getarg(m
);
674 if (sc
== NULL
|| (sc
->sc_if
.if_flags
& IFF_UP
) == 0) {
682 * perform sanity check against outer src/dst.
683 * for source, perform ingress filter as well.
685 if (stf_checkaddr4(sc
, &ip
->ip_dst
, NULL
) < 0 ||
686 stf_checkaddr4(sc
, &ip
->ip_src
, m
->m_pkthdr
.rcvif
) < 0) {
694 if (m
->m_len
< sizeof(*ip6
)) {
695 m
= m_pullup(m
, sizeof(*ip6
));
699 ip6
= mtod(m
, struct ip6_hdr
*);
702 * perform sanity check against inner src/dst.
703 * for source, perform ingress filter as well.
705 if (stf_checkaddr6(sc
, &ip6
->ip6_dst
, NULL
) < 0 ||
706 stf_checkaddr6(sc
, &ip6
->ip6_src
, m
->m_pkthdr
.rcvif
) < 0) {
711 itos
= (ntohl(ip6
->ip6_flow
) >> 20) & 0xff;
712 if ((ifp
->if_flags
& IFF_LINK1
) != 0)
713 ip_ecn_egress(ECN_ALLOWED
, &otos
, &itos
);
715 ip_ecn_egress(ECN_NOCARE
, &otos
, &itos
);
716 ip6
->ip6_flow
&= ~htonl(0xff << 20);
717 ip6
->ip6_flow
|= htonl((u_int32_t
)itos
<< 20);
719 m
->m_pkthdr
.rcvif
= ifp
;
723 * We need to prepend the address family as
724 * a four byte field. Cons up a dummy header
725 * to pacify bpf. This is safe because bpf
726 * will only read from the mbuf (i.e., it won't
727 * try to free it or keep a pointer to it).
730 u_int32_t af
= AF_INET6
;
734 m0
.m_data
= (char *)&af
;
739 bpf_mtap(ifp
->if_bpf
, &m0
);
744 * Put the packet to the network layer input queue according to the
745 * specified address family.
746 * See net/if_gif.c for possible issues with packet processing
747 * reorder due to extra queueing.
754 IF_DROP(ifq
); /* update statistics */
762 ifp
->if_ibytes
+= m
->m_pkthdr
.len
;
770 stf_rtrequest(cmd
, rt
, sa
)
777 rt
->rt_rmx
.rmx_mtu
= IPV6_MMTU
;
781 stf_ioctl(ifp
, cmd
, data
)
788 struct sockaddr_in6
*sin6
;
794 ifa
= (struct ifaddr
*)data
;
795 if (ifa
== NULL
|| ifa
->ifa_addr
->sa_family
!= AF_INET6
) {
796 error
= EAFNOSUPPORT
;
799 sin6
= (struct sockaddr_in6
*)ifa
->ifa_addr
;
800 if (IN6_IS_ADDR_6TO4(&sin6
->sin6_addr
)) {
801 ifa
->ifa_rtrequest
= stf_rtrequest
;
802 ifp
->if_flags
|= IFF_UP
;
809 ifr
= (struct ifreq
*)data
;
810 if (ifr
&& ifr
->ifr_addr
.sa_family
== AF_INET6
)
813 error
= EAFNOSUPPORT
;