2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
   4  * @APPLE_LICENSE_HEADER_START@ 
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved. 
   8  * This file contains Original Code and/or Modifications of Original Code 
   9  * as defined in and that are subject to the Apple Public Source License 
  10  * Version 2.0 (the 'License'). You may not use this file except in 
  11  * compliance with the License. Please obtain a copy of the License at 
  12  * http://www.opensource.apple.com/apsl/ and read it before using this 
  15  * The Original Code and all software distributed under the License are 
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  20  * Please see the License for the specific language governing rights and 
  21  * limitations under the License. 
  23  * @APPLE_LICENSE_HEADER_END@ 
  26  * Copyright (c) 1982, 1986, 1988, 1993 
  27  *      The Regents of the University of California.  All rights reserved. 
  29  * Redistribution and use in source and binary forms, with or without 
  30  * modification, are permitted provided that the following conditions 
  32  * 1. Redistributions of source code must retain the above copyright 
  33  *    notice, this list of conditions and the following disclaimer. 
  34  * 2. Redistributions in binary form must reproduce the above copyright 
  35  *    notice, this list of conditions and the following disclaimer in the 
  36  *    documentation and/or other materials provided with the distribution. 
  37  * 3. All advertising materials mentioning features or use of this software 
  38  *    must display the following acknowledgement: 
  39  *      This product includes software developed by the University of 
  40  *      California, Berkeley and its contributors. 
  41  * 4. Neither the name of the University nor the names of its contributors 
  42  *    may be used to endorse or promote products derived from this software 
  43  *    without specific prior written permission. 
  45  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  57  *      @(#)raw_ip.c    8.7 (Berkeley) 5/15/95 
  60 #include <sys/param.h> 
  61 #include <sys/systm.h> 
  62 #include <sys/kernel.h> 
  63 #include <sys/malloc.h> 
  66 #include <sys/protosw.h> 
  67 #include <sys/socket.h> 
  68 #include <sys/socketvar.h> 
  69 #include <sys/sysctl.h> 
  72 #include <vm/vm_zone.h> 
  76 #include <net/route.h> 
  79 #include <netinet/in.h> 
  80 #include <netinet/in_systm.h> 
  81 #include <netinet/ip.h> 
  82 #include <netinet/in_pcb.h> 
  83 #include <netinet/in_var.h> 
  84 #include <netinet/ip_var.h> 
  85 #include <netinet/ip_mroute.h> 
  87 #include <netinet/ip_fw.h> 
  90 #include <netinet6/ipsec.h> 
  94 #include <netinet/ip_dummynet.h> 
  98 extern int ipsec_bypass
; 
 101 struct  inpcbhead ripcb
; 
 102 struct  inpcbinfo ripcbinfo
; 
 105  * Nominal space allocated to a raw ip socket. 
 111  * Raw interface to IP protocol. 
 115  * Initialize raw connection block q. 
 121         ripcbinfo
.listhead 
= &ripcb
; 
 123          * XXX We don't use the hash list for raw IP, but it's easier 
 124          * to allocate a one entry hash list than it is to check all 
 125          * over the place for hashbase == NULL. 
 127         ripcbinfo
.hashbase 
= hashinit(1, M_PCB
, &ripcbinfo
.hashmask
); 
 128         ripcbinfo
.porthashbase 
= hashinit(1, M_PCB
, &ripcbinfo
.porthashmask
); 
 130         ripcbinfo
.ipi_zone 
= (void *) zinit(sizeof(struct inpcb
), 
 131                                             (4096 * sizeof(struct inpcb
)),  
 136 static struct   sockaddr_in ripsrc 
= { sizeof(ripsrc
), AF_INET 
}; 
 138  * Setup generic address and protocol structures 
 139  * for raw_input routine, then pass them along with 
 147         register struct ip 
*ip 
= mtod(m
, struct ip 
*); 
 148         register struct inpcb 
*inp
; 
 149         struct inpcb 
*last 
= 0; 
 150         struct mbuf 
*opts 
= 0; 
 152         ripsrc
.sin_addr 
= ip
->ip_src
; 
 153         LIST_FOREACH(inp
, &ripcb
, inp_list
) { 
 155                 if ((inp
->inp_vflag 
& INP_IPV4
) == 0) 
 158                 if (inp
->inp_ip_p 
&& (inp
->inp_ip_p 
!= ip
->ip_p
)) 
 160                 if (inp
->inp_laddr
.s_addr 
&& 
 161                   inp
->inp_laddr
.s_addr 
!= ip
->ip_dst
.s_addr
) 
 163                 if (inp
->inp_faddr
.s_addr 
&& 
 164                   inp
->inp_faddr
.s_addr 
!= ip
->ip_src
.s_addr
) 
 167                         struct mbuf 
*n 
= m_copy(m
, 0, (int)M_COPYALL
); 
 170                         /* check AH/ESP integrity. */ 
 171                         if (ipsec_bypass 
== 0 && n 
&& ipsec4_in_reject_so(n
, last
->inp_socket
)) { 
 173                                 ipsecstat
.in_polvio
++; 
 174                                 /* do not inject data to pcb */ 
 178                                 if (last
->inp_flags 
& INP_CONTROLOPTS 
|| 
 179                                     last
->inp_socket
->so_options 
& SO_TIMESTAMP
) 
 180                                     ip_savecontrol(last
, &opts
, ip
, n
); 
 181                                 if (last
->inp_flags 
& INP_STRIPHDR
) { 
 183                                         n
->m_pkthdr
.len 
-= iphlen
; 
 186                                 if (sbappendaddr(&last
->inp_socket
->so_rcv
, 
 187                                     (struct sockaddr 
*)&ripsrc
, n
, 
 189                                         /* should notify about lost packet */ 
 190                                     kprintf("rip_input can't append to socket\n"); 
 195                                         sorwakeup(last
->inp_socket
); 
 202         /* check AH/ESP integrity. */ 
 203         if (ipsec_bypass 
== 0 && last 
&& ipsec4_in_reject_so(m
, last
->inp_socket
)) { 
 205                 ipsecstat
.in_polvio
++; 
 206                 ipstat
.ips_delivered
--; 
 207                 /* do not inject data to pcb */ 
 211                 if (last
->inp_flags 
& INP_CONTROLOPTS 
|| 
 212                     last
->inp_socket
->so_options 
& SO_TIMESTAMP
) 
 213                         ip_savecontrol(last
, &opts
, ip
, m
); 
 214         if (last
->inp_flags 
& INP_STRIPHDR
) { 
 216             m
->m_pkthdr
.len 
-= iphlen
; 
 219                 if (sbappendaddr(&last
->inp_socket
->so_rcv
, 
 220                     (struct sockaddr 
*)&ripsrc
, m
, opts
) == 0) { 
 221                     kprintf("rip_input(2) can't append to socket\n"); 
 226                         sorwakeup(last
->inp_socket
); 
 229                 ipstat
.ips_noproto
++; 
 230                 ipstat
.ips_delivered
--; 
 235  * Generate IP header and pass packet to ip_output. 
 236  * Tack on options user may have setup with control call. 
 239 rip_output(m
, so
, dst
) 
 240         register struct mbuf 
*m
; 
 244         register struct ip 
*ip
; 
 245         register struct inpcb 
*inp 
= sotoinpcb(so
); 
 246         int flags 
= (so
->so_options 
& SO_DONTROUTE
) | IP_ALLOWBROADCAST
; 
 249          * If the user handed us a complete IP packet, use it. 
 250          * Otherwise, allocate an mbuf for a header and fill it in. 
 252         if ((inp
->inp_flags 
& INP_HDRINCL
) == 0) { 
 253                 if (m
->m_pkthdr
.len 
+ sizeof(struct ip
) > IP_MAXPACKET
) { 
 257                 M_PREPEND(m
, sizeof(struct ip
), M_WAIT
); 
 258                 ip 
= mtod(m
, struct ip 
*); 
 259                 ip
->ip_tos 
= inp
->inp_ip_tos
; 
 261                 ip
->ip_p 
= inp
->inp_ip_p
; 
 262                 ip
->ip_len 
= m
->m_pkthdr
.len
; 
 263                 ip
->ip_src 
= inp
->inp_laddr
; 
 264                 ip
->ip_dst
.s_addr 
= dst
; 
 265                 ip
->ip_ttl 
= inp
->inp_ip_ttl
; 
 267                 if (m
->m_pkthdr
.len 
> IP_MAXPACKET
) { 
 271                 ip 
= mtod(m
, struct ip 
*); 
 272                 /* don't allow both user specified and setsockopt options, 
 273                    and don't allow packet length sizes that will crash */ 
 274                 if (((IP_VHL_HL(ip
->ip_vhl
) != (sizeof (*ip
) >> 2)) 
 276                     || (ip
->ip_len 
> m
->m_pkthdr
.len
) 
 277                     || (ip
->ip_len 
< (IP_VHL_HL(ip
->ip_vhl
) << 2))) { 
 283                         ip
->ip_id 
= ip_randomid(); 
 285                         ip
->ip_id 
= htons(ip_id
++); 
 287                 /* XXX prevent ip_output from overwriting header fields */ 
 288                 flags 
|= IP_RAWOUTPUT
; 
 293         if (ipsec_bypass 
== 0 && ipsec_setsocket(m
, so
) != 0) { 
 299         return (ip_output(m
, inp
->inp_options
, &inp
->inp_route
, flags
, 
 304  * Raw IP socket option processing. 
 307 rip_ctloutput(so
, sopt
) 
 309         struct sockopt 
*sopt
; 
 311         struct  inpcb 
*inp 
= sotoinpcb(so
); 
 314         if (sopt
->sopt_level 
!= IPPROTO_IP
) 
 319         switch (sopt
->sopt_dir
) { 
 321                 switch (sopt
->sopt_name
) { 
 323                         optval 
= inp
->inp_flags 
& INP_HDRINCL
; 
 324                         error 
= sooptcopyout(sopt
, &optval
, sizeof optval
); 
 328             optval 
= inp
->inp_flags 
& INP_STRIPHDR
; 
 329             error 
= sooptcopyout(sopt
, &optval
, sizeof optval
); 
 336                         if (ip_fw_ctl_ptr 
== 0) 
 339                                 error 
= ip_fw_ctl_ptr(sopt
); 
 343                 case IP_DUMMYNET_GET
: 
 344                         if (ip_dn_ctl_ptr 
== NULL
) 
 345                                 error 
= ENOPROTOOPT 
; 
 347                                 error 
= ip_dn_ctl_ptr(sopt
); 
 349 #endif /* DUMMYNET */ 
 359                         error 
= ip_mrouter_get(so
, sopt
); 
 363                         error 
= ip_ctloutput(so
, sopt
); 
 369                 switch (sopt
->sopt_name
) { 
 371                         error 
= sooptcopyin(sopt
, &optval
, sizeof optval
, 
 376                                 inp
->inp_flags 
|= INP_HDRINCL
; 
 378                                 inp
->inp_flags 
&= ~INP_HDRINCL
; 
 382             error 
= sooptcopyin(sopt
, &optval
, sizeof optval
, 
 387                 inp
->inp_flags 
|= INP_STRIPHDR
; 
 389                 inp
->inp_flags 
&= ~INP_STRIPHDR
; 
 400                 case IP_OLD_FW_FLUSH
: 
 402                 case IP_OLD_FW_RESETLOG
: 
 403                         if (ip_fw_ctl_ptr 
== 0) 
 406                                 error 
= ip_fw_ctl_ptr(sopt
); 
 410                 case IP_DUMMYNET_CONFIGURE
: 
 411                 case IP_DUMMYNET_DEL
: 
 412                 case IP_DUMMYNET_FLUSH
: 
 413                         if (ip_dn_ctl_ptr 
== NULL
) 
 414                                 error 
= ENOPROTOOPT 
; 
 416                                 error 
= ip_dn_ctl_ptr(sopt
); 
 421                         error 
= ip_rsvp_init(so
); 
 425                         error 
= ip_rsvp_done(); 
 428                         /* XXX - should be combined */ 
 430                         error 
= ip_rsvp_vif_init(so
, sopt
); 
 433                 case IP_RSVP_VIF_OFF
: 
 434                         error 
= ip_rsvp_vif_done(so
, sopt
); 
 445                         error 
= ip_mrouter_set(so
, sopt
); 
 449                         error 
= ip_ctloutput(so
, sopt
); 
/*
 * This function exists solely to receive the PRC_IFDOWN messages which
 * are sent by if_down().  It looks for an ifaddr whose ifa_addr is sa,
 * and calls in_ifadown() to remove all routes corresponding to that address.
 * It also receives the PRC_IFUP messages from if_up() and reinstalls the
 * interface route for that address.
 */
 466 rip_ctlinput(cmd
, sa
, vip
) 
 471         struct in_ifaddr 
*ia
; 
 478                 for (ia 
= in_ifaddrhead
.tqh_first
; ia
; 
 479                      ia 
= ia
->ia_link
.tqe_next
) { 
 480                         if (ia
->ia_ifa
.ifa_addr 
== sa
 
 481                             && (ia
->ia_flags 
& IFA_ROUTE
)) { 
 483                                  * in_ifscrub kills the interface route. 
 485                                 in_ifscrub(ia
->ia_ifp
, ia
); 
 487                                  * in_ifadown gets rid of all the rest of 
 488                                  * the routes.  This is not quite the right 
 489                                  * thing to do, but at least if we are running 
 490                                  * a routing process they will come back. 
 492                                 in_ifadown(&ia
->ia_ifa
, 1); 
 499                 for (ia 
= in_ifaddrhead
.tqh_first
; ia
; 
 500                      ia 
= ia
->ia_link
.tqe_next
) { 
 501                         if (ia
->ia_ifa
.ifa_addr 
== sa
) 
 504                 if (ia 
== 0 || (ia
->ia_flags 
& IFA_ROUTE
)) 
 507                 ifp 
= ia
->ia_ifa
.ifa_ifp
; 
 509                 if ((ifp
->if_flags 
& IFF_LOOPBACK
) 
 510                     || (ifp
->if_flags 
& IFF_POINTOPOINT
)) 
 513                 err 
= rtinit(&ia
->ia_ifa
, RTM_ADD
, flags
); 
 515                         ia
->ia_flags 
|= IFA_ROUTE
; 
 520 u_long  rip_sendspace 
= RIPSNDQ
; 
 521 u_long  rip_recvspace 
= RIPRCVQ
; 
 523 SYSCTL_INT(_net_inet_raw
, OID_AUTO
, maxdgram
, CTLFLAG_RW
, 
 524     &rip_sendspace
, 0, "Maximum outgoing raw IP datagram size"); 
 525 SYSCTL_INT(_net_inet_raw
, OID_AUTO
, recvspace
, CTLFLAG_RW
, 
 526     &rip_recvspace
, 0, "Maximum incoming raw IP datagram size"); 
 529 rip_attach(struct socket 
*so
, int proto
, struct proc 
*p
) 
 538         if ((so
->so_state 
& SS_PRIV
) == 0) 
 541         if (p 
&& (error 
= suser(p
)) != 0) 
 545         error 
= soreserve(so
, rip_sendspace
, rip_recvspace
); 
 549         error 
= in_pcballoc(so
, &ripcbinfo
, p
); 
 553         inp 
= (struct inpcb 
*)so
->so_pcb
; 
 554         inp
->inp_vflag 
|= INP_IPV4
; 
 555         inp
->inp_ip_p 
= proto
; 
 556         inp
->inp_ip_ttl 
= ip_defttl
; 
 560 __private_extern__ 
int 
 561 rip_detach(struct socket 
*so
) 
 568         if (so 
== ip_mrouter
) 
 570         ip_rsvp_force_done(so
); 
 577 __private_extern__ 
int 
 578 rip_abort(struct socket 
*so
) 
 580         soisdisconnected(so
); 
 581         return rip_detach(so
); 
 584 __private_extern__ 
int 
 585 rip_disconnect(struct socket 
*so
) 
 587         if ((so
->so_state 
& SS_ISCONNECTED
) == 0) 
 589         return rip_abort(so
); 
 592 __private_extern__ 
int 
 593 rip_bind(struct socket 
*so
, struct sockaddr 
*nam
, struct proc 
*p
) 
 595         struct inpcb 
*inp 
= sotoinpcb(so
); 
 596         struct sockaddr_in 
*addr 
= (struct sockaddr_in 
*)nam
; 
 598         if (nam
->sa_len 
!= sizeof(*addr
)) 
 601         if (TAILQ_EMPTY(&ifnet
) || ((addr
->sin_family 
!= AF_INET
) && 
 602                                     (addr
->sin_family 
!= AF_IMPLINK
)) || 
 603             (addr
->sin_addr
.s_addr 
&& 
 604              ifa_ifwithaddr((struct sockaddr 
*)addr
) == 0)) 
 605                 return EADDRNOTAVAIL
; 
 606         inp
->inp_laddr 
= addr
->sin_addr
; 
 610 __private_extern__ 
int 
 611 rip_connect(struct socket 
*so
, struct sockaddr 
*nam
, struct proc 
*p
) 
 613         struct inpcb 
*inp 
= sotoinpcb(so
); 
 614         struct sockaddr_in 
*addr 
= (struct sockaddr_in 
*)nam
; 
 616         if (nam
->sa_len 
!= sizeof(*addr
)) 
 618         if (TAILQ_EMPTY(&ifnet
)) 
 619                 return EADDRNOTAVAIL
; 
 620         if ((addr
->sin_family 
!= AF_INET
) && 
 621             (addr
->sin_family 
!= AF_IMPLINK
)) 
 623         inp
->inp_faddr 
= addr
->sin_addr
; 
 628 __private_extern__ 
int 
 629 rip_shutdown(struct socket 
*so
) 
 635 __private_extern__ 
int 
 636 rip_send(struct socket 
*so
, int flags
, struct mbuf 
*m
, struct sockaddr 
*nam
, 
 637          struct mbuf 
*control
, struct proc 
*p
) 
 639         struct inpcb 
*inp 
= sotoinpcb(so
); 
 642         if (so
->so_state 
& SS_ISCONNECTED
) { 
 647                 dst 
= inp
->inp_faddr
.s_addr
; 
 653                 dst 
= ((struct sockaddr_in 
*)nam
)->sin_addr
.s_addr
; 
 655         return rip_output(m
, so
, dst
); 
 659 rip_pcblist SYSCTL_HANDLER_ARGS
 
 662         struct inpcb 
*inp
, **inp_list
; 
 667          * The process of preparing the TCB list is too time-consuming and 
 668          * resource-intensive to repeat twice on every request. 
 670         if (req
->oldptr 
== 0) { 
 671                 n 
= ripcbinfo
.ipi_count
; 
 672                 req
->oldidx 
= 2 * (sizeof xig
) 
 673                         + (n 
+ n
/8) * sizeof(struct xinpcb
); 
 677         if (req
->newptr 
!= 0) 
 681          * OK, now we're committed to doing something. 
 684         gencnt 
= ripcbinfo
.ipi_gencnt
; 
 685         n 
= ripcbinfo
.ipi_count
; 
 688         xig
.xig_len 
= sizeof xig
; 
 690         xig
.xig_gen 
= gencnt
; 
 691         xig
.xig_sogen 
= so_gencnt
; 
 692         error 
= SYSCTL_OUT(req
, &xig
, sizeof xig
); 
 696      * We are done if there is no pcb 
 701         inp_list 
= _MALLOC(n 
* sizeof *inp_list
, M_TEMP
, M_WAITOK
); 
 706         for (inp 
= ripcbinfo
.listhead
->lh_first
, i 
= 0; inp 
&& i 
< n
; 
 707              inp 
= inp
->inp_list
.le_next
) { 
 708                 if (inp
->inp_gencnt 
<= gencnt
) 
 715         for (i 
= 0; i 
< n
; i
++) { 
 717                 if (inp
->inp_gencnt 
<= gencnt
) { 
 719                         xi
.xi_len 
= sizeof xi
; 
 720                         /* XXX should avoid extra copy */ 
 721                         bcopy(inp
, &xi
.xi_inp
, sizeof *inp
); 
 723                                 sotoxsocket(inp
->inp_socket
, &xi
.xi_socket
); 
 724                         error 
= SYSCTL_OUT(req
, &xi
, sizeof xi
); 
 729                  * Give the user an updated idea of our state. 
 730                  * If the generation differs from what we told 
 731                  * her before, she knows that something happened 
 732                  * while we were processing this request, and it 
 733                  * might be necessary to retry. 
 736                 xig
.xig_gen 
= ripcbinfo
.ipi_gencnt
; 
 737                 xig
.xig_sogen 
= so_gencnt
; 
 738                 xig
.xig_count 
= ripcbinfo
.ipi_count
; 
 740                 error 
= SYSCTL_OUT(req
, &xig
, sizeof xig
); 
 742         FREE(inp_list
, M_TEMP
); 
 746 SYSCTL_PROC(_net_inet_raw
, OID_AUTO
/*XXX*/, pcblist
, CTLFLAG_RD
, 0, 0, 
 747             rip_pcblist
, "S,xinpcb", "List of active raw IP sockets"); 
 749 struct pr_usrreqs rip_usrreqs 
= { 
 750         rip_abort
, pru_accept_notsupp
, rip_attach
, rip_bind
, rip_connect
, 
 751         pru_connect2_notsupp
, in_control
, rip_detach
, rip_disconnect
, 
 752         pru_listen_notsupp
, in_setpeeraddr
, pru_rcvd_notsupp
, 
 753         pru_rcvoob_notsupp
, rip_send
, pru_sense_null
, rip_shutdown
, 
 754         in_setsockaddr
, sosend
, soreceive
, sopoll