]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/ip_flow.c
xnu-344.49.tar.gz
[apple/xnu.git] / bsd / netinet / ip_flow.c
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
43866e37 6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
1c79356b 7 *
43866e37
A
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
43866e37
A
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
1c79356b
A
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25/*-
26 * Copyright (c) 1998 The NetBSD Foundation, Inc.
27 * All rights reserved.
28 *
29 * This code is derived from software contributed to The NetBSD Foundation
30 * by the 3am Software Foundry ("3am"). It was developed by Matt Thomas.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the NetBSD
43 * Foundation, Inc. and its contributors.
44 * 4. Neither the name of The NetBSD Foundation nor the names of its
45 * contributors may be used to endorse or promote products derived
46 * from this software without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
49 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
59 *
9bccf70c 60 * $FreeBSD: src/sys/netinet/ip_flow.c,v 1.9.2.1 2001/08/08 08:20:35 ru Exp $
1c79356b
A
61 */
62
63#include <sys/param.h>
64#include <sys/systm.h>
65#include <sys/malloc.h>
66#include <sys/mbuf.h>
67#include <sys/protosw.h>
68#include <sys/socket.h>
69#include <sys/kernel.h>
70
71#include <sys/sysctl.h>
72
73#include <net/if.h>
74#include <net/route.h>
75
76#include <netinet/in.h>
77#include <netinet/in_systm.h>
78#include <netinet/ip.h>
79#include <netinet/in_var.h>
80#include <netinet/ip_var.h>
81#include <netinet/ip_flow.h>
82#include <net/dlil.h>
83
84#define IPFLOW_TIMER (5 * PR_SLOWHZ)
85#define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */
86#define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS)
87static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE];
88static int ipflow_inuse;
89#define IPFLOW_MAX 256
90
9bccf70c 91#ifdef __APPLE__
1c79356b
A
92#define M_IPFLOW M_TEMP
93#endif
94
95static int ipflow_active = 0;
96SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW,
9bccf70c 97 &ipflow_active, 0, "Enable flow-based IP forwarding");
1c79356b 98
9bccf70c
A
99#ifndef __APPLE__
100static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow");
101#endif
1c79356b
A
102
103static unsigned
104ipflow_hash(
105 struct in_addr dst,
106 struct in_addr src,
107 unsigned tos)
108{
109 unsigned hash = tos;
110 int idx;
111 for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS)
112 hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx);
113 return hash & (IPFLOW_HASHSIZE-1);
114}
115
116static struct ipflow *
117ipflow_lookup(
118 const struct ip *ip)
119{
120 unsigned hash;
121 struct ipflow *ipf;
122
123 hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
124
125 ipf = LIST_FIRST(&ipflows[hash]);
126 while (ipf != NULL) {
127 if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr
128 && ip->ip_src.s_addr == ipf->ipf_src.s_addr
129 && ip->ip_tos == ipf->ipf_tos)
130 break;
131 ipf = LIST_NEXT(ipf, ipf_next);
132 }
133 return ipf;
134}
135
136int
137ipflow_fastforward(
138 struct mbuf *m)
139{
140 struct ip *ip;
141 struct ipflow *ipf;
142 struct rtentry *rt;
9bccf70c 143 struct sockaddr *dst;
1c79356b
A
144 int error;
145
146 /*
147 * Are we forwarding packets? Big enough for an IP packet?
148 */
149 if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip))
150 return 0;
151 /*
152 * IP header with no option and valid version and length
153 */
154 ip = mtod(m, struct ip *);
155 if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2)
156 || ntohs(ip->ip_len) > m->m_pkthdr.len)
157 return 0;
158 /*
159 * Find a flow.
160 */
161 if ((ipf = ipflow_lookup(ip)) == NULL)
162 return 0;
163
164 /*
165 * Route and interface still up?
166 */
167 rt = ipf->ipf_ro.ro_rt;
168 if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0)
169 return 0;
170
171 /*
172 * Packet size OK? TTL?
173 */
174 if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC)
175 return 0;
176
177 /*
178 * Everything checks out and so we can forward this packet.
179 * Modify the TTL and incrementally change the checksum.
180 */
181 ip->ip_ttl -= IPTTLDEC;
182 if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) {
183 ip->ip_sum += htons(IPTTLDEC << 8) + 1;
184 } else {
185 ip->ip_sum += htons(IPTTLDEC << 8);
186 }
187
188 /*
189 * Send the packet on its way. All we can get back is ENOBUFS
190 */
191 ipf->ipf_uses++;
192 ipf->ipf_timer = IPFLOW_TIMER;
193
9bccf70c
A
194 if (rt->rt_flags & RTF_GATEWAY)
195 dst = rt->rt_gateway;
196 else
197 dst = &ipf->ipf_ro.ro_dst;
198#ifdef __APPLE__
1c79356b 199 /* Not sure the rt_dlt is valid here !! XXX */
9bccf70c
A
200 if ((error = dlil_output((u_long)rt->rt_dlt, m, (caddr_t) rt, dst, 0)) != 0) {
201
202#else
203 if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) {
204#endif
1c79356b
A
205 if (error == ENOBUFS)
206 ipf->ipf_dropped++;
207 else
208 ipf->ipf_errors++;
209 }
210 return 1;
211}
212\f
213static void
214ipflow_addstats(
215 struct ipflow *ipf)
216{
217 ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
218 ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped;
219 ipstat.ips_forward += ipf->ipf_uses;
220 ipstat.ips_fastforward += ipf->ipf_uses;
221}
222
223static void
224ipflow_free(
225 struct ipflow *ipf)
226{
227 int s;
228 /*
229 * Remove the flow from the hash table (at elevated IPL).
230 * Once it's off the list, we can deal with it at normal
231 * network IPL.
232 */
233 s = splimp();
234 LIST_REMOVE(ipf, ipf_next);
235 splx(s);
236 ipflow_addstats(ipf);
9bccf70c 237 rtfree(ipf->ipf_ro.ro_rt);
1c79356b
A
238 ipflow_inuse--;
239 FREE(ipf, M_IPFLOW);
240}
241
242static struct ipflow *
243ipflow_reap(
244 void)
245{
246 struct ipflow *ipf, *maybe_ipf = NULL;
247 int idx;
248 int s;
249
250 for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
251 ipf = LIST_FIRST(&ipflows[idx]);
252 while (ipf != NULL) {
253 /*
254 * If this no longer points to a valid route
255 * reclaim it.
256 */
257 if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0)
258 goto done;
259 /*
260 * choose the one that's been least recently used
261 * or has had the least uses in the last 1.5
262 * intervals.
263 */
264 if (maybe_ipf == NULL
265 || ipf->ipf_timer < maybe_ipf->ipf_timer
266 || (ipf->ipf_timer == maybe_ipf->ipf_timer
267 && ipf->ipf_last_uses + ipf->ipf_uses <
268 maybe_ipf->ipf_last_uses +
269 maybe_ipf->ipf_uses))
270 maybe_ipf = ipf;
271 ipf = LIST_NEXT(ipf, ipf_next);
272 }
273 }
274 ipf = maybe_ipf;
275 done:
276 /*
277 * Remove the entry from the flow table.
278 */
279 s = splimp();
280 LIST_REMOVE(ipf, ipf_next);
281 splx(s);
282 ipflow_addstats(ipf);
9bccf70c 283 rtfree(ipf->ipf_ro.ro_rt);
1c79356b
A
284 return ipf;
285}
286
287void
288ipflow_slowtimo(
289 void)
290{
291 struct ipflow *ipf;
292 int idx;
293
294 for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
295 ipf = LIST_FIRST(&ipflows[idx]);
296 while (ipf != NULL) {
297 struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next);
298 if (--ipf->ipf_timer == 0) {
299 ipflow_free(ipf);
300 } else {
301 ipf->ipf_last_uses = ipf->ipf_uses;
302 ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
303 ipstat.ips_forward += ipf->ipf_uses;
304 ipstat.ips_fastforward += ipf->ipf_uses;
305 ipf->ipf_uses = 0;
306 }
307 ipf = next_ipf;
308 }
309 }
310}
311
312void
313ipflow_create(
314 const struct route *ro,
315 struct mbuf *m)
316{
317 const struct ip *const ip = mtod(m, struct ip *);
318 struct ipflow *ipf;
319 unsigned hash;
320 int s;
321
322 /*
323 * Don't create cache entries for ICMP messages.
324 */
325 if (!ipflow_active || ip->ip_p == IPPROTO_ICMP)
326 return;
327 /*
328 * See if an existing flow struct exists. If so remove it from it's
329 * list and free the old route. If not, try to malloc a new one
330 * (if we aren't at our limit).
331 */
332 ipf = ipflow_lookup(ip);
333 if (ipf == NULL) {
334 if (ipflow_inuse == IPFLOW_MAX) {
335 ipf = ipflow_reap();
336 } else {
337 ipf = (struct ipflow *) _MALLOC(sizeof(*ipf), M_IPFLOW,
338 M_NOWAIT);
339 if (ipf == NULL)
340 return;
341 ipflow_inuse++;
342 }
343 bzero((caddr_t) ipf, sizeof(*ipf));
344 } else {
345 s = splimp();
346 LIST_REMOVE(ipf, ipf_next);
347 splx(s);
348 ipflow_addstats(ipf);
9bccf70c 349 rtfree(ipf->ipf_ro.ro_rt);
1c79356b
A
350 ipf->ipf_uses = ipf->ipf_last_uses = 0;
351 ipf->ipf_errors = ipf->ipf_dropped = 0;
352 }
353
354 /*
355 * Fill in the updated information.
356 */
357 ipf->ipf_ro = *ro;
9bccf70c 358 rtref(ro->ro_rt);
1c79356b
A
359 ipf->ipf_dst = ip->ip_dst;
360 ipf->ipf_src = ip->ip_src;
361 ipf->ipf_tos = ip->ip_tos;
362 ipf->ipf_timer = IPFLOW_TIMER;
363 /*
364 * Insert into the approriate bucket of the flow table.
365 */
366 hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
367 s = splimp();
368 LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next);
369 splx(s);
370}