]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/ip_flow.c
xnu-792.10.96.tar.gz
[apple/xnu.git] / bsd / netinet / ip_flow.c
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
37839358
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
1c79356b 11 *
37839358
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
37839358
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*-
23 * Copyright (c) 1998 The NetBSD Foundation, Inc.
24 * All rights reserved.
25 *
26 * This code is derived from software contributed to The NetBSD Foundation
27 * by the 3am Software Foundry ("3am"). It was developed by Matt Thomas.
28 *
29 * Redistribution and use in source and binary forms, with or without
30 * modification, are permitted provided that the following conditions
31 * are met:
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * 2. Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in the
36 * documentation and/or other materials provided with the distribution.
37 * 3. All advertising materials mentioning features or use of this software
38 * must display the following acknowledgement:
39 * This product includes software developed by the NetBSD
40 * Foundation, Inc. and its contributors.
41 * 4. Neither the name of The NetBSD Foundation nor the names of its
42 * contributors may be used to endorse or promote products derived
43 * from this software without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
46 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
47 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
49 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
50 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
54 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
55 * POSSIBILITY OF SUCH DAMAGE.
56 *
9bccf70c 57 * $FreeBSD: src/sys/netinet/ip_flow.c,v 1.9.2.1 2001/08/08 08:20:35 ru Exp $
1c79356b
A
58 */
59
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/malloc.h>
63#include <sys/mbuf.h>
64#include <sys/protosw.h>
65#include <sys/socket.h>
66#include <sys/kernel.h>
67
68#include <sys/sysctl.h>
69
70#include <net/if.h>
71#include <net/route.h>
72
73#include <netinet/in.h>
74#include <netinet/in_systm.h>
75#include <netinet/ip.h>
76#include <netinet/in_var.h>
77#include <netinet/ip_var.h>
78#include <netinet/ip_flow.h>
79#include <net/dlil.h>
80
81#define IPFLOW_TIMER (5 * PR_SLOWHZ)
82#define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */
83#define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS)
84static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE];
85static int ipflow_inuse;
86#define IPFLOW_MAX 256
87
9bccf70c 88#ifdef __APPLE__
1c79356b
A
89#define M_IPFLOW M_TEMP
90#endif
91
92static int ipflow_active = 0;
93SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW,
9bccf70c 94 &ipflow_active, 0, "Enable flow-based IP forwarding");
1c79356b 95
9bccf70c
A
96#ifndef __APPLE__
97static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow");
98#endif
1c79356b
A
99
100static unsigned
101ipflow_hash(
102 struct in_addr dst,
103 struct in_addr src,
104 unsigned tos)
105{
106 unsigned hash = tos;
107 int idx;
108 for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS)
109 hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx);
110 return hash & (IPFLOW_HASHSIZE-1);
111}
112
113static struct ipflow *
114ipflow_lookup(
115 const struct ip *ip)
116{
117 unsigned hash;
118 struct ipflow *ipf;
119
120 hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
121
122 ipf = LIST_FIRST(&ipflows[hash]);
123 while (ipf != NULL) {
124 if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr
125 && ip->ip_src.s_addr == ipf->ipf_src.s_addr
126 && ip->ip_tos == ipf->ipf_tos)
127 break;
128 ipf = LIST_NEXT(ipf, ipf_next);
129 }
130 return ipf;
131}
132
133int
134ipflow_fastforward(
135 struct mbuf *m)
136{
137 struct ip *ip;
138 struct ipflow *ipf;
139 struct rtentry *rt;
9bccf70c 140 struct sockaddr *dst;
1c79356b
A
141 int error;
142
143 /*
144 * Are we forwarding packets? Big enough for an IP packet?
145 */
146 if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip))
147 return 0;
148 /*
149 * IP header with no option and valid version and length
150 */
151 ip = mtod(m, struct ip *);
152 if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2)
153 || ntohs(ip->ip_len) > m->m_pkthdr.len)
154 return 0;
155 /*
156 * Find a flow.
157 */
158 if ((ipf = ipflow_lookup(ip)) == NULL)
159 return 0;
160
161 /*
162 * Route and interface still up?
163 */
164 rt = ipf->ipf_ro.ro_rt;
165 if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0)
166 return 0;
167
168 /*
169 * Packet size OK? TTL?
170 */
171 if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC)
172 return 0;
173
174 /*
175 * Everything checks out and so we can forward this packet.
176 * Modify the TTL and incrementally change the checksum.
177 */
178 ip->ip_ttl -= IPTTLDEC;
179 if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) {
180 ip->ip_sum += htons(IPTTLDEC << 8) + 1;
181 } else {
182 ip->ip_sum += htons(IPTTLDEC << 8);
183 }
184
185 /*
186 * Send the packet on its way. All we can get back is ENOBUFS
187 */
188 ipf->ipf_uses++;
189 ipf->ipf_timer = IPFLOW_TIMER;
190
9bccf70c
A
191 if (rt->rt_flags & RTF_GATEWAY)
192 dst = rt->rt_gateway;
193 else
194 dst = &ipf->ipf_ro.ro_dst;
195#ifdef __APPLE__
1c79356b 196 /* Not sure the rt_dlt is valid here !! XXX */
91447636 197 if ((error = dlil_output(rt->rt_ifp, PF_INET, m, (caddr_t) rt, dst, 0)) != 0) {
9bccf70c
A
198
199#else
200 if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) {
201#endif
1c79356b
A
202 if (error == ENOBUFS)
203 ipf->ipf_dropped++;
204 else
205 ipf->ipf_errors++;
206 }
207 return 1;
208}
209\f
210static void
211ipflow_addstats(
212 struct ipflow *ipf)
213{
214 ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
215 ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped;
216 ipstat.ips_forward += ipf->ipf_uses;
217 ipstat.ips_fastforward += ipf->ipf_uses;
218}
219
220static void
221ipflow_free(
222 struct ipflow *ipf)
223{
224 int s;
225 /*
226 * Remove the flow from the hash table (at elevated IPL).
227 * Once it's off the list, we can deal with it at normal
228 * network IPL.
229 */
230 s = splimp();
231 LIST_REMOVE(ipf, ipf_next);
232 splx(s);
233 ipflow_addstats(ipf);
9bccf70c 234 rtfree(ipf->ipf_ro.ro_rt);
1c79356b
A
235 ipflow_inuse--;
236 FREE(ipf, M_IPFLOW);
237}
238
239static struct ipflow *
240ipflow_reap(
241 void)
242{
243 struct ipflow *ipf, *maybe_ipf = NULL;
244 int idx;
245 int s;
246
247 for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
248 ipf = LIST_FIRST(&ipflows[idx]);
249 while (ipf != NULL) {
250 /*
251 * If this no longer points to a valid route
252 * reclaim it.
253 */
254 if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0)
255 goto done;
256 /*
257 * choose the one that's been least recently used
258 * or has had the least uses in the last 1.5
259 * intervals.
260 */
261 if (maybe_ipf == NULL
262 || ipf->ipf_timer < maybe_ipf->ipf_timer
263 || (ipf->ipf_timer == maybe_ipf->ipf_timer
264 && ipf->ipf_last_uses + ipf->ipf_uses <
265 maybe_ipf->ipf_last_uses +
266 maybe_ipf->ipf_uses))
267 maybe_ipf = ipf;
268 ipf = LIST_NEXT(ipf, ipf_next);
269 }
270 }
271 ipf = maybe_ipf;
272 done:
273 /*
274 * Remove the entry from the flow table.
275 */
276 s = splimp();
277 LIST_REMOVE(ipf, ipf_next);
278 splx(s);
279 ipflow_addstats(ipf);
9bccf70c 280 rtfree(ipf->ipf_ro.ro_rt);
1c79356b
A
281 return ipf;
282}
283
284void
285ipflow_slowtimo(
286 void)
287{
288 struct ipflow *ipf;
289 int idx;
290
291 for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
292 ipf = LIST_FIRST(&ipflows[idx]);
293 while (ipf != NULL) {
294 struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next);
295 if (--ipf->ipf_timer == 0) {
296 ipflow_free(ipf);
297 } else {
298 ipf->ipf_last_uses = ipf->ipf_uses;
299 ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
300 ipstat.ips_forward += ipf->ipf_uses;
301 ipstat.ips_fastforward += ipf->ipf_uses;
302 ipf->ipf_uses = 0;
303 }
304 ipf = next_ipf;
305 }
306 }
307}
308
309void
310ipflow_create(
311 const struct route *ro,
312 struct mbuf *m)
313{
314 const struct ip *const ip = mtod(m, struct ip *);
315 struct ipflow *ipf;
316 unsigned hash;
317 int s;
318
319 /*
320 * Don't create cache entries for ICMP messages.
321 */
322 if (!ipflow_active || ip->ip_p == IPPROTO_ICMP)
323 return;
324 /*
325 * See if an existing flow struct exists. If so remove it from it's
326 * list and free the old route. If not, try to malloc a new one
327 * (if we aren't at our limit).
328 */
329 ipf = ipflow_lookup(ip);
330 if (ipf == NULL) {
331 if (ipflow_inuse == IPFLOW_MAX) {
332 ipf = ipflow_reap();
333 } else {
334 ipf = (struct ipflow *) _MALLOC(sizeof(*ipf), M_IPFLOW,
335 M_NOWAIT);
336 if (ipf == NULL)
337 return;
338 ipflow_inuse++;
339 }
340 bzero((caddr_t) ipf, sizeof(*ipf));
341 } else {
342 s = splimp();
343 LIST_REMOVE(ipf, ipf_next);
344 splx(s);
345 ipflow_addstats(ipf);
9bccf70c 346 rtfree(ipf->ipf_ro.ro_rt);
1c79356b
A
347 ipf->ipf_uses = ipf->ipf_last_uses = 0;
348 ipf->ipf_errors = ipf->ipf_dropped = 0;
349 }
350
351 /*
352 * Fill in the updated information.
353 */
354 ipf->ipf_ro = *ro;
91447636 355 rtref(ro->ro_rt); //### LD 5/25/04 needs rt_mtx lock
1c79356b
A
356 ipf->ipf_dst = ip->ip_dst;
357 ipf->ipf_src = ip->ip_src;
358 ipf->ipf_tos = ip->ip_tos;
359 ipf->ipf_timer = IPFLOW_TIMER;
360 /*
361 * Insert into the approriate bucket of the flow table.
362 */
363 hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
364 s = splimp();
365 LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next);
366 splx(s);
367}