]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. | |
7 | * | |
8 | * This file contains Original Code and/or Modifications of Original Code | |
9 | * as defined in and that are subject to the Apple Public Source License | |
10 | * Version 2.0 (the 'License'). You may not use this file except in | |
11 | * compliance with the License. Please obtain a copy of the License at | |
12 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
13 | * file. | |
14 | * | |
15 | * The Original Code and all software distributed under the License are | |
16 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
17 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
18 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
19 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
20 | * Please see the License for the specific language governing rights and | |
21 | * limitations under the License. | |
22 | * | |
23 | * @APPLE_LICENSE_HEADER_END@ | |
24 | */ | |
25 | /*- | |
26 | * Copyright (c) 1998 The NetBSD Foundation, Inc. | |
27 | * All rights reserved. | |
28 | * | |
29 | * This code is derived from software contributed to The NetBSD Foundation | |
30 | * by the 3am Software Foundry ("3am"). It was developed by Matt Thomas. | |
31 | * | |
32 | * Redistribution and use in source and binary forms, with or without | |
33 | * modification, are permitted provided that the following conditions | |
34 | * are met: | |
35 | * 1. Redistributions of source code must retain the above copyright | |
36 | * notice, this list of conditions and the following disclaimer. | |
37 | * 2. Redistributions in binary form must reproduce the above copyright | |
38 | * notice, this list of conditions and the following disclaimer in the | |
39 | * documentation and/or other materials provided with the distribution. | |
40 | * 3. All advertising materials mentioning features or use of this software | |
41 | * must display the following acknowledgement: | |
42 | * This product includes software developed by the NetBSD | |
43 | * Foundation, Inc. and its contributors. | |
44 | * 4. Neither the name of The NetBSD Foundation nor the names of its | |
45 | * contributors may be used to endorse or promote products derived | |
46 | * from this software without specific prior written permission. | |
47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
49 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
50 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
51 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
52 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
53 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
54 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
55 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
56 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
57 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
58 | * POSSIBILITY OF SUCH DAMAGE. | |
59 | * | |
60 | * $FreeBSD: src/sys/netinet/ip_flow.c,v 1.9.2.1 2001/08/08 08:20:35 ru Exp $ | |
61 | */ | |
62 | ||
63 | #include <sys/param.h> | |
64 | #include <sys/systm.h> | |
65 | #include <sys/malloc.h> | |
66 | #include <sys/mbuf.h> | |
67 | #include <sys/protosw.h> | |
68 | #include <sys/socket.h> | |
69 | #include <sys/kernel.h> | |
70 | ||
71 | #include <sys/sysctl.h> | |
72 | ||
73 | #include <net/if.h> | |
74 | #include <net/route.h> | |
75 | ||
76 | #include <netinet/in.h> | |
77 | #include <netinet/in_systm.h> | |
78 | #include <netinet/ip.h> | |
79 | #include <netinet/in_var.h> | |
80 | #include <netinet/ip_var.h> | |
81 | #include <netinet/ip_flow.h> | |
82 | #include <net/dlil.h> | |
83 | ||
/*
 * Value loaded into a flow's ipf_timer when the flow is created or used
 * by ipflow_fastforward(); ipflow_slowtimo() decrements the timer and
 * frees the flow when it reaches zero.
 */
#define IPFLOW_TIMER (5 * PR_SLOWHZ)
/* Number of bits in a bucket index; the table has 2^IPFLOW_HASHBITS buckets. */
#define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */
#define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS)
/* The flow table: one list head per hash bucket, indexed by ipflow_hash(). */
static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE];
/* Count of flow entries currently allocated (see ipflow_create/ipflow_free). */
static int ipflow_inuse;
/* When ipflow_inuse equals this, ipflow_create() recycles an entry instead of allocating. */
#define IPFLOW_MAX 256

/* On Apple builds the flow entries are allocated with the M_TEMP malloc type. */
#ifdef __APPLE__
#define M_IPFLOW M_TEMP
#endif

/* Non-zero enables the flow cache; checked by ipflow_fastforward() and ipflow_create(). */
static int ipflow_active = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW,
	&ipflow_active, 0, "Enable flow-based IP forwarding");

/* Elsewhere the flow entries get their own malloc type. */
#ifndef __APPLE__
static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow");
#endif
102 | ||
103 | static unsigned | |
104 | ipflow_hash( | |
105 | struct in_addr dst, | |
106 | struct in_addr src, | |
107 | unsigned tos) | |
108 | { | |
109 | unsigned hash = tos; | |
110 | int idx; | |
111 | for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) | |
112 | hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx); | |
113 | return hash & (IPFLOW_HASHSIZE-1); | |
114 | } | |
115 | ||
116 | static struct ipflow * | |
117 | ipflow_lookup( | |
118 | const struct ip *ip) | |
119 | { | |
120 | unsigned hash; | |
121 | struct ipflow *ipf; | |
122 | ||
123 | hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); | |
124 | ||
125 | ipf = LIST_FIRST(&ipflows[hash]); | |
126 | while (ipf != NULL) { | |
127 | if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr | |
128 | && ip->ip_src.s_addr == ipf->ipf_src.s_addr | |
129 | && ip->ip_tos == ipf->ipf_tos) | |
130 | break; | |
131 | ipf = LIST_NEXT(ipf, ipf_next); | |
132 | } | |
133 | return ipf; | |
134 | } | |
135 | ||
136 | int | |
137 | ipflow_fastforward( | |
138 | struct mbuf *m) | |
139 | { | |
140 | struct ip *ip; | |
141 | struct ipflow *ipf; | |
142 | struct rtentry *rt; | |
143 | struct sockaddr *dst; | |
144 | int error; | |
145 | ||
146 | /* | |
147 | * Are we forwarding packets? Big enough for an IP packet? | |
148 | */ | |
149 | if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip)) | |
150 | return 0; | |
151 | /* | |
152 | * IP header with no option and valid version and length | |
153 | */ | |
154 | ip = mtod(m, struct ip *); | |
155 | if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) | |
156 | || ntohs(ip->ip_len) > m->m_pkthdr.len) | |
157 | return 0; | |
158 | /* | |
159 | * Find a flow. | |
160 | */ | |
161 | if ((ipf = ipflow_lookup(ip)) == NULL) | |
162 | return 0; | |
163 | ||
164 | /* | |
165 | * Route and interface still up? | |
166 | */ | |
167 | rt = ipf->ipf_ro.ro_rt; | |
168 | if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) | |
169 | return 0; | |
170 | ||
171 | /* | |
172 | * Packet size OK? TTL? | |
173 | */ | |
174 | if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) | |
175 | return 0; | |
176 | ||
177 | /* | |
178 | * Everything checks out and so we can forward this packet. | |
179 | * Modify the TTL and incrementally change the checksum. | |
180 | */ | |
181 | ip->ip_ttl -= IPTTLDEC; | |
182 | if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) { | |
183 | ip->ip_sum += htons(IPTTLDEC << 8) + 1; | |
184 | } else { | |
185 | ip->ip_sum += htons(IPTTLDEC << 8); | |
186 | } | |
187 | ||
188 | /* | |
189 | * Send the packet on its way. All we can get back is ENOBUFS | |
190 | */ | |
191 | ipf->ipf_uses++; | |
192 | ipf->ipf_timer = IPFLOW_TIMER; | |
193 | ||
194 | if (rt->rt_flags & RTF_GATEWAY) | |
195 | dst = rt->rt_gateway; | |
196 | else | |
197 | dst = &ipf->ipf_ro.ro_dst; | |
198 | #ifdef __APPLE__ | |
199 | /* Not sure the rt_dlt is valid here !! XXX */ | |
200 | if ((error = dlil_output((u_long)rt->rt_dlt, m, (caddr_t) rt, dst, 0)) != 0) { | |
201 | ||
202 | #else | |
203 | if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) { | |
204 | #endif | |
205 | if (error == ENOBUFS) | |
206 | ipf->ipf_dropped++; | |
207 | else | |
208 | ipf->ipf_errors++; | |
209 | } | |
210 | return 1; | |
211 | } | |
212 | \f | |
213 | static void | |
214 | ipflow_addstats( | |
215 | struct ipflow *ipf) | |
216 | { | |
217 | ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; | |
218 | ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped; | |
219 | ipstat.ips_forward += ipf->ipf_uses; | |
220 | ipstat.ips_fastforward += ipf->ipf_uses; | |
221 | } | |
222 | ||
223 | static void | |
224 | ipflow_free( | |
225 | struct ipflow *ipf) | |
226 | { | |
227 | int s; | |
228 | /* | |
229 | * Remove the flow from the hash table (at elevated IPL). | |
230 | * Once it's off the list, we can deal with it at normal | |
231 | * network IPL. | |
232 | */ | |
233 | s = splimp(); | |
234 | LIST_REMOVE(ipf, ipf_next); | |
235 | splx(s); | |
236 | ipflow_addstats(ipf); | |
237 | rtfree(ipf->ipf_ro.ro_rt); | |
238 | ipflow_inuse--; | |
239 | FREE(ipf, M_IPFLOW); | |
240 | } | |
241 | ||
242 | static struct ipflow * | |
243 | ipflow_reap( | |
244 | void) | |
245 | { | |
246 | struct ipflow *ipf, *maybe_ipf = NULL; | |
247 | int idx; | |
248 | int s; | |
249 | ||
250 | for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { | |
251 | ipf = LIST_FIRST(&ipflows[idx]); | |
252 | while (ipf != NULL) { | |
253 | /* | |
254 | * If this no longer points to a valid route | |
255 | * reclaim it. | |
256 | */ | |
257 | if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0) | |
258 | goto done; | |
259 | /* | |
260 | * choose the one that's been least recently used | |
261 | * or has had the least uses in the last 1.5 | |
262 | * intervals. | |
263 | */ | |
264 | if (maybe_ipf == NULL | |
265 | || ipf->ipf_timer < maybe_ipf->ipf_timer | |
266 | || (ipf->ipf_timer == maybe_ipf->ipf_timer | |
267 | && ipf->ipf_last_uses + ipf->ipf_uses < | |
268 | maybe_ipf->ipf_last_uses + | |
269 | maybe_ipf->ipf_uses)) | |
270 | maybe_ipf = ipf; | |
271 | ipf = LIST_NEXT(ipf, ipf_next); | |
272 | } | |
273 | } | |
274 | ipf = maybe_ipf; | |
275 | done: | |
276 | /* | |
277 | * Remove the entry from the flow table. | |
278 | */ | |
279 | s = splimp(); | |
280 | LIST_REMOVE(ipf, ipf_next); | |
281 | splx(s); | |
282 | ipflow_addstats(ipf); | |
283 | rtfree(ipf->ipf_ro.ro_rt); | |
284 | return ipf; | |
285 | } | |
286 | ||
287 | void | |
288 | ipflow_slowtimo( | |
289 | void) | |
290 | { | |
291 | struct ipflow *ipf; | |
292 | int idx; | |
293 | ||
294 | for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { | |
295 | ipf = LIST_FIRST(&ipflows[idx]); | |
296 | while (ipf != NULL) { | |
297 | struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next); | |
298 | if (--ipf->ipf_timer == 0) { | |
299 | ipflow_free(ipf); | |
300 | } else { | |
301 | ipf->ipf_last_uses = ipf->ipf_uses; | |
302 | ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; | |
303 | ipstat.ips_forward += ipf->ipf_uses; | |
304 | ipstat.ips_fastforward += ipf->ipf_uses; | |
305 | ipf->ipf_uses = 0; | |
306 | } | |
307 | ipf = next_ipf; | |
308 | } | |
309 | } | |
310 | } | |
311 | ||
312 | void | |
313 | ipflow_create( | |
314 | const struct route *ro, | |
315 | struct mbuf *m) | |
316 | { | |
317 | const struct ip *const ip = mtod(m, struct ip *); | |
318 | struct ipflow *ipf; | |
319 | unsigned hash; | |
320 | int s; | |
321 | ||
322 | /* | |
323 | * Don't create cache entries for ICMP messages. | |
324 | */ | |
325 | if (!ipflow_active || ip->ip_p == IPPROTO_ICMP) | |
326 | return; | |
327 | /* | |
328 | * See if an existing flow struct exists. If so remove it from it's | |
329 | * list and free the old route. If not, try to malloc a new one | |
330 | * (if we aren't at our limit). | |
331 | */ | |
332 | ipf = ipflow_lookup(ip); | |
333 | if (ipf == NULL) { | |
334 | if (ipflow_inuse == IPFLOW_MAX) { | |
335 | ipf = ipflow_reap(); | |
336 | } else { | |
337 | ipf = (struct ipflow *) _MALLOC(sizeof(*ipf), M_IPFLOW, | |
338 | M_NOWAIT); | |
339 | if (ipf == NULL) | |
340 | return; | |
341 | ipflow_inuse++; | |
342 | } | |
343 | bzero((caddr_t) ipf, sizeof(*ipf)); | |
344 | } else { | |
345 | s = splimp(); | |
346 | LIST_REMOVE(ipf, ipf_next); | |
347 | splx(s); | |
348 | ipflow_addstats(ipf); | |
349 | rtfree(ipf->ipf_ro.ro_rt); | |
350 | ipf->ipf_uses = ipf->ipf_last_uses = 0; | |
351 | ipf->ipf_errors = ipf->ipf_dropped = 0; | |
352 | } | |
353 | ||
354 | /* | |
355 | * Fill in the updated information. | |
356 | */ | |
357 | ipf->ipf_ro = *ro; | |
358 | rtref(ro->ro_rt); | |
359 | ipf->ipf_dst = ip->ip_dst; | |
360 | ipf->ipf_src = ip->ip_src; | |
361 | ipf->ipf_tos = ip->ip_tos; | |
362 | ipf->ipf_timer = IPFLOW_TIMER; | |
363 | /* | |
364 | * Insert into the approriate bucket of the flow table. | |
365 | */ | |
366 | hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); | |
367 | s = splimp(); | |
368 | LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next); | |
369 | splx(s); | |
370 | } |