]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
8ad349bb | 2 | * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved. |
1c79356b | 3 | * |
8ad349bb | 4 | * @APPLE_LICENSE_OSREFERENCE_HEADER_START@ |
1c79356b | 5 | * |
8ad349bb A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the | |
10 | * License may not be used to create, or enable the creation or | |
11 | * redistribution of, unlawful or unlicensed copies of an Apple operating | |
12 | * system, or to circumvent, violate, or enable the circumvention or | |
13 | * violation of, any terms of an Apple operating system software license | |
14 | * agreement. | |
15 | * | |
16 | * Please obtain a copy of the License at | |
17 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
18 | * file. | |
19 | * | |
20 | * The Original Code and all software distributed under the License are | |
21 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
22 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
23 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
24 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
25 | * Please see the License for the specific language governing rights and | |
26 | * limitations under the License. | |
27 | * | |
28 | * @APPLE_LICENSE_OSREFERENCE_HEADER_END@ | |
1c79356b A |
29 | */ |
30 | /*- | |
31 | * Copyright (c) 1998 The NetBSD Foundation, Inc. | |
32 | * All rights reserved. | |
33 | * | |
34 | * This code is derived from software contributed to The NetBSD Foundation | |
35 | * by the 3am Software Foundry ("3am"). It was developed by Matt Thomas. | |
36 | * | |
37 | * Redistribution and use in source and binary forms, with or without | |
38 | * modification, are permitted provided that the following conditions | |
39 | * are met: | |
40 | * 1. Redistributions of source code must retain the above copyright | |
41 | * notice, this list of conditions and the following disclaimer. | |
42 | * 2. Redistributions in binary form must reproduce the above copyright | |
43 | * notice, this list of conditions and the following disclaimer in the | |
44 | * documentation and/or other materials provided with the distribution. | |
45 | * 3. All advertising materials mentioning features or use of this software | |
46 | * must display the following acknowledgement: | |
47 | * This product includes software developed by the NetBSD | |
48 | * Foundation, Inc. and its contributors. | |
49 | * 4. Neither the name of The NetBSD Foundation nor the names of its | |
50 | * contributors may be used to endorse or promote products derived | |
51 | * from this software without specific prior written permission. | |
52 | * | |
53 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
54 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
55 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
56 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
57 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
58 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
59 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
60 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
61 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
62 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
63 | * POSSIBILITY OF SUCH DAMAGE. | |
64 | * | |
9bccf70c | 65 | * $FreeBSD: src/sys/netinet/ip_flow.c,v 1.9.2.1 2001/08/08 08:20:35 ru Exp $ |
1c79356b A |
66 | */ |
67 | ||
68 | #include <sys/param.h> | |
69 | #include <sys/systm.h> | |
70 | #include <sys/malloc.h> | |
71 | #include <sys/mbuf.h> | |
72 | #include <sys/protosw.h> | |
73 | #include <sys/socket.h> | |
74 | #include <sys/kernel.h> | |
75 | ||
76 | #include <sys/sysctl.h> | |
77 | ||
78 | #include <net/if.h> | |
79 | #include <net/route.h> | |
80 | ||
81 | #include <netinet/in.h> | |
82 | #include <netinet/in_systm.h> | |
83 | #include <netinet/ip.h> | |
84 | #include <netinet/in_var.h> | |
85 | #include <netinet/ip_var.h> | |
86 | #include <netinet/ip_flow.h> | |
87 | #include <net/dlil.h> | |
88 | ||
89 | #define IPFLOW_TIMER (5 * PR_SLOWHZ) | |
90 | #define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */ | |
91 | #define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS) | |
92 | static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE]; | |
93 | static int ipflow_inuse; | |
94 | #define IPFLOW_MAX 256 | |
95 | ||
9bccf70c | 96 | #ifdef __APPLE__ |
1c79356b A |
97 | #define M_IPFLOW M_TEMP |
98 | #endif | |
99 | ||
100 | static int ipflow_active = 0; | |
101 | SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW, | |
9bccf70c | 102 | &ipflow_active, 0, "Enable flow-based IP forwarding"); |
1c79356b | 103 | |
9bccf70c A |
104 | #ifndef __APPLE__ |
105 | static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow"); | |
106 | #endif | |
1c79356b A |
107 | |
108 | static unsigned | |
109 | ipflow_hash( | |
110 | struct in_addr dst, | |
111 | struct in_addr src, | |
112 | unsigned tos) | |
113 | { | |
114 | unsigned hash = tos; | |
115 | int idx; | |
116 | for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) | |
117 | hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx); | |
118 | return hash & (IPFLOW_HASHSIZE-1); | |
119 | } | |
120 | ||
121 | static struct ipflow * | |
122 | ipflow_lookup( | |
123 | const struct ip *ip) | |
124 | { | |
125 | unsigned hash; | |
126 | struct ipflow *ipf; | |
127 | ||
128 | hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); | |
129 | ||
130 | ipf = LIST_FIRST(&ipflows[hash]); | |
131 | while (ipf != NULL) { | |
132 | if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr | |
133 | && ip->ip_src.s_addr == ipf->ipf_src.s_addr | |
134 | && ip->ip_tos == ipf->ipf_tos) | |
135 | break; | |
136 | ipf = LIST_NEXT(ipf, ipf_next); | |
137 | } | |
138 | return ipf; | |
139 | } | |
140 | ||
141 | int | |
142 | ipflow_fastforward( | |
143 | struct mbuf *m) | |
144 | { | |
145 | struct ip *ip; | |
146 | struct ipflow *ipf; | |
147 | struct rtentry *rt; | |
9bccf70c | 148 | struct sockaddr *dst; |
1c79356b A |
149 | int error; |
150 | ||
151 | /* | |
152 | * Are we forwarding packets? Big enough for an IP packet? | |
153 | */ | |
154 | if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip)) | |
155 | return 0; | |
156 | /* | |
157 | * IP header with no option and valid version and length | |
158 | */ | |
159 | ip = mtod(m, struct ip *); | |
160 | if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) | |
161 | || ntohs(ip->ip_len) > m->m_pkthdr.len) | |
162 | return 0; | |
163 | /* | |
164 | * Find a flow. | |
165 | */ | |
166 | if ((ipf = ipflow_lookup(ip)) == NULL) | |
167 | return 0; | |
168 | ||
169 | /* | |
170 | * Route and interface still up? | |
171 | */ | |
172 | rt = ipf->ipf_ro.ro_rt; | |
173 | if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) | |
174 | return 0; | |
175 | ||
176 | /* | |
177 | * Packet size OK? TTL? | |
178 | */ | |
179 | if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) | |
180 | return 0; | |
181 | ||
182 | /* | |
183 | * Everything checks out and so we can forward this packet. | |
184 | * Modify the TTL and incrementally change the checksum. | |
185 | */ | |
186 | ip->ip_ttl -= IPTTLDEC; | |
187 | if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) { | |
188 | ip->ip_sum += htons(IPTTLDEC << 8) + 1; | |
189 | } else { | |
190 | ip->ip_sum += htons(IPTTLDEC << 8); | |
191 | } | |
192 | ||
193 | /* | |
194 | * Send the packet on its way. All we can get back is ENOBUFS | |
195 | */ | |
196 | ipf->ipf_uses++; | |
197 | ipf->ipf_timer = IPFLOW_TIMER; | |
198 | ||
9bccf70c A |
199 | if (rt->rt_flags & RTF_GATEWAY) |
200 | dst = rt->rt_gateway; | |
201 | else | |
202 | dst = &ipf->ipf_ro.ro_dst; | |
203 | #ifdef __APPLE__ | |
1c79356b | 204 | /* Not sure the rt_dlt is valid here !! XXX */ |
91447636 | 205 | if ((error = dlil_output(rt->rt_ifp, PF_INET, m, (caddr_t) rt, dst, 0)) != 0) { |
9bccf70c A |
206 | |
207 | #else | |
208 | if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) { | |
209 | #endif | |
1c79356b A |
210 | if (error == ENOBUFS) |
211 | ipf->ipf_dropped++; | |
212 | else | |
213 | ipf->ipf_errors++; | |
214 | } | |
215 | return 1; | |
216 | } | |
217 | \f | |
218 | static void | |
219 | ipflow_addstats( | |
220 | struct ipflow *ipf) | |
221 | { | |
222 | ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; | |
223 | ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped; | |
224 | ipstat.ips_forward += ipf->ipf_uses; | |
225 | ipstat.ips_fastforward += ipf->ipf_uses; | |
226 | } | |
227 | ||
228 | static void | |
229 | ipflow_free( | |
230 | struct ipflow *ipf) | |
231 | { | |
232 | int s; | |
233 | /* | |
234 | * Remove the flow from the hash table (at elevated IPL). | |
235 | * Once it's off the list, we can deal with it at normal | |
236 | * network IPL. | |
237 | */ | |
238 | s = splimp(); | |
239 | LIST_REMOVE(ipf, ipf_next); | |
240 | splx(s); | |
241 | ipflow_addstats(ipf); | |
9bccf70c | 242 | rtfree(ipf->ipf_ro.ro_rt); |
1c79356b A |
243 | ipflow_inuse--; |
244 | FREE(ipf, M_IPFLOW); | |
245 | } | |
246 | ||
247 | static struct ipflow * | |
248 | ipflow_reap( | |
249 | void) | |
250 | { | |
251 | struct ipflow *ipf, *maybe_ipf = NULL; | |
252 | int idx; | |
253 | int s; | |
254 | ||
255 | for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { | |
256 | ipf = LIST_FIRST(&ipflows[idx]); | |
257 | while (ipf != NULL) { | |
258 | /* | |
259 | * If this no longer points to a valid route | |
260 | * reclaim it. | |
261 | */ | |
262 | if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0) | |
263 | goto done; | |
264 | /* | |
265 | * choose the one that's been least recently used | |
266 | * or has had the least uses in the last 1.5 | |
267 | * intervals. | |
268 | */ | |
269 | if (maybe_ipf == NULL | |
270 | || ipf->ipf_timer < maybe_ipf->ipf_timer | |
271 | || (ipf->ipf_timer == maybe_ipf->ipf_timer | |
272 | && ipf->ipf_last_uses + ipf->ipf_uses < | |
273 | maybe_ipf->ipf_last_uses + | |
274 | maybe_ipf->ipf_uses)) | |
275 | maybe_ipf = ipf; | |
276 | ipf = LIST_NEXT(ipf, ipf_next); | |
277 | } | |
278 | } | |
279 | ipf = maybe_ipf; | |
280 | done: | |
281 | /* | |
282 | * Remove the entry from the flow table. | |
283 | */ | |
284 | s = splimp(); | |
285 | LIST_REMOVE(ipf, ipf_next); | |
286 | splx(s); | |
287 | ipflow_addstats(ipf); | |
9bccf70c | 288 | rtfree(ipf->ipf_ro.ro_rt); |
1c79356b A |
289 | return ipf; |
290 | } | |
291 | ||
292 | void | |
293 | ipflow_slowtimo( | |
294 | void) | |
295 | { | |
296 | struct ipflow *ipf; | |
297 | int idx; | |
298 | ||
299 | for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { | |
300 | ipf = LIST_FIRST(&ipflows[idx]); | |
301 | while (ipf != NULL) { | |
302 | struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next); | |
303 | if (--ipf->ipf_timer == 0) { | |
304 | ipflow_free(ipf); | |
305 | } else { | |
306 | ipf->ipf_last_uses = ipf->ipf_uses; | |
307 | ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; | |
308 | ipstat.ips_forward += ipf->ipf_uses; | |
309 | ipstat.ips_fastforward += ipf->ipf_uses; | |
310 | ipf->ipf_uses = 0; | |
311 | } | |
312 | ipf = next_ipf; | |
313 | } | |
314 | } | |
315 | } | |
316 | ||
317 | void | |
318 | ipflow_create( | |
319 | const struct route *ro, | |
320 | struct mbuf *m) | |
321 | { | |
322 | const struct ip *const ip = mtod(m, struct ip *); | |
323 | struct ipflow *ipf; | |
324 | unsigned hash; | |
325 | int s; | |
326 | ||
327 | /* | |
328 | * Don't create cache entries for ICMP messages. | |
329 | */ | |
330 | if (!ipflow_active || ip->ip_p == IPPROTO_ICMP) | |
331 | return; | |
332 | /* | |
333 | * See if an existing flow struct exists. If so remove it from it's | |
334 | * list and free the old route. If not, try to malloc a new one | |
335 | * (if we aren't at our limit). | |
336 | */ | |
337 | ipf = ipflow_lookup(ip); | |
338 | if (ipf == NULL) { | |
339 | if (ipflow_inuse == IPFLOW_MAX) { | |
340 | ipf = ipflow_reap(); | |
341 | } else { | |
342 | ipf = (struct ipflow *) _MALLOC(sizeof(*ipf), M_IPFLOW, | |
343 | M_NOWAIT); | |
344 | if (ipf == NULL) | |
345 | return; | |
346 | ipflow_inuse++; | |
347 | } | |
348 | bzero((caddr_t) ipf, sizeof(*ipf)); | |
349 | } else { | |
350 | s = splimp(); | |
351 | LIST_REMOVE(ipf, ipf_next); | |
352 | splx(s); | |
353 | ipflow_addstats(ipf); | |
9bccf70c | 354 | rtfree(ipf->ipf_ro.ro_rt); |
1c79356b A |
355 | ipf->ipf_uses = ipf->ipf_last_uses = 0; |
356 | ipf->ipf_errors = ipf->ipf_dropped = 0; | |
357 | } | |
358 | ||
359 | /* | |
360 | * Fill in the updated information. | |
361 | */ | |
362 | ipf->ipf_ro = *ro; | |
91447636 | 363 | rtref(ro->ro_rt); //### LD 5/25/04 needs rt_mtx lock |
1c79356b A |
364 | ipf->ipf_dst = ip->ip_dst; |
365 | ipf->ipf_src = ip->ip_src; | |
366 | ipf->ipf_tos = ip->ip_tos; | |
367 | ipf->ipf_timer = IPFLOW_TIMER; | |
368 | /* | |
369 | * Insert into the approriate bucket of the flow table. | |
370 | */ | |
371 | hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); | |
372 | s = splimp(); | |
373 | LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next); | |
374 | splx(s); | |
375 | } |