]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/in_arp.c
xnu-3789.60.24.tar.gz
[apple/xnu.git] / bsd / netinet / in_arp.c
1 /*
2 * Copyright (c) 2004-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 */
61
62 #include <kern/debug.h>
63 #include <netinet/in_arp.h>
64 #include <sys/types.h>
65 #include <sys/param.h>
66 #include <sys/kernel_types.h>
67 #include <sys/syslog.h>
68 #include <sys/systm.h>
69 #include <sys/time.h>
70 #include <sys/kernel.h>
71 #include <sys/mbuf.h>
72 #include <sys/sysctl.h>
73 #include <sys/mcache.h>
74 #include <sys/protosw.h>
75 #include <string.h>
76 #include <net/if_arp.h>
77 #include <net/if_dl.h>
78 #include <net/dlil.h>
79 #include <net/if_types.h>
80 #include <net/if_llreach.h>
81 #include <net/route.h>
82
83 #include <netinet/if_ether.h>
84 #include <netinet/in_var.h>
85 #include <kern/zalloc.h>
86
87 #include <kern/thread.h>
88 #include <kern/sched_prim.h>
89
/*
 * Return a pointer to the link-layer address bytes inside a sockaddr_dl;
 * the address starts sdl_nlen bytes into sdl_data (past the interface name).
 */
#define	CONST_LLADDR(s)	((const u_char*)((s)->sdl_data + (s)->sdl_nlen))

/* Maximum hardware address length handled (bytes); presumably bounds
 * lladdr copies later in this file — not referenced in this chunk. */
static const size_t MAX_HW_LEN = 10;
93
/*
 * Synchronization notes:
 *
 * The global list of ARP entries are stored in llinfo_arp; an entry
 * gets inserted into the list when the route is created and gets
 * removed from the list when it is deleted; this is done as part
 * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in arp_rtrequest().
 *
 * Because rnh_lock and rt_lock for the entry are held during those
 * operations, the same locks (and thus lock ordering) must be used
 * elsewhere to access the relevant data structure fields:
 *
 * la_le.{le_next,le_prev}, la_rt
 *
 *	- Routing lock (rnh_lock)
 *
 * la_holdq, la_asked, la_llreach, la_lastused, la_flags
 *
 *	- Routing entry lock (rt_lock)
 *
 * Due to the dependency on rt_lock, llinfo_arp has the same lifetime
 * as the route entry itself.  When a route is deleted (RTM_DELETE),
 * it is simply removed from the global list but the memory is not
 * freed until the route itself is freed.
 */
struct llinfo_arp {
	/*
	 * The following are protected by rnh_lock
	 */
	LIST_ENTRY(llinfo_arp) la_le;		/* global llinfo_arp linkage */
	struct rtentry *la_rt;			/* back-pointer to owning route */
	/*
	 * The following are protected by rt_lock
	 */
	class_queue_t la_holdq;		/* packets awaiting resolution */
	struct if_llreach *la_llreach;	/* link-layer reachability record */
	u_int64_t la_lastused;		/* last used timestamp */
	u_int32_t la_asked;		/* # of requests sent */
	u_int32_t la_maxtries;		/* retry limit */
	u_int64_t la_probeexp;		/* probe deadline timestamp */
	u_int32_t la_flags;		/* LLINFO_* flags below */
#define	LLINFO_RTRFAIL_EVTSENT	0x1	/* sent an ARP event */
#define	LLINFO_PROBING		0x2	/* waiting for an ARP reply */
};
/* Global list of ARP entries; protected by rnh_lock (see notes above) */
static LIST_HEAD(, llinfo_arp) llinfo_arp;

/* Deferred thread call that periodically ages ARP entries */
static thread_call_t arp_timeout_tcall;
static int arp_timeout_run;		/* arp_timeout is scheduled to run */
static void arp_timeout(thread_call_param_t arg0, thread_call_param_t arg1);
static void arp_sched_timeout(struct timeval *);

/* Deferred thread call that drives unresolved-entry probing */
static thread_call_t arp_probe_tcall;
static int arp_probe_run;		/* arp_probe is scheduled to run */
static void arp_probe(thread_call_param_t arg0, thread_call_param_t arg1);
static void arp_sched_probe(struct timeval *);

static void arptfree(struct llinfo_arp *, void *);
static errno_t arp_lookup_route(const struct in_addr *, int,
    int, route_t *, unsigned int);
static int arp_getstat SYSCTL_HANDLER_ARGS;

static struct llinfo_arp *arp_llinfo_alloc(int);
static void arp_llinfo_free(void *);
static uint32_t arp_llinfo_flushq(struct llinfo_arp *);
static void arp_llinfo_purge(struct rtentry *);
static void arp_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
static void arp_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);
static void arp_llinfo_refresh(struct rtentry *);

static __inline void arp_llreach_use(struct llinfo_arp *);
static __inline int arp_llreach_reachable(struct llinfo_arp *);
static void arp_llreach_alloc(struct rtentry *, struct ifnet *, void *,
    unsigned int, boolean_t);

extern int tvtohz(struct timeval *);

/* Set once by arp_init(); guards against double initialization */
static int arpinit_done;
171
SYSCTL_DECL(_net_link_ether);
SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "");

/*
 * Tunables exported under net.link.ether.inet.*
 */
static int arpt_prune = (5*60*1); /* walk list every 5 minutes */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_prune, 0, "");

#define	ARP_PROBE_TIME	7	/* seconds */
static u_int32_t arpt_probe = ARP_PROBE_TIME;
SYSCTL_UINT(_net_link_ether_inet, OID_AUTO, probe_intvl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_probe, 0, "");

static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_keep, 0, "");

static int arpt_down = 20; /* once declared down, don't send for 20 sec */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_down, 0, "");

static int arp_llreach_base = 120; /* seconds */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_llreach_base,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arp_llreach_base, 0,
    "default ARP link-layer reachability max lifetime (in seconds)");

#define	ARP_UNICAST_LIMIT 3	/* # of probes until ARP refresh broadcast */
static u_int32_t arp_unicast_lim = ARP_UNICAST_LIMIT;
/* NOTE(review): SYSCTL_INT on a u_int32_t variable (here and for maxtries
 * below) — presumably benign since the handler treats it as 32-bit, but
 * SYSCTL_UINT would match the declared type; confirm before changing. */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_unicast_lim,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arp_unicast_lim, ARP_UNICAST_LIMIT,
    "number of unicast ARP refresh probes before using broadcast");

static u_int32_t arp_maxtries = 5;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arp_maxtries, 0, "");

static u_int32_t arp_maxhold = 16;
SYSCTL_UINT(_net_link_ether_inet, OID_AUTO, maxhold,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arp_maxhold, 0, "");

static int useloopback = 1; /* use loopback interface for local traffic */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback,
    CTLFLAG_RW | CTLFLAG_LOCKED, &useloopback, 0, "");

static int arp_proxyall = 0;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arp_proxyall, 0, "");

static int arp_sendllconflict = 0;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, sendllconflict,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arp_sendllconflict, 0, "");

static int log_arp_warnings = 0;	/* Thread safe: no accumulated state */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_warnings,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &log_arp_warnings, 0,
    "log arp warning messages");

static int keep_announcements = 1;	/* Thread safe: no aging of state */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, keep_announcements,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &keep_announcements, 0,
    "keep arp announcements");

static int send_conflicting_probes = 1;	/* Thread safe: no accumulated state */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, send_conflicting_probes,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &send_conflicting_probes, 0,
    "send conflicting link-local arp probes");

static int arp_verbose;			/* debug logging level; 0 = quiet */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &arp_verbose, 0, "");

/*
 * Generally protected by rnh_lock; use atomic operations on fields
 * that are also modified outside of that lock (if needed).
 */
struct arpstat arpstat __attribute__((aligned(sizeof (uint64_t))));
SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, arp_getstat, "S,arpstat",
    "ARP statistics (struct arpstat, net/if_arp.h)");

/* Zone from which llinfo_arp entries are allocated */
static struct zone *llinfo_arp_zone;
#define	LLINFO_ARP_ZONE_MAX	256		/* maximum elements in zone */
#define	LLINFO_ARP_ZONE_NAME	"llinfo_arp"	/* name for zone */
258
259 void
260 arp_init(void)
261 {
262 VERIFY(!arpinit_done);
263
264 LIST_INIT(&llinfo_arp);
265
266 llinfo_arp_zone = zinit(sizeof (struct llinfo_arp),
267 LLINFO_ARP_ZONE_MAX * sizeof (struct llinfo_arp), 0,
268 LLINFO_ARP_ZONE_NAME);
269 if (llinfo_arp_zone == NULL)
270 panic("%s: failed allocating llinfo_arp_zone", __func__);
271
272 zone_change(llinfo_arp_zone, Z_EXPAND, TRUE);
273 zone_change(llinfo_arp_zone, Z_CALLERACCT, FALSE);
274
275 arpinit_done = 1;
276 }
277
278 static struct llinfo_arp *
279 arp_llinfo_alloc(int how)
280 {
281 struct llinfo_arp *la;
282
283 la = (how == M_WAITOK) ? zalloc(llinfo_arp_zone) :
284 zalloc_noblock(llinfo_arp_zone);
285 if (la != NULL) {
286 bzero(la, sizeof (*la));
287 /*
288 * The type of queue (Q_DROPHEAD) here is just a hint;
289 * the actual logic that works on this queue performs
290 * a head drop, details in arp_llinfo_addq().
291 */
292 _qinit(&la->la_holdq, Q_DROPHEAD, (arp_maxhold == 0) ?
293 (uint32_t)-1 : arp_maxhold);
294 }
295
296 return (la);
297 }
298
/*
 * Release an llinfo_arp back to its zone.  The entry must already have
 * been unchained from the global list (le_next/le_prev NULL, done by
 * RTM_DELETE in arp_rtrequest()); any held packets and cached link-layer
 * info are dropped first.
 */
static void
arp_llinfo_free(void *arg)
{
	struct llinfo_arp *la = arg;

	/* Still on the global list means RTM_DELETE never ran — bug */
	if (la->la_le.le_next != NULL || la->la_le.le_prev != NULL) {
		panic("%s: trying to free %p when it is in use", __func__, la);
		/* NOTREACHED */
	}

	/* Free any held packets */
	(void) arp_llinfo_flushq(la);

	/* Purge any link-layer info caching */
	VERIFY(la->la_rt->rt_llinfo == la);
	if (la->la_rt->rt_llinfo_purge != NULL)
		la->la_rt->rt_llinfo_purge(la->la_rt);

	zfree(llinfo_arp_zone, la);
}
319
320 static void
321 arp_llinfo_addq(struct llinfo_arp *la, struct mbuf *m)
322 {
323 if (qlen(&la->la_holdq) >= qlimit(&la->la_holdq)) {
324 struct mbuf *_m;
325 /* prune less than CTL, else take what's at the head */
326 _m = _getq_scidx_lt(&la->la_holdq, SCIDX_CTL);
327 if (_m == NULL)
328 _m = _getq(&la->la_holdq);
329 VERIFY(_m != NULL);
330 if (arp_verbose) {
331 log(LOG_DEBUG, "%s: dropping packet (scidx %u)\n",
332 __func__, MBUF_SCIDX(mbuf_get_service_class(_m)));
333 }
334 m_freem(_m);
335 atomic_add_32(&arpstat.dropped, 1);
336 atomic_add_32(&arpstat.held, -1);
337 }
338 _addq(&la->la_holdq, m);
339 atomic_add_32(&arpstat.held, 1);
340 if (arp_verbose) {
341 log(LOG_DEBUG, "%s: enqueued packet (scidx %u), qlen now %u\n",
342 __func__, MBUF_SCIDX(mbuf_get_service_class(m)),
343 qlen(&la->la_holdq));
344 }
345 }
346
347 static uint32_t
348 arp_llinfo_flushq(struct llinfo_arp *la)
349 {
350 uint32_t held = qlen(&la->la_holdq);
351
352 atomic_add_32(&arpstat.purged, held);
353 atomic_add_32(&arpstat.held, -held);
354 _flushq(&la->la_holdq);
355 VERIFY(qempty(&la->la_holdq));
356
357 return (held);
358 }
359
/*
 * rt_llinfo_purge callback: drop the entry's cached link-layer
 * reachability record and reset its last-used timestamp.  Called with
 * rt_lock held; the lock is converted to a regular mutex because
 * ifnet_llreach_free() may block.
 */
static void
arp_llinfo_purge(struct rtentry *rt)
{
	struct llinfo_arp *la = rt->rt_llinfo;

	RT_LOCK_ASSERT_HELD(rt);
	VERIFY(rt->rt_llinfo_purge == arp_llinfo_purge && la != NULL);

	if (la->la_llreach != NULL) {
		/* Become a regular mutex before freeing the record */
		RT_CONVERT_LOCK(rt);
		ifnet_llreach_free(la->la_llreach);
		la->la_llreach = NULL;
	}
	la->la_lastused = 0;
}
375
376 static void
377 arp_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
378 {
379 struct llinfo_arp *la = rt->rt_llinfo;
380 struct if_llreach *lr = la->la_llreach;
381
382 if (lr == NULL) {
383 bzero(ri, sizeof (*ri));
384 ri->ri_rssi = IFNET_RSSI_UNKNOWN;
385 ri->ri_lqm = IFNET_LQM_THRESH_OFF;
386 ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
387 } else {
388 IFLR_LOCK(lr);
389 /* Export to rt_reach_info structure */
390 ifnet_lr2ri(lr, ri);
391 /* Export ARP send expiration (calendar) time */
392 ri->ri_snd_expire =
393 ifnet_llreach_up2calexp(lr, la->la_lastused);
394 IFLR_UNLOCK(lr);
395 }
396 }
397
398 static void
399 arp_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
400 {
401 struct llinfo_arp *la = rt->rt_llinfo;
402 struct if_llreach *lr = la->la_llreach;
403
404 if (lr == NULL) {
405 bzero(iflri, sizeof (*iflri));
406 iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
407 iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
408 iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
409 } else {
410 IFLR_LOCK(lr);
411 /* Export to ifnet_llreach_info structure */
412 ifnet_lr2iflri(lr, iflri);
413 /* Export ARP send expiration (uptime) time */
414 iflri->iflri_snd_expire =
415 ifnet_llreach_up2upexp(lr, la->la_lastused);
416 IFLR_UNLOCK(lr);
417 }
418 }
419
420 static void
421 arp_llinfo_refresh(struct rtentry *rt)
422 {
423 uint64_t timenow = net_uptime();
424 /*
425 * If route entry is permanent or if expiry is less
426 * than timenow and extra time taken for unicast probe
427 * we can't expedite the refresh
428 */
429 if ((rt->rt_expire == 0) ||
430 (rt->rt_flags & RTF_STATIC) ||
431 !(rt->rt_flags & RTF_LLINFO)) {
432 return;
433 }
434
435 if (rt->rt_expire > timenow)
436 rt->rt_expire = timenow;
437 return;
438 }
439
440 void
441 arp_llreach_set_reachable(struct ifnet *ifp, void *addr, unsigned int alen)
442 {
443 /* Nothing more to do if it's disabled */
444 if (arp_llreach_base == 0)
445 return;
446
447 ifnet_llreach_set_reachable(ifp, ETHERTYPE_IP, addr, alen);
448 }
449
450 static __inline void
451 arp_llreach_use(struct llinfo_arp *la)
452 {
453 if (la->la_llreach != NULL)
454 la->la_lastused = net_uptime();
455 }
456
/*
 * Decide whether this ARP entry is still considered reachable.
 * Returns 1 (reachable) when the mechanism is disabled, when no
 * reachability record exists, when the record is unshared and fresh,
 * or when a shared record plus this entry's own last-used time still
 * fall within the reachability window.  Returns 0 when ARP probe(s)
 * are needed.
 */
static __inline int
arp_llreach_reachable(struct llinfo_arp *la)
{
	struct if_llreach *lr;
	const char *why = NULL;

	/* Nothing more to do if it's disabled; pretend it's reachable  */
	if (arp_llreach_base == 0)
		return (1);

	if ((lr = la->la_llreach) == NULL) {
		/*
		 * Link-layer reachability record isn't present for this
		 * ARP entry; pretend it's reachable and use it as is.
		 */
		return (1);
	} else if (ifnet_llreach_reachable(lr)) {
		/*
		 * Record is present, it's not shared with other ARP
		 * entries and a packet has recently been received
		 * from the remote host; consider it reachable.
		 */
		if (lr->lr_reqcnt == 1)
			return (1);

		/* Prime it up, if this is the first time */
		if (la->la_lastused == 0) {
			VERIFY(la->la_llreach != NULL);
			arp_llreach_use(la);
		}

		/*
		 * Record is present and shared with one or more ARP
		 * entries, and a packet has recently been received
		 * from the remote host.  Since it's shared by more
		 * than one IP addresses, we can't rely on the link-
		 * layer reachability alone; consider it reachable if
		 * this ARP entry has been used "recently."
		 */
		if (ifnet_llreach_reachable_delta(lr, la->la_lastused))
			return (1);

		why = "has alias(es) and hasn't been used in a while";
	} else {
		why = "haven't heard from it in a while";
	}

	if (arp_verbose > 1) {
		char tmp[MAX_IPv4_STR_LEN];
		u_int64_t now = net_uptime();

		log(LOG_DEBUG, "%s: ARP probe(s) needed for %s; "
		    "%s [lastused %lld, lastrcvd %lld] secs ago\n",
		    if_name(lr->lr_ifp), inet_ntop(AF_INET,
		    &SIN(rt_key(la->la_rt))->sin_addr, tmp, sizeof (tmp)), why,
		    (la->la_lastused ? (int64_t)(now - la->la_lastused) : -1),
		    (lr->lr_lastrcvd ? (int64_t)(now - lr->lr_lastrcvd) : -1));

	}
	return (0);
}
518
/*
 * Obtain a link-layer source cache entry for the sender.
 *
 * Reuses the existing llreach record when the target hardware address
 * is unchanged (resetting its probe count), otherwise purges it and
 * allocates a fresh one.  Only applies to non-loopback interfaces with
 * Ethernet-sized (IF_LLREACH_MAXLEN) addresses and non-permanent routes,
 * and only while the mechanism is enabled (arp_llreach_base != 0).
 *
 * NOTE: This is currently only for ARP/Ethernet.
 */
static void
arp_llreach_alloc(struct rtentry *rt, struct ifnet *ifp, void *addr,
    unsigned int alen, boolean_t solicited)
{
	VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
	VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);

	if (arp_llreach_base != 0 && rt->rt_expire != 0 &&
	    !(rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
	    ifp->if_addrlen == IF_LLREACH_MAXLEN &&	/* Ethernet */
	    alen == ifp->if_addrlen) {
		struct llinfo_arp *la = rt->rt_llinfo;
		struct if_llreach *lr;
		const char *why = NULL, *type = "";

		/* Become a regular mutex, just in case */
		RT_CONVERT_LOCK(rt);

		if ((lr = la->la_llreach) != NULL) {
			type = (solicited ? "ARP reply" : "ARP announcement");
			/*
			 * If target has changed, create a new record;
			 * otherwise keep existing record.
			 */
			IFLR_LOCK(lr);
			if (bcmp(addr, lr->lr_key.addr, alen) != 0) {
				IFLR_UNLOCK(lr);
				/* Purge any link-layer info caching */
				VERIFY(rt->rt_llinfo_purge != NULL);
				rt->rt_llinfo_purge(rt);
				lr = NULL;
				why = " for different target HW address; "
				    "using new llreach record";
			} else {
				lr->lr_probes = 0;	/* reset probe count */
				IFLR_UNLOCK(lr);
				if (solicited) {
					why = " for same target HW address; "
					    "keeping existing llreach record";
				}
			}
		}

		if (lr == NULL) {
			lr = la->la_llreach = ifnet_llreach_alloc(ifp,
			    ETHERTYPE_IP, addr, alen, arp_llreach_base);
			if (lr != NULL) {
				lr->lr_probes = 0;	/* reset probe count */
				if (why == NULL)
					why = "creating new llreach record";
			}
		}

		if (arp_verbose > 1 && lr != NULL && why != NULL) {
			char tmp[MAX_IPv4_STR_LEN];

			log(LOG_DEBUG, "%s: %s%s for %s\n", if_name(ifp),
			    type, why, inet_ntop(AF_INET,
			    &SIN(rt_key(rt))->sin_addr, tmp, sizeof (tmp)));
		}
	}
}
586
/*
 * Walk context/accumulator shared by in_arpdrain(), arp_timeout() and
 * arp_probe(); passed to arptfree() for every entry visited.
 */
struct arptf_arg {
	boolean_t draining;	/* in_arpdrain(): expire regardless of age */
	boolean_t probing;	/* arp_probe(): flush expired probe queues */
	uint32_t killed;	/* # of entries deleted from routing table */
	uint32_t aging;		/* # of entries still aging (not expired) */
	uint32_t sticky;	/* # of permanent/static entries seen */
	uint32_t found;		/* total # of entries visited */
	uint32_t qlen;		/* total packets held across hold queues */
	uint32_t qsize;		/* total bytes held across hold queues */
};
597
/*
 * Free an arp entry.
 *
 * Called with rnh_lock held for each entry during a list walk; updates
 * the accumulator in *arg and, depending on the walk mode (draining /
 * probing) and the entry's state (expired, refcnt, static), may flush
 * held packets, force a re-query, or delete the route outright.
 */
static void
arptfree(struct llinfo_arp *la, void *arg)
{
	struct arptf_arg *ap = arg;
	struct rtentry *rt = la->la_rt;
	uint64_t timenow;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	/* rnh_lock acquired by caller protects rt from going away */
	RT_LOCK(rt);

	VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
	VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);

	ap->found++;
	timenow = net_uptime();

	/* If we're probing, flush out held packets upon probe expiration */
	if (ap->probing && (la->la_flags & LLINFO_PROBING) &&
	    la->la_probeexp <= timenow) {
		struct sockaddr_dl *sdl = SDL(rt->rt_gateway);
		if (sdl != NULL)
			sdl->sdl_alen = 0;	/* force re-resolution */
		(void) arp_llinfo_flushq(la);
	}

	ap->qlen += qlen(&la->la_holdq);
	ap->qsize += qsize(&la->la_holdq);

	if (rt->rt_expire == 0 || (rt->rt_flags & RTF_STATIC)) {
		ap->sticky++;
		/* ARP entry is permanent? */
		if (rt->rt_expire == 0) {
			RT_UNLOCK(rt);
			return;
		}
	}

	/* ARP entry hasn't expired and we're not draining? */
	if (!ap->draining && rt->rt_expire > timenow) {
		RT_UNLOCK(rt);
		ap->aging++;
		return;
	}

	if (rt->rt_refcnt > 0) {
		/*
		 * ARP entry has expired, with outstanding refcnt.
		 * If we're not draining, force ARP query to be
		 * generated next time this entry is used.
		 */
		if (!ap->draining && !ap->probing) {
			struct sockaddr_dl *sdl = SDL(rt->rt_gateway);
			if (sdl != NULL)
				sdl->sdl_alen = 0;
			la->la_asked = 0;
			rt->rt_flags &= ~RTF_REJECT;
		}
		RT_UNLOCK(rt);
	} else if (!(rt->rt_flags & RTF_STATIC) && !ap->probing) {
		/*
		 * ARP entry has no outstanding refcnt, and we're either
		 * draining or it has expired; delete it from the routing
		 * table.  Safe to drop rt_lock and use rt_key, since holding
		 * rnh_lock here prevents another thread from calling
		 * rt_setgate() on this route.
		 */
		RT_UNLOCK(rt);
		rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
		    rt_mask(rt), 0, NULL);
		arpstat.timeouts++;
		ap->killed++;
	} else {
		/* ARP entry is static; let it linger */
		RT_UNLOCK(rt);
	}
}
679
680 void
681 in_arpdrain(void *arg)
682 {
683 #pragma unused(arg)
684 struct llinfo_arp *la, *ola;
685 struct arptf_arg farg;
686
687 if (arp_verbose)
688 log(LOG_DEBUG, "%s: draining ARP entries\n", __func__);
689
690 lck_mtx_lock(rnh_lock);
691 la = llinfo_arp.lh_first;
692 bzero(&farg, sizeof (farg));
693 farg.draining = TRUE;
694 while ((ola = la) != NULL) {
695 la = la->la_le.le_next;
696 arptfree(ola, &farg);
697 }
698 if (arp_verbose) {
699 log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u; "
700 "%u pkts held (%u bytes)\n", __func__, farg.found,
701 farg.aging, farg.sticky, farg.killed, farg.qlen,
702 farg.qsize);
703 }
704 lck_mtx_unlock(rnh_lock);
705 }
706
/*
 * Timeout routine.  Age arp_tab entries periodically.
 * Thread-call handler: walks every entry via arptfree() (normal aging
 * mode) and re-arms itself only while entries are still aging.
 */
static void
arp_timeout(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg0, arg1)
	struct llinfo_arp *la, *ola;
	struct timeval atv;
	struct arptf_arg farg;

	lck_mtx_lock(rnh_lock);
	la = llinfo_arp.lh_first;
	bzero(&farg, sizeof (farg));
	/* arptfree() may delete an entry; fetch the next pointer first */
	while ((ola = la) != NULL) {
		la = la->la_le.le_next;
		arptfree(ola, &farg);
	}
	if (arp_verbose) {
		log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u; "
		    "%u pkts held (%u bytes)\n", __func__, farg.found,
		    farg.aging, farg.sticky, farg.killed, farg.qlen,
		    farg.qsize);
	}
	atv.tv_usec = 0;
	atv.tv_sec = MAX(arpt_prune, 5);
	/* re-arm the timer if there's work to do */
	arp_timeout_run = 0;
	if (farg.aging > 0)
		arp_sched_timeout(&atv);
	else if (arp_verbose)
		log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__);
	lck_mtx_unlock(rnh_lock);
}
741
/*
 * Schedule the aging timer (arp_timeout) if it isn't already pending.
 * Must be called with rnh_lock held; atv == NULL means use the default
 * interval of MAX(arpt_prune / 5, 1) seconds.
 */
static void
arp_sched_timeout(struct timeval *atv)
{
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	if (!arp_timeout_run) {
		struct timeval tv;
		uint64_t deadline = 0;

		/* Lazily allocate the thread call on first use */
		if (arp_timeout_tcall == NULL) {
			arp_timeout_tcall =
			    thread_call_allocate(arp_timeout, NULL);
			VERIFY(arp_timeout_tcall != NULL);
		}

		if (atv == NULL) {
			tv.tv_usec = 0;
			tv.tv_sec = MAX(arpt_prune / 5, 1);
			atv = &tv;
		}
		if (arp_verbose) {
			log(LOG_DEBUG, "%s: timer scheduled in "
			    "T+%llus.%lluu\n", __func__,
			    (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
		}
		arp_timeout_run = 1;

		/* NOTE(review): only tv_sec feeds the deadline computation;
		 * tv_usec is logged above but otherwise ignored here */
		clock_deadline_for_periodic_event(atv->tv_sec * NSEC_PER_SEC,
		    mach_absolute_time(), &deadline);
		(void) thread_call_enter_delayed(arp_timeout_tcall, deadline);
	}
}
774
/*
 * Probe routine.
 * Thread-call handler: walks every entry via arptfree() in "probing"
 * mode (flushing held packets whose probe deadline passed) and re-arms
 * itself while any packets remain queued for resolution.
 */
static void
arp_probe(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg0, arg1)
	struct llinfo_arp *la, *ola;
	struct timeval atv;
	struct arptf_arg farg;

	lck_mtx_lock(rnh_lock);
	la = llinfo_arp.lh_first;
	bzero(&farg, sizeof (farg));
	farg.probing = TRUE;
	/* arptfree() may delete an entry; fetch the next pointer first */
	while ((ola = la) != NULL) {
		la = la->la_le.le_next;
		arptfree(ola, &farg);
	}
	if (arp_verbose) {
		log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u; "
		    "%u pkts held (%u bytes)\n", __func__, farg.found,
		    farg.aging, farg.sticky, farg.killed, farg.qlen,
		    farg.qsize);
	}
	atv.tv_usec = 0;
	atv.tv_sec = MAX(arpt_probe, ARP_PROBE_TIME);
	/* re-arm the probe if there's work to do */
	arp_probe_run = 0;
	if (farg.qlen > 0)
		arp_sched_probe(&atv);
	else if (arp_verbose)
		log(LOG_DEBUG, "%s: not rescheduling probe\n", __func__);
	lck_mtx_unlock(rnh_lock);
}
810
/*
 * Schedule the probe timer (arp_probe) if it isn't already pending.
 * Must be called with rnh_lock held; atv == NULL means use the default
 * interval of MAX(arpt_probe, ARP_PROBE_TIME) seconds.
 */
static void
arp_sched_probe(struct timeval *atv)
{
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	if (!arp_probe_run) {
		struct timeval tv;
		uint64_t deadline = 0;

		/* Lazily allocate the thread call on first use */
		if (arp_probe_tcall == NULL) {
			arp_probe_tcall =
			    thread_call_allocate(arp_probe, NULL);
			VERIFY(arp_probe_tcall != NULL);
		}

		if (atv == NULL) {
			tv.tv_usec = 0;
			tv.tv_sec = MAX(arpt_probe, ARP_PROBE_TIME);
			atv = &tv;
		}
		if (arp_verbose) {
			log(LOG_DEBUG, "%s: probe scheduled in "
			    "T+%llus.%lluu\n", __func__,
			    (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
		}
		arp_probe_run = 1;

		/* NOTE(review): only tv_sec feeds the deadline computation;
		 * tv_usec is logged above but otherwise ignored here */
		clock_deadline_for_periodic_event(atv->tv_sec * NSEC_PER_SEC,
		    mach_absolute_time(), &deadline);
		(void) thread_call_enter_delayed(arp_probe_tcall, deadline);
	}
}
843
/*
 * ifa_rtrequest() callback
 *
 * Handles RTM_ADD / RTM_RESOLVE / RTM_DELETE for ARP-managed routes:
 * installs the AF_LINK gateway sockaddr, allocates/links the per-route
 * llinfo_arp on resolve, special-cases multicast/broadcast/link-local
 * and own-address routes, and unchains the llinfo on delete (actual
 * freeing is deferred to route teardown via arp_llinfo_free()).
 * Called with rnh_lock and rt_lock held; gateway routes are ignored.
 */
static void
arp_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
{
#pragma unused(sa)
	struct sockaddr *gate = rt->rt_gateway;
	struct llinfo_arp *la = rt->rt_llinfo;
	static struct sockaddr_dl null_sdl =
	    { .sdl_len = sizeof (null_sdl), .sdl_family = AF_LINK };
	uint64_t timenow;
	char buf[MAX_IPv4_STR_LEN];

	VERIFY(arpinit_done);
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (rt->rt_flags & RTF_GATEWAY)
		return;

	timenow = net_uptime();
	switch (req) {
	case RTM_ADD:
		/*
		 * XXX: If this is a manually added route to interface
		 * such as older version of routed or gated might provide,
		 * restore cloning bit.
		 */
		if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL &&
		    SIN(rt_mask(rt))->sin_addr.s_addr != INADDR_BROADCAST)
			rt->rt_flags |= RTF_CLONING;

		if (rt->rt_flags & RTF_CLONING) {
			/*
			 * Case 1: This route should come from a route to iface.
			 */
			if (rt_setgate(rt, rt_key(rt), SA(&null_sdl)) == 0) {
				gate = rt->rt_gateway;
				SDL(gate)->sdl_type = rt->rt_ifp->if_type;
				SDL(gate)->sdl_index = rt->rt_ifp->if_index;
				/*
				 * In case we're called before 1.0 sec.
				 * has elapsed.
				 */
				rt_setexpire(rt, MAX(timenow, 1));
			}
			break;
		}
		/* Announce a new entry if requested. */
		if (rt->rt_flags & RTF_ANNOUNCE) {
			if (la != NULL)
				arp_llreach_use(la); /* Mark use timestamp */
			/* dlil_send_arp() may block; drop rt_lock across it */
			RT_UNLOCK(rt);
			dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST,
			    SDL(gate), rt_key(rt), NULL, rt_key(rt), 0);
			RT_LOCK(rt);
			arpstat.txannounces++;
		}
		/* FALLTHRU */
	case RTM_RESOLVE:
		if (gate->sa_family != AF_LINK ||
		    gate->sa_len < sizeof (null_sdl)) {
			arpstat.invalidreqs++;
			log(LOG_ERR, "%s: route to %s has bad gateway address "
			    "(sa_family %u sa_len %u) on %s\n",
			    __func__, inet_ntop(AF_INET,
			    &SIN(rt_key(rt))->sin_addr.s_addr, buf,
			    sizeof (buf)), gate->sa_family, gate->sa_len,
			    if_name(rt->rt_ifp));
			break;
		}
		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
		SDL(gate)->sdl_index = rt->rt_ifp->if_index;

		if (la != NULL)
			break; /* This happens on a route change */

		/*
		 * Case 2: This route may come from cloning, or a manual route
		 * add with a LL address.
		 */
		rt->rt_llinfo = la = arp_llinfo_alloc(M_WAITOK);
		if (la == NULL) {
			arpstat.reqnobufs++;
			break;
		}
		/* Install the per-route llinfo callbacks */
		rt->rt_llinfo_get_ri = arp_llinfo_get_ri;
		rt->rt_llinfo_get_iflri = arp_llinfo_get_iflri;
		rt->rt_llinfo_purge = arp_llinfo_purge;
		rt->rt_llinfo_free = arp_llinfo_free;
		rt->rt_llinfo_refresh = arp_llinfo_refresh;
		rt->rt_flags |= RTF_LLINFO;
		la->la_rt = rt;
		LIST_INSERT_HEAD(&llinfo_arp, la, la_le);
		arpstat.inuse++;

		/* We have at least one entry; arm the timer if not already */
		arp_sched_timeout(NULL);

		/*
		 * This keeps the multicast addresses from showing up
		 * in `arp -a' listings as unresolved.  It's not actually
		 * functional.  Then the same for broadcast.  For IPv4
		 * link-local address, keep the entry around even after
		 * it has expired.
		 */
		if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
			RT_UNLOCK(rt);
			dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate,
			    sizeof (struct sockaddr_dl));
			RT_LOCK(rt);
			rt_setexpire(rt, 0);
		} else if (in_broadcast(SIN(rt_key(rt))->sin_addr,
		    rt->rt_ifp)) {
			/* Pre-fill the gateway with the broadcast lladdr */
			struct sockaddr_dl *gate_ll = SDL(gate);
			size_t broadcast_len;
			ifnet_llbroadcast_copy_bytes(rt->rt_ifp,
			    LLADDR(gate_ll), sizeof (gate_ll->sdl_data),
			    &broadcast_len);
			gate_ll->sdl_alen = broadcast_len;
			gate_ll->sdl_family = AF_LINK;
			gate_ll->sdl_len = sizeof (struct sockaddr_dl);
			/* In case we're called before 1.0 sec. has elapsed */
			rt_setexpire(rt, MAX(timenow, 1));
		} else if (IN_LINKLOCAL(ntohl(SIN(rt_key(rt))->
		    sin_addr.s_addr))) {
			rt->rt_flags |= RTF_STATIC;
		}

		/* Set default maximum number of retries */
		la->la_maxtries = arp_maxtries;

		/* Become a regular mutex, just in case */
		RT_CONVERT_LOCK(rt);
		IFA_LOCK_SPIN(rt->rt_ifa);
		/* Route to one of our own addresses? */
		if (SIN(rt_key(rt))->sin_addr.s_addr ==
		    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
			IFA_UNLOCK(rt->rt_ifa);
			/*
			 * This test used to be
			 *	if (loif.if_flags & IFF_UP)
			 * It allowed local traffic to be forced through the
			 * hardware by configuring the loopback down.  However,
			 * it causes problems during network configuration
			 * for boards that can't receive packets they send.
			 * It is now necessary to clear "useloopback" and
			 * remove the route to force traffic out to the
			 * hardware.
			 */
			rt_setexpire(rt, 0);
			ifnet_lladdr_copy_bytes(rt->rt_ifp, LLADDR(SDL(gate)),
			    SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen);
			if (useloopback) {
				if (rt->rt_ifp != lo_ifp) {
					/*
					 * Purge any link-layer info caching.
					 */
					if (rt->rt_llinfo_purge != NULL)
						rt->rt_llinfo_purge(rt);

					/*
					 * Adjust route ref count for the
					 * interfaces.
					 */
					if (rt->rt_if_ref_fn != NULL) {
						rt->rt_if_ref_fn(lo_ifp, 1);
						rt->rt_if_ref_fn(rt->rt_ifp, -1);
					}
				}
				rt->rt_ifp = lo_ifp;
				/*
				 * If rmx_mtu is not locked, update it
				 * to the MTU used by the new interface.
				 */
				if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
					rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
			}
		} else {
			IFA_UNLOCK(rt->rt_ifa);
		}
		break;

	case RTM_DELETE:
		if (la == NULL)
			break;
		/*
		 * Unchain it but defer the actual freeing until the route
		 * itself is to be freed.  rt->rt_llinfo still points to
		 * llinfo_arp, and likewise, la->la_rt still points to this
		 * route entry, except that RTF_LLINFO is now cleared.
		 */
		LIST_REMOVE(la, la_le);
		la->la_le.le_next = NULL;
		la->la_le.le_prev = NULL;
		arpstat.inuse--;

		/*
		 * Purge any link-layer info caching.
		 */
		if (rt->rt_llinfo_purge != NULL)
			rt->rt_llinfo_purge(rt);

		rt->rt_flags &= ~RTF_LLINFO;
		(void) arp_llinfo_flushq(la);
	}
}
1051
1052 /*
1053 * convert hardware address to hex string for logging errors.
1054 */
1055 static const char *
1056 sdl_addr_to_hex(const struct sockaddr_dl *sdl, char *orig_buf, int buflen)
1057 {
1058 char *buf = orig_buf;
1059 int i;
1060 const u_char *lladdr = (u_char *)(size_t)sdl->sdl_data;
1061 int maxbytes = buflen / 3;
1062
1063 if (maxbytes > sdl->sdl_alen) {
1064 maxbytes = sdl->sdl_alen;
1065 }
1066 *buf = '\0';
1067 for (i = 0; i < maxbytes; i++) {
1068 snprintf(buf, 3, "%02x", lladdr[i]);
1069 buf += 2;
1070 *buf = (i == maxbytes - 1) ? '\0' : ':';
1071 buf++;
1072 }
1073 return (orig_buf);
1074 }
1075
1076 /*
1077 * arp_lookup_route will lookup the route for a given address.
1078 *
1079 * The address must be for a host on a local network on this interface.
1080 * If the returned route is non-NULL, the route is locked and the caller
1081 * is responsible for unlocking it and releasing its reference.
1082 */
static errno_t
arp_lookup_route(const struct in_addr *addr, int create, int proxy,
    route_t *route, unsigned int ifscope)
{
	/*
	 * ARP-style sockaddr template; sin_other selects the proxy
	 * namespace when a proxy-ARP entry is being looked up.
	 */
	struct sockaddr_inarp sin =
	    { sizeof (sin), AF_INET, 0, { 0 }, { 0 }, 0, 0 };
	const char *why = NULL;
	errno_t	error = 0;
	route_t rt;

	*route = NULL;

	sin.sin_addr.s_addr = addr->s_addr;
	sin.sin_other = proxy ? SIN_PROXY : 0;

	/*
	 * If the destination is a link-local address, don't
	 * constrain the lookup (don't scope it).
	 */
	if (IN_LINKLOCAL(ntohl(addr->s_addr)))
		ifscope = IFSCOPE_NONE;

	/* "create" doubles as the rtalloc1 create flag for cloning */
	rt = rtalloc1_scoped((struct sockaddr *)&sin, create, 0, ifscope);
	if (rt == NULL)
		return (ENETUNREACH);

	RT_LOCK(rt);

	/*
	 * Sanity-check the route we got back: it must be a directly
	 * reachable host route carrying link-layer (AF_LINK) info.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		why = "host is not on local network";
		error = ENETUNREACH;
	} else if (!(rt->rt_flags & RTF_LLINFO)) {
		why = "could not allocate llinfo";
		error = ENOMEM;
	} else if (rt->rt_gateway->sa_family != AF_LINK) {
		why = "gateway route is not ours";
		error = EPROTONOSUPPORT;
	}

	if (error != 0) {
		if (create && (arp_verbose || log_arp_warnings)) {
			char tmp[MAX_IPv4_STR_LEN];
			log(LOG_DEBUG, "%s: link#%d %s failed: %s\n",
			    __func__, ifscope, inet_ntop(AF_INET, addr, tmp,
			    sizeof (tmp)), why);
		}

		/*
		 * If there are no references to this route, and it is
		 * a cloned route, and not static, and ARP had created
		 * the route, then purge it from the routing table as
		 * it is probably bogus.
		 */
		if (rt->rt_refcnt == 1 &&
		    (rt->rt_flags & (RTF_WASCLONED | RTF_STATIC)) ==
		    RTF_WASCLONED) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
			    rt_mask(rt), rt->rt_flags, NULL);
			rtfree(rt);
		} else {
			/* Someone else still holds it; just drop our ref */
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
		}
		return (error);
	}

	/*
	 * Caller releases reference and does RT_UNLOCK(rt).
	 */
	*route = rt;
	return (0);
}
1162
1163 /*
1164 * This is the ARP pre-output routine; care must be taken to ensure that
1165 * the "hint" route never gets freed via rtfree(), since the caller may
1166 * have stored it inside a struct route with a reference held for that
1167 * placeholder.
1168 */
errno_t
arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
    struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint,
    mbuf_t packet)
{
	route_t route = NULL;	/* output route */
	errno_t	result = 0;
	struct sockaddr_dl *gateway;
	struct llinfo_arp *llinfo = NULL;
	boolean_t usable, probing = FALSE;
	uint64_t timenow;
	struct if_llreach *lr;
	struct ifaddr *rt_ifa;
	struct sockaddr *sa;
	uint32_t rtflags;
	struct sockaddr_dl sdl;

	/* Basic argument and interface sanity checks */
	if (ifp == NULL || net_dest == NULL)
		return (EINVAL);

	if (net_dest->sin_family != AF_INET)
		return (EAFNOSUPPORT);

	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
		return (ENETDOWN);

	/*
	 * If we were given a route, verify the route and grab the gateway
	 */
	if (hint != NULL) {
		/*
		 * Callee holds a reference on the route and returns
		 * with the route entry locked, upon success.
		 */
		result = route_to_gwroute((const struct sockaddr *)
		    net_dest, hint, &route);
		if (result != 0)
			return (result);
		if (route != NULL)
			RT_LOCK_ASSERT_HELD(route);
	}

	/*
	 * Broadcast destination: no ARP resolution needed; copy the
	 * interface's link-layer broadcast address straight to ll_dest.
	 */
	if ((packet != NULL && (packet->m_flags & M_BCAST)) ||
	    in_broadcast(net_dest->sin_addr, ifp)) {
		size_t	broadcast_len;
		bzero(ll_dest, ll_dest_len);
		result = ifnet_llbroadcast_copy_bytes(ifp, LLADDR(ll_dest),
		    ll_dest_len - offsetof(struct sockaddr_dl, sdl_data),
		    &broadcast_len);
		if (result == 0) {
			ll_dest->sdl_alen = broadcast_len;
			ll_dest->sdl_family = AF_LINK;
			ll_dest->sdl_len = sizeof (struct sockaddr_dl);
		}
		goto release;
	}
	/*
	 * Multicast destination: map the IP multicast group to its
	 * link-layer address via dlil; drop rt_lock across the call.
	 */
	if ((packet != NULL && (packet->m_flags & M_MCAST)) ||
	    ((ifp->if_flags & IFF_MULTICAST) &&
	    IN_MULTICAST(ntohl(net_dest->sin_addr.s_addr)))) {
		if (route != NULL)
			RT_UNLOCK(route);
		result = dlil_resolve_multi(ifp,
		    (const struct sockaddr *)net_dest,
		    (struct sockaddr *)ll_dest, ll_dest_len);
		if (route != NULL)
			RT_LOCK(route);
		goto release;
	}

	/*
	 * If we didn't find a route, or the route doesn't have
	 * link layer information, trigger the creation of the
	 * route and link layer information.
	 */
	if (route == NULL || route->rt_llinfo == NULL) {
		/* Clean up now while we can */
		if (route != NULL) {
			if (route == hint) {
				RT_REMREF_LOCKED(route);
				RT_UNLOCK(route);
			} else {
				RT_UNLOCK(route);
				rtfree(route);
			}
		}
		/*
		 * Callee holds a reference on the route and returns
		 * with the route entry locked, upon success.
		 */
		result = arp_lookup_route(&net_dest->sin_addr, 1, 0, &route,
		    ifp->if_index);
		if (result == 0)
			RT_LOCK_ASSERT_HELD(route);
	}

	if (result || route == NULL || (llinfo = route->rt_llinfo) == NULL) {
		/* In case result is 0 but no route, return an error */
		if (result == 0)
			result = EHOSTUNREACH;

		if (route != NULL && route->rt_llinfo == NULL) {
			char tmp[MAX_IPv4_STR_LEN];
			log(LOG_ERR, "%s: can't allocate llinfo for %s\n",
			    __func__, inet_ntop(AF_INET, &net_dest->sin_addr,
			    tmp, sizeof (tmp)));
		}
		goto release;
	}

	/*
	 * Now that we have the right route, is it filled in?
	 */
	gateway = SDL(route->rt_gateway);
	timenow = net_uptime();
	/* rt_expire and rmx_expire must agree on zero/non-zero */
	VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
	VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);

	/*
	 * Usable = unexpired (or permanent) entry with a resolved
	 * AF_LINK gateway address.
	 */
	usable = ((route->rt_expire == 0 || route->rt_expire > timenow) &&
	    gateway != NULL && gateway->sdl_family == AF_LINK &&
	    gateway->sdl_alen != 0);

	if (usable) {
		boolean_t unreachable = !arp_llreach_reachable(llinfo);

		/* Entry is usable, so fill in info for caller */
		bcopy(gateway, ll_dest, MIN(gateway->sdl_len, ll_dest_len));
		result = 0;
		arp_llreach_use(llinfo); /* Mark use timestamp */

		lr = llinfo->la_llreach;
		if (lr == NULL)
			goto release;
		rt_ifa = route->rt_ifa;

		/* Become a regular mutex, just in case */
		RT_CONVERT_LOCK(route);
		IFLR_LOCK_SPIN(lr);

		/*
		 * Entry looks stale (or a probe cycle is in progress)
		 * and we haven't hit the unicast probe limit: send a
		 * unicast ARP request but still hand the cached entry
		 * back to the caller.
		 */
		if ((unreachable || (llinfo->la_flags & LLINFO_PROBING)) &&
		    lr->lr_probes < arp_unicast_lim) {
			/*
			 * Thus mark the entry with la_probeexp deadline to
			 * trigger the probe timer to be scheduled (if not
			 * already).  This gets cleared the moment we get
			 * an ARP reply.
			 */
			probing = TRUE;
			if (lr->lr_probes == 0) {
				llinfo->la_probeexp = (timenow + arpt_probe);
				llinfo->la_flags |= LLINFO_PROBING;
			}

			/*
			 * Start the unicast probe and anticipate a reply;
			 * afterwards, return existing entry to caller and
			 * let it be used anyway.  If peer is non-existent
			 * we'll broadcast ARP next time around.
			 */
			lr->lr_probes++;
			bzero(&sdl, sizeof (sdl));
			sdl.sdl_alen = ifp->if_addrlen;
			bcopy(&lr->lr_key.addr, LLADDR(&sdl),
			    ifp->if_addrlen);
			IFLR_UNLOCK(lr);
			IFA_LOCK_SPIN(rt_ifa);
			IFA_ADDREF_LOCKED(rt_ifa);
			sa = rt_ifa->ifa_addr;
			IFA_UNLOCK(rt_ifa);
			/* Snapshot flags, then drop rt_lock to send */
			rtflags = route->rt_flags;
			RT_UNLOCK(route);
			dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa,
			    (const struct sockaddr_dl *)&sdl,
			    (const struct sockaddr *)net_dest, rtflags);
			IFA_REMREF(rt_ifa);
			RT_LOCK(route);
			goto release;
		} else {
			IFLR_UNLOCK(lr);
			if (!unreachable &&
			    !(llinfo->la_flags & LLINFO_PROBING)) {
				/*
				 * Normal case where peer is still reachable,
				 * we're not probing and if_addrlen is anything
				 * but IF_LLREACH_MAXLEN.
				 */
				goto release;
			}
		}
	}

	if (ifp->if_flags & IFF_NOARP) {
		result = ENOTSUP;
		goto release;
	}

	/*
	 * Route wasn't complete/valid; we need to send out ARP request.
	 * If we've exceeded the limit of la_holdq, drop from the head
	 * of queue and add this packet to the tail.  If we end up with
	 * RTF_REJECT below, we'll dequeue this from tail and have the
	 * caller free the packet instead.  It's safe to do that since
	 * we still hold the route's rt_lock.
	 */
	if (packet != NULL)
		arp_llinfo_addq(llinfo, packet);

	/*
	 * Regardless of permanent vs. expirable entry, we need to
	 * avoid having packets sit in la_holdq forever; thus mark the
	 * entry with la_probeexp deadline to trigger the probe timer
	 * to be scheduled (if not already).  This gets cleared the
	 * moment we get an ARP reply.
	 */
	probing = TRUE;
	if (qlen(&llinfo->la_holdq) == 1) {
		llinfo->la_probeexp = (timenow + arpt_probe);
		llinfo->la_flags |= LLINFO_PROBING;
	}
	if (route->rt_expire) {
		route->rt_flags &= ~RTF_REJECT;
		if (llinfo->la_asked == 0 || route->rt_expire != timenow) {
			rt_setexpire(route, timenow);
			if (llinfo->la_asked++ < llinfo->la_maxtries) {
				struct kev_msg ev_msg;
				struct kev_in_arpfailure in_arpfailure;
				boolean_t sendkev = FALSE;

				rt_ifa = route->rt_ifa;
				lr = llinfo->la_llreach;
				/* Become a regular mutex, just in case */
				RT_CONVERT_LOCK(route);
				/* Update probe count, if applicable */
				if (lr != NULL) {
					IFLR_LOCK_SPIN(lr);
					lr->lr_probes++;
					IFLR_UNLOCK(lr);
				}
				/*
				 * Repeated failure to reach a router:
				 * post a KEV_INET_ARPRTRFAILURE event.
				 */
				if (ifp->if_addrlen == IF_LLREACH_MAXLEN &&
				    route->rt_flags & RTF_ROUTER &&
				    llinfo->la_asked > 1) {
					sendkev = TRUE;
					llinfo->la_flags |= LLINFO_RTRFAIL_EVTSENT;
				}
				IFA_LOCK_SPIN(rt_ifa);
				IFA_ADDREF_LOCKED(rt_ifa);
				sa = rt_ifa->ifa_addr;
				IFA_UNLOCK(rt_ifa);
				arp_llreach_use(llinfo); /* Mark use tstamp */
				rtflags = route->rt_flags;
				RT_UNLOCK(route);
				/* Broadcast ARP request (NULL target hw) */
				dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa,
				    NULL, (const struct sockaddr *)net_dest,
				    rtflags);
				IFA_REMREF(rt_ifa);
				if (sendkev) {
					bzero(&ev_msg, sizeof(ev_msg));
					bzero(&in_arpfailure,
					    sizeof(in_arpfailure));
					in_arpfailure.link_data.if_family =
					    ifp->if_family;
					in_arpfailure.link_data.if_unit =
					    ifp->if_unit;
					strlcpy(in_arpfailure.link_data.if_name,
					    ifp->if_name, IFNAMSIZ);
					ev_msg.vendor_code = KEV_VENDOR_APPLE;
					ev_msg.kev_class = KEV_NETWORK_CLASS;
					ev_msg.kev_subclass = KEV_INET_SUBCLASS;
					ev_msg.event_code =
					    KEV_INET_ARPRTRFAILURE;
					ev_msg.dv[0].data_ptr = &in_arpfailure;
					ev_msg.dv[0].data_length =
					    sizeof(struct
					    kev_in_arpfailure);
					dlil_post_complete_msg(NULL, &ev_msg);
				}
				result = EJUSTRETURN;
				RT_LOCK(route);
				goto release;
			} else {
				/* Retries exhausted: hold off for arpt_down */
				route->rt_flags |= RTF_REJECT;
				rt_setexpire(route,
				    route->rt_expire + arpt_down);
				llinfo->la_asked = 0;
				/*
				 * Remove the packet that was just added above;
				 * don't free it since we're not returning
				 * EJUSTRETURN.  The caller will handle the
				 * freeing.  Since we haven't dropped rt_lock
				 * from the time of _addq() above, this packet
				 * must be at the tail.
				 */
				if (packet != NULL) {
					struct mbuf *_m =
					    _getq_tail(&llinfo->la_holdq);
					atomic_add_32(&arpstat.held, -1);
					VERIFY(_m == packet);
				}
				result = EHOSTUNREACH;
				goto release;
			}
		}
	}

	/* The packet is now held inside la_holdq */
	result = EJUSTRETURN;

release:
	if (result == EHOSTUNREACH)
		atomic_add_32(&arpstat.dropped, 1);

	/*
	 * Drop our reference/lock; "hint" belongs to the caller, so
	 * only its lock and our extra ref are released, not the route.
	 */
	if (route != NULL) {
		if (route == hint) {
			RT_REMREF_LOCKED(route);
			RT_UNLOCK(route);
		} else {
			RT_UNLOCK(route);
			rtfree(route);
		}
	}
	if (probing) {
		/* Do this after we drop rt_lock to preserve ordering */
		lck_mtx_lock(rnh_lock);
		arp_sched_probe(NULL);
		lck_mtx_unlock(rnh_lock);
	}
	return (result);
}
1496
/*
 * Process a received ARP packet (request or reply): find or create the
 * matching route entry, detect address conflicts, record the sender's
 * hardware address, flush any packets queued while resolving, and for
 * requests, send a (possibly proxied) ARP reply.  Always returns 0.
 */
errno_t
arp_ip_handle_input(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr_in *sender_ip,
    const struct sockaddr_in *target_ip)
{
	char ipv4str[MAX_IPv4_STR_LEN];
	struct sockaddr_dl proxied;
	struct sockaddr_dl *gateway, *target_hw = NULL;
	struct ifaddr *ifa;
	struct in_ifaddr *ia;
	struct in_ifaddr *best_ia = NULL;
	struct sockaddr_in best_ia_sin;
	route_t	route = NULL;
	char buf[3 * MAX_HW_LEN]; /* enough for MAX_HW_LEN byte hw address */
	struct llinfo_arp *llinfo;
	errno_t	error;
	int created_announcement = 0;
	int bridged = 0, is_bridge = 0;

	/*
	 * Here and other places within this routine where we don't hold
	 * rnh_lock, trade accuracy for speed for the common scenarios
	 * and avoid the use of atomic updates.
	 */
	arpstat.received++;

	/* Do not respond to requests for 0.0.0.0 */
	if (target_ip->sin_addr.s_addr == INADDR_ANY && arpop == ARPOP_REQUEST)
		goto done;

	if (ifp->if_bridge)
		bridged = 1;
	if (ifp->if_type == IFT_BRIDGE)
		is_bridge = 1;

	if (arpop == ARPOP_REPLY)
		arpstat.rxreplies++;

	/*
	 * Determine if this ARP is for us
	 * For a bridge, we want to check the address irrespective
	 * of the receive interface.
	 */
	lck_rw_lock_shared(in_ifaddr_rwlock);
	TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) {
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
		    (ia->ia_ifp == ifp)) &&
		    ia->ia_addr.sin_addr.s_addr == target_ip->sin_addr.s_addr) {
			best_ia = ia;
			best_ia_sin = best_ia->ia_addr;
			IFA_ADDREF_LOCKED(&ia->ia_ifa);
			IFA_UNLOCK(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			goto match;
		}
		IFA_UNLOCK(&ia->ia_ifa);
	}

	/* Not the target; maybe we own the sender's address instead */
	TAILQ_FOREACH(ia, INADDR_HASH(sender_ip->sin_addr.s_addr), ia_hash) {
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
		    (ia->ia_ifp == ifp)) &&
		    ia->ia_addr.sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
			best_ia = ia;
			best_ia_sin = best_ia->ia_addr;
			IFA_ADDREF_LOCKED(&ia->ia_ifa);
			IFA_UNLOCK(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			goto match;
		}
		IFA_UNLOCK(&ia->ia_ifa);
	}

#define	BDG_MEMBER_MATCHES_ARP(addr, ifp, ia)				      \
	(ia->ia_ifp->if_bridge == ifp->if_softc &&			      \
	bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) == 0 &&  \
	addr == ia->ia_addr.sin_addr.s_addr)
	/*
	 * Check the case when bridge shares its MAC address with
	 * some of its children, so packets are claimed by bridge
	 * itself (bridge_input() does it first), but they are really
	 * meant to be destined to the bridge member.
	 */
	if (is_bridge) {
		TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr),
		    ia_hash) {
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (BDG_MEMBER_MATCHES_ARP(target_ip->sin_addr.s_addr,
			    ifp, ia)) {
				ifp = ia->ia_ifp;
				best_ia = ia;
				best_ia_sin = best_ia->ia_addr;
				IFA_ADDREF_LOCKED(&ia->ia_ifa);
				IFA_UNLOCK(&ia->ia_ifa);
				lck_rw_done(in_ifaddr_rwlock);
				goto match;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		}
	}
#undef BDG_MEMBER_MATCHES_ARP
	lck_rw_done(in_ifaddr_rwlock);

	/*
	 * No match, use the first inet address on the receive interface
	 * as a dummy address for the rest of the function; we may be
	 * proxying for another address.
	 */
	ifnet_lock_shared(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		IFA_LOCK_SPIN(ifa);
		if (ifa->ifa_addr->sa_family != AF_INET) {
			IFA_UNLOCK(ifa);
			continue;
		}
		best_ia = (struct in_ifaddr *)ifa;
		best_ia_sin = best_ia->ia_addr;
		IFA_ADDREF_LOCKED(ifa);
		IFA_UNLOCK(ifa);
		ifnet_lock_done(ifp);
		goto match;
	}
	ifnet_lock_done(ifp);

	/*
	 * If we're not a bridge member, or if we are but there's no
	 * IPv4 address to use for the interface, drop the packet.
	 */
	if (!bridged || best_ia == NULL)
		goto done;

match:
	/* If the packet is from this interface, ignore the packet */
	if (bcmp(CONST_LLADDR(sender_hw), IF_LLADDR(ifp),
	    sender_hw->sdl_alen) == 0)
		goto done;

	/*
	 * Check for a conflict: another host claims one of our own
	 * addresses.  Log it and post KEV_INET_ARPCOLLISION.
	 */
	if (!bridged &&
	    sender_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr) {
		struct kev_msg ev_msg;
		struct kev_in_collision *in_collision;
		u_char storage[sizeof (struct kev_in_collision) + MAX_HW_LEN];

		bzero(&ev_msg, sizeof (struct kev_msg));
		bzero(storage, (sizeof (struct kev_in_collision) + MAX_HW_LEN));
		in_collision = (struct kev_in_collision *)(void *)storage;
		log(LOG_ERR, "%s duplicate IP address %s sent from "
		    "address %s\n", if_name(ifp),
		    inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str,
		    sizeof (ipv4str)), sdl_addr_to_hex(sender_hw, buf,
		    sizeof (buf)));

		/* Send a kernel event so anyone can learn of the conflict */
		in_collision->link_data.if_family = ifp->if_family;
		in_collision->link_data.if_unit = ifp->if_unit;
		strlcpy(&in_collision->link_data.if_name[0],
		    ifp->if_name, IFNAMSIZ);
		in_collision->ia_ipaddr = sender_ip->sin_addr;
		in_collision->hw_len = (sender_hw->sdl_alen < MAX_HW_LEN) ?
		    sender_hw->sdl_alen : MAX_HW_LEN;
		bcopy(CONST_LLADDR(sender_hw), (caddr_t)in_collision->hw_addr,
		    in_collision->hw_len);
		ev_msg.vendor_code = KEV_VENDOR_APPLE;
		ev_msg.kev_class = KEV_NETWORK_CLASS;
		ev_msg.kev_subclass = KEV_INET_SUBCLASS;
		ev_msg.event_code = KEV_INET_ARPCOLLISION;
		ev_msg.dv[0].data_ptr = in_collision;
		ev_msg.dv[0].data_length =
		    sizeof (struct kev_in_collision) + in_collision->hw_len;
		ev_msg.dv[1].data_length = 0;
		dlil_post_complete_msg(NULL, &ev_msg);
		atomic_add_32(&arpstat.dupips, 1);
		goto respond;
	}

	/*
	 * Look up the routing entry. If it doesn't exist and we are the
	 * target, and the sender isn't 0.0.0.0, go ahead and create one.
	 * Callee holds a reference on the route and returns with the route
	 * entry locked, upon success.
	 */
	error = arp_lookup_route(&sender_ip->sin_addr,
	    (target_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr &&
	    sender_ip->sin_addr.s_addr != 0), 0, &route, ifp->if_index);

	if (error == 0)
		RT_LOCK_ASSERT_HELD(route);

	if (error || route == NULL || route->rt_gateway == NULL) {
		if (arpop != ARPOP_REQUEST)
			goto respond;

		/*
		 * IPv4LL probe (sender 0.0.0.0) for a link-local target:
		 * defend addresses we know live on other interfaces.
		 */
		if (arp_sendllconflict && send_conflicting_probes != 0 &&
		    (ifp->if_eflags & IFEF_ARPLL) &&
		    IN_LINKLOCAL(ntohl(target_ip->sin_addr.s_addr)) &&
		    sender_ip->sin_addr.s_addr == INADDR_ANY) {
			/*
			 * Verify this ARP probe doesn't conflict with
			 * an IPv4LL we know of on another interface.
			 */
			if (route != NULL) {
				RT_REMREF_LOCKED(route);
				RT_UNLOCK(route);
				route = NULL;
			}
			/*
			 * Callee holds a reference on the route and returns
			 * with the route entry locked, upon success.
			 */
			error = arp_lookup_route(&target_ip->sin_addr, 0, 0,
			    &route, ifp->if_index);

			if (error != 0 || route == NULL ||
			    route->rt_gateway == NULL)
				goto respond;

			RT_LOCK_ASSERT_HELD(route);

			gateway = SDL(route->rt_gateway);
			if (route->rt_ifp != ifp && gateway->sdl_alen != 0 &&
			    (gateway->sdl_alen != sender_hw->sdl_alen ||
			    bcmp(CONST_LLADDR(gateway), CONST_LLADDR(sender_hw),
			    gateway->sdl_alen) != 0)) {
				/*
				 * A node is probing for an IPv4LL we know
				 * exists on a different interface. We respond
				 * with a conflicting probe to force the new
				 * device to pick a different IPv4LL address.
				 */
				if (arp_verbose || log_arp_warnings) {
					log(LOG_INFO, "arp: %s on %s sent "
					    "probe for %s, already on %s\n",
					    sdl_addr_to_hex(sender_hw, buf,
					    sizeof (buf)), if_name(ifp),
					    inet_ntop(AF_INET,
					    &target_ip->sin_addr, ipv4str,
					    sizeof (ipv4str)),
					    if_name(route->rt_ifp));
					log(LOG_INFO, "arp: sending "
					    "conflicting probe to %s on %s\n",
					    sdl_addr_to_hex(sender_hw, buf,
					    sizeof (buf)), if_name(ifp));
				}
				/* Mark use timestamp */
				if (route->rt_llinfo != NULL)
					arp_llreach_use(route->rt_llinfo);
				/* We're done with the route */
				RT_REMREF_LOCKED(route);
				RT_UNLOCK(route);
				route = NULL;
				/*
				 * Send a conservative unicast "ARP probe".
				 * This should force the other device to pick
				 * a new number.  This will not force the
				 * device to pick a new number if the device
				 * has already assigned that number.  This will
				 * not imply to the device that we own that
				 * address.  The link address is always
				 * present; it's never freed.
				 */
				ifnet_lock_shared(ifp);
				ifa = ifp->if_lladdr;
				IFA_ADDREF(ifa);
				ifnet_lock_done(ifp);
				dlil_send_arp_internal(ifp, ARPOP_REQUEST,
				    SDL(ifa->ifa_addr),
				    (const struct sockaddr *)sender_ip,
				    sender_hw,
				    (const struct sockaddr *)target_ip);
				IFA_REMREF(ifa);
				ifa = NULL;
				atomic_add_32(&arpstat.txconflicts, 1);
			}
			goto respond;
		} else if (keep_announcements != 0 &&
		    target_ip->sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
			/*
			 * Don't create entry if link-local address and
			 * link-local is disabled
			 */
			if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) ||
			    (ifp->if_eflags & IFEF_ARPLL)) {
				if (route != NULL) {
					RT_REMREF_LOCKED(route);
					RT_UNLOCK(route);
					route = NULL;
				}
				/*
				 * Callee holds a reference on the route and
				 * returns with the route entry locked, upon
				 * success.
				 */
				error = arp_lookup_route(&sender_ip->sin_addr,
				    1, 0, &route, ifp->if_index);

				if (error == 0)
					RT_LOCK_ASSERT_HELD(route);

				if (error == 0 && route != NULL &&
				    route->rt_gateway != NULL)
					created_announcement = 1;
			}
			if (created_announcement == 0)
				goto respond;
		} else {
			goto respond;
		}
	}

	RT_LOCK_ASSERT_HELD(route);
	/* rt_expire and rmx_expire must agree on zero/non-zero */
	VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
	VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);

	gateway = SDL(route->rt_gateway);
	if (!bridged && route->rt_ifp != ifp) {
		if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) ||
		    !(ifp->if_eflags & IFEF_ARPLL)) {
			if (arp_verbose || log_arp_warnings)
				log(LOG_ERR, "arp: %s is on %s but got "
				    "reply from %s on %s\n",
				    inet_ntop(AF_INET, &sender_ip->sin_addr,
				    ipv4str, sizeof (ipv4str)),
				    if_name(route->rt_ifp),
				    sdl_addr_to_hex(sender_hw, buf,
				    sizeof (buf)), if_name(ifp));
			goto respond;
		} else {
			/* Don't change a permanent address */
			if (route->rt_expire == 0)
				goto respond;

			/*
			 * We're about to check and/or change the route's ifp
			 * and ifa, so do the lock dance: drop rt_lock, hold
			 * rnh_lock and re-hold rt_lock to avoid violating the
			 * lock ordering.  We have an extra reference on the
			 * route, so it won't go away while we do this.
			 */
			RT_UNLOCK(route);
			lck_mtx_lock(rnh_lock);
			RT_LOCK(route);
			/*
			 * Don't change the cloned route away from the
			 * parent's interface if the address did resolve
			 * or if the route is defunct.  rt_ifp on both
			 * the parent and the clone can now be freely
			 * accessed now that we have acquired rnh_lock.
			 */
			gateway = SDL(route->rt_gateway);
			if ((gateway->sdl_alen != 0 &&
			    route->rt_parent != NULL &&
			    route->rt_parent->rt_ifp == route->rt_ifp) ||
			    (route->rt_flags & RTF_CONDEMNED)) {
				RT_REMREF_LOCKED(route);
				RT_UNLOCK(route);
				route = NULL;
				lck_mtx_unlock(rnh_lock);
				goto respond;
			}
			if (route->rt_ifp != ifp) {
				/*
				 * Purge any link-layer info caching.
				 */
				if (route->rt_llinfo_purge != NULL)
					route->rt_llinfo_purge(route);

				/* Adjust route ref count for the interfaces */
				if (route->rt_if_ref_fn != NULL) {
					route->rt_if_ref_fn(ifp, 1);
					route->rt_if_ref_fn(route->rt_ifp, -1);
				}
			}
			/* Change the interface when the existing route is on */
			route->rt_ifp = ifp;
			/*
			 * If rmx_mtu is not locked, update it
			 * to the MTU used by the new interface.
			 */
			if (!(route->rt_rmx.rmx_locks & RTV_MTU))
				route->rt_rmx.rmx_mtu = route->rt_ifp->if_mtu;

			rtsetifa(route, &best_ia->ia_ifa);
			gateway->sdl_index = ifp->if_index;
			RT_UNLOCK(route);
			lck_mtx_unlock(rnh_lock);
			RT_LOCK(route);
			/* Don't bother if the route is down */
			if (!(route->rt_flags & RTF_UP))
				goto respond;
			/* Refresh gateway pointer */
			gateway = SDL(route->rt_gateway);
		}
		RT_LOCK_ASSERT_HELD(route);
	}

	/* Sender's hardware address differs from the cached one */
	if (gateway->sdl_alen != 0 && bcmp(LLADDR(gateway),
	    CONST_LLADDR(sender_hw), gateway->sdl_alen) != 0) {
		if (route->rt_expire != 0 &&
		    (arp_verbose || log_arp_warnings)) {
			char buf2[3 * MAX_HW_LEN];
			log(LOG_INFO, "arp: %s moved from %s to %s on %s\n",
			    inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str,
			    sizeof (ipv4str)),
			    sdl_addr_to_hex(gateway, buf, sizeof (buf)),
			    sdl_addr_to_hex(sender_hw, buf2, sizeof (buf2)),
			    if_name(ifp));
		} else if (route->rt_expire == 0) {
			if (arp_verbose || log_arp_warnings) {
				log(LOG_ERR, "arp: %s attempts to modify "
				    "permanent entry for %s on %s\n",
				    sdl_addr_to_hex(sender_hw, buf,
				    sizeof (buf)),
				    inet_ntop(AF_INET, &sender_ip->sin_addr,
				    ipv4str, sizeof (ipv4str)),
				    if_name(ifp));
			}
			goto respond;
		}
	}

	/* Copy the sender hardware address in to the route's gateway address */
	gateway->sdl_alen = sender_hw->sdl_alen;
	bcopy(CONST_LLADDR(sender_hw), LLADDR(gateway), gateway->sdl_alen);

	/* Update the expire time for the route and clear the reject flag */
	if (route->rt_expire != 0)
		rt_setexpire(route, net_uptime() + arpt_keep);
	route->rt_flags &= ~RTF_REJECT;

	/* cache the gateway (sender HW) address */
	arp_llreach_alloc(route, ifp, LLADDR(gateway), gateway->sdl_alen,
	    (arpop == ARPOP_REPLY));

	llinfo = route->rt_llinfo;
	/* send a notification that the route is back up */
	if (ifp->if_addrlen == IF_LLREACH_MAXLEN &&
	    route->rt_flags & RTF_ROUTER &&
	    llinfo->la_flags & LLINFO_RTRFAIL_EVTSENT) {
		struct kev_msg ev_msg;
		struct kev_in_arpalive in_arpalive;

		llinfo->la_flags &= ~LLINFO_RTRFAIL_EVTSENT;
		RT_UNLOCK(route);
		bzero(&ev_msg, sizeof(ev_msg));
		bzero(&in_arpalive, sizeof(in_arpalive));
		in_arpalive.link_data.if_family = ifp->if_family;
		in_arpalive.link_data.if_unit = ifp->if_unit;
		strlcpy(in_arpalive.link_data.if_name, ifp->if_name, IFNAMSIZ);
		ev_msg.vendor_code = KEV_VENDOR_APPLE;
		ev_msg.kev_class = KEV_NETWORK_CLASS;
		ev_msg.kev_subclass = KEV_INET_SUBCLASS;
		ev_msg.event_code = KEV_INET_ARPRTRALIVE;
		ev_msg.dv[0].data_ptr = &in_arpalive;
		ev_msg.dv[0].data_length = sizeof(struct kev_in_arpalive);
		dlil_post_complete_msg(NULL, &ev_msg);
		RT_LOCK(route);
	}
	/* Update the llinfo, send out all queued packets at once */
	llinfo->la_asked = 0;
	llinfo->la_flags &= ~LLINFO_PROBING;
	if (!qempty(&llinfo->la_holdq)) {
		uint32_t held;
		struct mbuf *m0 =
		    _getq_all(&llinfo->la_holdq, NULL, &held, NULL);
		if (arp_verbose) {
			log(LOG_DEBUG, "%s: sending %u held packets\n",
			    __func__, held);
		}
		atomic_add_32(&arpstat.held, -held);
		VERIFY(qempty(&llinfo->la_holdq));
		RT_UNLOCK(route);
		dlil_output(ifp, PF_INET, m0, (caddr_t)route,
		    rt_key(route), 0, NULL);
		RT_REMREF(route);
		route = NULL;
	}


respond:
	if (route != NULL) {
		/* Mark use timestamp if we're going to send a reply */
		if (arpop == ARPOP_REQUEST && route->rt_llinfo != NULL)
			arp_llreach_use(route->rt_llinfo);
		RT_REMREF_LOCKED(route);
		RT_UNLOCK(route);
		route = NULL;
	}

	if (arpop != ARPOP_REQUEST)
		goto done;

	/* See comments at the beginning of this routine */
	arpstat.rxrequests++;

	/* If we are not the target, check if we should proxy */
	if (target_ip->sin_addr.s_addr != best_ia_sin.sin_addr.s_addr) {
		/*
		 * Find a proxy route; callee holds a reference on the
		 * route and returns with the route entry locked, upon
		 * success.
		 */
		error = arp_lookup_route(&target_ip->sin_addr, 0, SIN_PROXY,
		    &route, ifp->if_index);

		if (error == 0) {
			RT_LOCK_ASSERT_HELD(route);
			/*
			 * Return proxied ARP replies only on the interface
			 * or bridge cluster where this network resides.
			 * Otherwise we may conflict with the host we are
			 * proxying for.
			 */
			if (route->rt_ifp != ifp &&
			    (route->rt_ifp->if_bridge != ifp->if_bridge ||
			    ifp->if_bridge == NULL)) {
				RT_REMREF_LOCKED(route);
				RT_UNLOCK(route);
				goto done;
			}
			proxied = *SDL(route->rt_gateway);
			target_hw = &proxied;
		} else {
			/*
			 * We don't have a route entry indicating we should
			 * use proxy.  If we aren't supposed to proxy all,
			 * we are done.
			 */
			if (!arp_proxyall)
				goto done;

			/*
			 * See if we have a route to the target ip before
			 * we proxy it.
			 */
			route = rtalloc1_scoped((struct sockaddr *)
			    (size_t)target_ip, 0, 0, ifp->if_index);
			if (!route)
				goto done;

			/*
			 * Don't proxy for hosts already on the same interface.
			 */
			RT_LOCK(route);
			if (route->rt_ifp == ifp) {
				RT_UNLOCK(route);
				rtfree(route);
				goto done;
			}
		}
		/* Mark use timestamp */
		if (route->rt_llinfo != NULL)
			arp_llreach_use(route->rt_llinfo);
		RT_REMREF_LOCKED(route);
		RT_UNLOCK(route);
	}

	/* Reply with our (or the proxied) hardware address */
	dlil_send_arp(ifp, ARPOP_REPLY,
	    target_hw, (const struct sockaddr *)target_ip,
	    sender_hw, (const struct sockaddr *)sender_ip, 0);

done:
	if (best_ia != NULL)
		IFA_REMREF(&best_ia->ia_ifa);
	return (0);
}
2064
2065 void
2066 arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
2067 {
2068 struct sockaddr *sa;
2069
2070 IFA_LOCK(ifa);
2071 ifa->ifa_rtrequest = arp_rtrequest;
2072 ifa->ifa_flags |= RTF_CLONING;
2073 sa = ifa->ifa_addr;
2074 IFA_UNLOCK(ifa);
2075 dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, NULL, sa, 0);
2076 }
2077
2078 static int
2079 arp_getstat SYSCTL_HANDLER_ARGS
2080 {
2081 #pragma unused(oidp, arg1, arg2)
2082 if (req->oldptr == USER_ADDR_NULL)
2083 req->oldlen = (size_t)sizeof (struct arpstat);
2084
2085 return (SYSCTL_OUT(req, &arpstat, MIN(sizeof (arpstat), req->oldlen)));
2086 }