]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/in_arp.c
xnu-4570.71.2.tar.gz
[apple/xnu.git] / bsd / netinet / in_arp.c
1 /*
2 * Copyright (c) 2004-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 */
61
62 #include <kern/debug.h>
63 #include <netinet/in_arp.h>
64 #include <sys/types.h>
65 #include <sys/param.h>
66 #include <sys/kernel_types.h>
67 #include <sys/syslog.h>
68 #include <sys/systm.h>
69 #include <sys/time.h>
70 #include <sys/kernel.h>
71 #include <sys/mbuf.h>
72 #include <sys/sysctl.h>
73 #include <sys/mcache.h>
74 #include <sys/protosw.h>
75 #include <string.h>
76 #include <net/if_arp.h>
77 #include <net/if_dl.h>
78 #include <net/dlil.h>
79 #include <net/if_types.h>
80 #include <net/if_llreach.h>
81 #include <net/route.h>
82 #include <net/nwk_wq.h>
83
84 #include <netinet/if_ether.h>
85 #include <netinet/in_var.h>
86 #include <kern/zalloc.h>
87
88 #include <kern/thread.h>
89 #include <kern/sched_prim.h>
90
91 #define CONST_LLADDR(s) ((const u_char*)((s)->sdl_data + (s)->sdl_nlen))
92
93 static const size_t MAX_HW_LEN = 10;
94
95 /*
96 * Synchronization notes:
97 *
98 * The global list of ARP entries are stored in llinfo_arp; an entry
99 * gets inserted into the list when the route is created and gets
100 * removed from the list when it is deleted; this is done as part
101 * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in arp_rtrequest().
102 *
103 * Because rnh_lock and rt_lock for the entry are held during those
104 * operations, the same locks (and thus lock ordering) must be used
105 * elsewhere to access the relevant data structure fields:
106 *
107 * la_le.{le_next,le_prev}, la_rt
108 *
109 * - Routing lock (rnh_lock)
110 *
111 * la_holdq, la_asked, la_llreach, la_lastused, la_flags
112 *
113 * - Routing entry lock (rt_lock)
114 *
115 * Due to the dependency on rt_lock, llinfo_arp has the same lifetime
116 * as the route entry itself. When a route is deleted (RTM_DELETE),
117 * it is simply removed from the global list but the memory is not
118 * freed until the route itself is freed.
119 */
120 struct llinfo_arp {
121 /*
122 * The following are protected by rnh_lock
123 */
124 LIST_ENTRY(llinfo_arp) la_le;
125 struct rtentry *la_rt;
126 /*
127 * The following are protected by rt_lock
128 */
129 class_queue_t la_holdq; /* packets awaiting resolution */
130 struct if_llreach *la_llreach; /* link-layer reachability record */
131 u_int64_t la_lastused; /* last used timestamp */
132 u_int32_t la_asked; /* # of requests sent */
133 u_int32_t la_maxtries; /* retry limit */
134 u_int64_t la_probeexp; /* probe deadline timestamp */
135 u_int32_t la_prbreq_cnt; /* probe request count */
136 u_int32_t la_flags;
137 #define LLINFO_RTRFAIL_EVTSENT 0x1 /* sent an ARP event */
138 #define LLINFO_PROBING 0x2 /* waiting for an ARP reply */
139 };
140
141 static LIST_HEAD(, llinfo_arp) llinfo_arp;
142
143 static thread_call_t arp_timeout_tcall;
144 static int arp_timeout_run; /* arp_timeout is scheduled to run */
145 static void arp_timeout(thread_call_param_t arg0, thread_call_param_t arg1);
146 static void arp_sched_timeout(struct timeval *);
147
148 static thread_call_t arp_probe_tcall;
149 static int arp_probe_run; /* arp_probe is scheduled to run */
150 static void arp_probe(thread_call_param_t arg0, thread_call_param_t arg1);
151 static void arp_sched_probe(struct timeval *);
152
153 static void arptfree(struct llinfo_arp *, void *);
154 static errno_t arp_lookup_route(const struct in_addr *, int,
155 int, route_t *, unsigned int);
156 static int arp_getstat SYSCTL_HANDLER_ARGS;
157
158 static struct llinfo_arp *arp_llinfo_alloc(int);
159 static void arp_llinfo_free(void *);
160 static uint32_t arp_llinfo_flushq(struct llinfo_arp *);
161 static void arp_llinfo_purge(struct rtentry *);
162 static void arp_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
163 static void arp_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);
164 static void arp_llinfo_refresh(struct rtentry *);
165
166 static __inline void arp_llreach_use(struct llinfo_arp *);
167 static __inline int arp_llreach_reachable(struct llinfo_arp *);
168 static void arp_llreach_alloc(struct rtentry *, struct ifnet *, void *,
169 unsigned int, boolean_t, uint32_t *);
170
171 extern int tvtohz(struct timeval *);
172
173 static int arpinit_done;
174
175 SYSCTL_DECL(_net_link_ether);
176 SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "");
177
178 static int arpt_prune = (5*60*1); /* walk list every 5 minutes */
179 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl,
180 CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_prune, 0, "");
181
182 #define ARP_PROBE_TIME 7 /* seconds */
183 static u_int32_t arpt_probe = ARP_PROBE_TIME;
184 SYSCTL_UINT(_net_link_ether_inet, OID_AUTO, probe_intvl,
185 CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_probe, 0, "");
186
187 static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */
188 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age,
189 CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_keep, 0, "");
190
191 static int arpt_down = 20; /* once declared down, don't send for 20 sec */
192 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time,
193 CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_down, 0, "");
194
195 static int arp_llreach_base = 120; /* seconds */
196 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_llreach_base,
197 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_llreach_base, 0,
198 "default ARP link-layer reachability max lifetime (in seconds)");
199
200 #define ARP_UNICAST_LIMIT 3 /* # of probes until ARP refresh broadcast */
201 static u_int32_t arp_unicast_lim = ARP_UNICAST_LIMIT;
202 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_unicast_lim,
203 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_unicast_lim, ARP_UNICAST_LIMIT,
204 "number of unicast ARP refresh probes before using broadcast");
205
206 static u_int32_t arp_maxtries = 5;
207 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries,
208 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_maxtries, 0, "");
209
210 static u_int32_t arp_maxhold = 16;
211 SYSCTL_UINT(_net_link_ether_inet, OID_AUTO, maxhold,
212 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_maxhold, 0, "");
213
214 static int useloopback = 1; /* use loopback interface for local traffic */
215 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback,
216 CTLFLAG_RW | CTLFLAG_LOCKED, &useloopback, 0, "");
217
218 static int arp_proxyall = 0;
219 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall,
220 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_proxyall, 0, "");
221
222 static int arp_sendllconflict = 0;
223 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, sendllconflict,
224 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_sendllconflict, 0, "");
225
226 static int log_arp_warnings = 0; /* Thread safe: no accumulated state */
227 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_warnings,
228 CTLFLAG_RW | CTLFLAG_LOCKED,
229 &log_arp_warnings, 0,
230 "log arp warning messages");
231
232 static int keep_announcements = 1; /* Thread safe: no aging of state */
233 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, keep_announcements,
234 CTLFLAG_RW | CTLFLAG_LOCKED,
235 &keep_announcements, 0,
236 "keep arp announcements");
237
238 static int send_conflicting_probes = 1; /* Thread safe: no accumulated state */
239 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, send_conflicting_probes,
240 CTLFLAG_RW | CTLFLAG_LOCKED,
241 &send_conflicting_probes, 0,
242 "send conflicting link-local arp probes");
243
244 static int arp_verbose;
245 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, verbose,
246 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_verbose, 0, "");
247
248 /*
249 * Generally protected by rnh_lock; use atomic operations on fields
250 * that are also modified outside of that lock (if needed).
251 */
252 struct arpstat arpstat __attribute__((aligned(sizeof (uint64_t))));
253 SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, stats,
254 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
255 0, 0, arp_getstat, "S,arpstat",
256 "ARP statistics (struct arpstat, net/if_arp.h)");
257
258 static struct zone *llinfo_arp_zone;
259 #define LLINFO_ARP_ZONE_MAX 256 /* maximum elements in zone */
260 #define LLINFO_ARP_ZONE_NAME "llinfo_arp" /* name for zone */
261
262 void
263 arp_init(void)
264 {
265 VERIFY(!arpinit_done);
266
267 LIST_INIT(&llinfo_arp);
268
269 llinfo_arp_zone = zinit(sizeof (struct llinfo_arp),
270 LLINFO_ARP_ZONE_MAX * sizeof (struct llinfo_arp), 0,
271 LLINFO_ARP_ZONE_NAME);
272 if (llinfo_arp_zone == NULL)
273 panic("%s: failed allocating llinfo_arp_zone", __func__);
274
275 zone_change(llinfo_arp_zone, Z_EXPAND, TRUE);
276 zone_change(llinfo_arp_zone, Z_CALLERACCT, FALSE);
277
278 arpinit_done = 1;
279 }
280
281 static struct llinfo_arp *
282 arp_llinfo_alloc(int how)
283 {
284 struct llinfo_arp *la;
285
286 la = (how == M_WAITOK) ? zalloc(llinfo_arp_zone) :
287 zalloc_noblock(llinfo_arp_zone);
288 if (la != NULL) {
289 bzero(la, sizeof (*la));
290 /*
291 * The type of queue (Q_DROPHEAD) here is just a hint;
292 * the actual logic that works on this queue performs
293 * a head drop, details in arp_llinfo_addq().
294 */
295 _qinit(&la->la_holdq, Q_DROPHEAD, (arp_maxhold == 0) ?
296 (uint32_t)-1 : arp_maxhold, QP_MBUF);
297 }
298
299 return (la);
300 }
301
302 static void
303 arp_llinfo_free(void *arg)
304 {
305 struct llinfo_arp *la = arg;
306
307 if (la->la_le.le_next != NULL || la->la_le.le_prev != NULL) {
308 panic("%s: trying to free %p when it is in use", __func__, la);
309 /* NOTREACHED */
310 }
311
312 /* Free any held packets */
313 (void) arp_llinfo_flushq(la);
314
315 /* Purge any link-layer info caching */
316 VERIFY(la->la_rt->rt_llinfo == la);
317 if (la->la_rt->rt_llinfo_purge != NULL)
318 la->la_rt->rt_llinfo_purge(la->la_rt);
319
320 zfree(llinfo_arp_zone, la);
321 }
322
323 static void
324 arp_llinfo_addq(struct llinfo_arp *la, struct mbuf *m)
325 {
326 if (qlen(&la->la_holdq) >= qlimit(&la->la_holdq)) {
327 struct mbuf *_m;
328 /* prune less than CTL, else take what's at the head */
329 _m = _getq_scidx_lt(&la->la_holdq, SCIDX_CTL);
330 if (_m == NULL)
331 _m = _getq(&la->la_holdq);
332 VERIFY(_m != NULL);
333 if (arp_verbose) {
334 log(LOG_DEBUG, "%s: dropping packet (scidx %u)\n",
335 __func__, MBUF_SCIDX(mbuf_get_service_class(_m)));
336 }
337 m_freem(_m);
338 atomic_add_32(&arpstat.dropped, 1);
339 atomic_add_32(&arpstat.held, -1);
340 }
341 _addq(&la->la_holdq, m);
342 atomic_add_32(&arpstat.held, 1);
343 if (arp_verbose) {
344 log(LOG_DEBUG, "%s: enqueued packet (scidx %u), qlen now %u\n",
345 __func__, MBUF_SCIDX(mbuf_get_service_class(m)),
346 qlen(&la->la_holdq));
347 }
348 }
349
350 static uint32_t
351 arp_llinfo_flushq(struct llinfo_arp *la)
352 {
353 uint32_t held = qlen(&la->la_holdq);
354
355 if (held != 0) {
356 atomic_add_32(&arpstat.purged, held);
357 atomic_add_32(&arpstat.held, -held);
358 _flushq(&la->la_holdq);
359 }
360 la->la_prbreq_cnt = 0;
361 VERIFY(qempty(&la->la_holdq));
362 return (held);
363 }
364
365 static void
366 arp_llinfo_purge(struct rtentry *rt)
367 {
368 struct llinfo_arp *la = rt->rt_llinfo;
369
370 RT_LOCK_ASSERT_HELD(rt);
371 VERIFY(rt->rt_llinfo_purge == arp_llinfo_purge && la != NULL);
372
373 if (la->la_llreach != NULL) {
374 RT_CONVERT_LOCK(rt);
375 ifnet_llreach_free(la->la_llreach);
376 la->la_llreach = NULL;
377 }
378 la->la_lastused = 0;
379 }
380
381 static void
382 arp_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
383 {
384 struct llinfo_arp *la = rt->rt_llinfo;
385 struct if_llreach *lr = la->la_llreach;
386
387 if (lr == NULL) {
388 bzero(ri, sizeof (*ri));
389 ri->ri_rssi = IFNET_RSSI_UNKNOWN;
390 ri->ri_lqm = IFNET_LQM_THRESH_OFF;
391 ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
392 } else {
393 IFLR_LOCK(lr);
394 /* Export to rt_reach_info structure */
395 ifnet_lr2ri(lr, ri);
396 /* Export ARP send expiration (calendar) time */
397 ri->ri_snd_expire =
398 ifnet_llreach_up2calexp(lr, la->la_lastused);
399 IFLR_UNLOCK(lr);
400 }
401 }
402
403 static void
404 arp_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
405 {
406 struct llinfo_arp *la = rt->rt_llinfo;
407 struct if_llreach *lr = la->la_llreach;
408
409 if (lr == NULL) {
410 bzero(iflri, sizeof (*iflri));
411 iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
412 iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
413 iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
414 } else {
415 IFLR_LOCK(lr);
416 /* Export to ifnet_llreach_info structure */
417 ifnet_lr2iflri(lr, iflri);
418 /* Export ARP send expiration (uptime) time */
419 iflri->iflri_snd_expire =
420 ifnet_llreach_up2upexp(lr, la->la_lastused);
421 IFLR_UNLOCK(lr);
422 }
423 }
424
425 static void
426 arp_llinfo_refresh(struct rtentry *rt)
427 {
428 uint64_t timenow = net_uptime();
429 /*
430 * If route entry is permanent or if expiry is less
431 * than timenow and extra time taken for unicast probe
432 * we can't expedite the refresh
433 */
434 if ((rt->rt_expire == 0) ||
435 (rt->rt_flags & RTF_STATIC) ||
436 !(rt->rt_flags & RTF_LLINFO)) {
437 return;
438 }
439
440 if (rt->rt_expire > timenow)
441 rt->rt_expire = timenow;
442 return;
443 }
444
445 void
446 arp_llreach_set_reachable(struct ifnet *ifp, void *addr, unsigned int alen)
447 {
448 /* Nothing more to do if it's disabled */
449 if (arp_llreach_base == 0)
450 return;
451
452 ifnet_llreach_set_reachable(ifp, ETHERTYPE_IP, addr, alen);
453 }
454
455 static __inline void
456 arp_llreach_use(struct llinfo_arp *la)
457 {
458 if (la->la_llreach != NULL)
459 la->la_lastused = net_uptime();
460 }
461
462 static __inline int
463 arp_llreach_reachable(struct llinfo_arp *la)
464 {
465 struct if_llreach *lr;
466 const char *why = NULL;
467
468 /* Nothing more to do if it's disabled; pretend it's reachable */
469 if (arp_llreach_base == 0)
470 return (1);
471
472 if ((lr = la->la_llreach) == NULL) {
473 /*
474 * Link-layer reachability record isn't present for this
475 * ARP entry; pretend it's reachable and use it as is.
476 */
477 return (1);
478 } else if (ifnet_llreach_reachable(lr)) {
479 /*
480 * Record is present, it's not shared with other ARP
481 * entries and a packet has recently been received
482 * from the remote host; consider it reachable.
483 */
484 if (lr->lr_reqcnt == 1)
485 return (1);
486
487 /* Prime it up, if this is the first time */
488 if (la->la_lastused == 0) {
489 VERIFY(la->la_llreach != NULL);
490 arp_llreach_use(la);
491 }
492
493 /*
494 * Record is present and shared with one or more ARP
495 * entries, and a packet has recently been received
496 * from the remote host. Since it's shared by more
497 * than one IP addresses, we can't rely on the link-
498 * layer reachability alone; consider it reachable if
499 * this ARP entry has been used "recently."
500 */
501 if (ifnet_llreach_reachable_delta(lr, la->la_lastused))
502 return (1);
503
504 why = "has alias(es) and hasn't been used in a while";
505 } else {
506 why = "haven't heard from it in a while";
507 }
508
509 if (arp_verbose > 1) {
510 char tmp[MAX_IPv4_STR_LEN];
511 u_int64_t now = net_uptime();
512
513 log(LOG_DEBUG, "%s: ARP probe(s) needed for %s; "
514 "%s [lastused %lld, lastrcvd %lld] secs ago\n",
515 if_name(lr->lr_ifp), inet_ntop(AF_INET,
516 &SIN(rt_key(la->la_rt))->sin_addr, tmp, sizeof (tmp)), why,
517 (la->la_lastused ? (int64_t)(now - la->la_lastused) : -1),
518 (lr->lr_lastrcvd ? (int64_t)(now - lr->lr_lastrcvd) : -1));
519
520 }
521 return (0);
522 }
523
524 /*
525 * Obtain a link-layer source cache entry for the sender.
526 *
527 * NOTE: This is currently only for ARP/Ethernet.
528 */
529 static void
530 arp_llreach_alloc(struct rtentry *rt, struct ifnet *ifp, void *addr,
531 unsigned int alen, boolean_t solicited, uint32_t *p_rt_event_code)
532 {
533 VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
534 VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
535
536 if (arp_llreach_base != 0 && rt->rt_expire != 0 &&
537 !(rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
538 ifp->if_addrlen == IF_LLREACH_MAXLEN && /* Ethernet */
539 alen == ifp->if_addrlen) {
540 struct llinfo_arp *la = rt->rt_llinfo;
541 struct if_llreach *lr;
542 const char *why = NULL, *type = "";
543
544 /* Become a regular mutex, just in case */
545 RT_CONVERT_LOCK(rt);
546
547 if ((lr = la->la_llreach) != NULL) {
548 type = (solicited ? "ARP reply" : "ARP announcement");
549 /*
550 * If target has changed, create a new record;
551 * otherwise keep existing record.
552 */
553 IFLR_LOCK(lr);
554 if (bcmp(addr, lr->lr_key.addr, alen) != 0) {
555 IFLR_UNLOCK(lr);
556 /* Purge any link-layer info caching */
557 VERIFY(rt->rt_llinfo_purge != NULL);
558 rt->rt_llinfo_purge(rt);
559 lr = NULL;
560 why = " for different target HW address; "
561 "using new llreach record";
562 *p_rt_event_code = ROUTE_LLENTRY_CHANGED;
563 } else {
564 /*
565 * If we were doing unicast probing, we need to
566 * deliver an event for neighbor cache resolution
567 */
568 if (lr->lr_probes != 0)
569 *p_rt_event_code = ROUTE_LLENTRY_RESOLVED;
570
571 lr->lr_probes = 0; /* reset probe count */
572 IFLR_UNLOCK(lr);
573 if (solicited) {
574 why = " for same target HW address; "
575 "keeping existing llreach record";
576 }
577 }
578 }
579
580 if (lr == NULL) {
581 lr = la->la_llreach = ifnet_llreach_alloc(ifp,
582 ETHERTYPE_IP, addr, alen, arp_llreach_base);
583 if (lr != NULL) {
584 lr->lr_probes = 0; /* reset probe count */
585 if (why == NULL)
586 why = "creating new llreach record";
587 }
588 *p_rt_event_code = ROUTE_LLENTRY_RESOLVED;
589 }
590
591 if (arp_verbose > 1 && lr != NULL && why != NULL) {
592 char tmp[MAX_IPv4_STR_LEN];
593
594 log(LOG_DEBUG, "%s: %s%s for %s\n", if_name(ifp),
595 type, why, inet_ntop(AF_INET,
596 &SIN(rt_key(rt))->sin_addr, tmp, sizeof (tmp)));
597 }
598 }
599 }
600
601 struct arptf_arg {
602 boolean_t draining;
603 boolean_t probing;
604 uint32_t killed;
605 uint32_t aging;
606 uint32_t sticky;
607 uint32_t found;
608 uint32_t qlen;
609 uint32_t qsize;
610 };
611
612 /*
613 * Free an arp entry.
614 */
615 static void
616 arptfree(struct llinfo_arp *la, void *arg)
617 {
618 struct arptf_arg *ap = arg;
619 struct rtentry *rt = la->la_rt;
620 uint64_t timenow;
621
622 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
623
624 /* rnh_lock acquired by caller protects rt from going away */
625 RT_LOCK(rt);
626
627 VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
628 VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
629
630 ap->found++;
631 timenow = net_uptime();
632
633 /* If we're probing, flush out held packets upon probe expiration */
634 if (ap->probing && (la->la_flags & LLINFO_PROBING) &&
635 la->la_probeexp <= timenow) {
636 struct sockaddr_dl *sdl = SDL(rt->rt_gateway);
637 if (sdl != NULL)
638 sdl->sdl_alen = 0;
639 (void) arp_llinfo_flushq(la);
640 /*
641 * Enqueue work item to invoke callback for this route entry
642 */
643 route_event_enqueue_nwk_wq_entry(rt, NULL,
644 ROUTE_LLENTRY_UNREACH, NULL, TRUE);
645 }
646
647 /*
648 * The following is mostly being used to arm the timer
649 * again and for logging.
650 * qlen is used to re-arm the timer. Therefore, pure probe
651 * requests can be considered as 0 length packets
652 * contributing only to length but not to the size.
653 */
654 ap->qlen += qlen(&la->la_holdq);
655 ap->qlen += la->la_prbreq_cnt;
656 ap->qsize += qsize(&la->la_holdq);
657
658 if (rt->rt_expire == 0 || (rt->rt_flags & RTF_STATIC)) {
659 ap->sticky++;
660 /* ARP entry is permanent? */
661 if (rt->rt_expire == 0) {
662 RT_UNLOCK(rt);
663 return;
664 }
665 }
666
667 /* ARP entry hasn't expired and we're not draining? */
668 if (!ap->draining && rt->rt_expire > timenow) {
669 RT_UNLOCK(rt);
670 ap->aging++;
671 return;
672 }
673
674 if (rt->rt_refcnt > 0) {
675 /*
676 * ARP entry has expired, with outstanding refcnt.
677 * If we're not draining, force ARP query to be
678 * generated next time this entry is used.
679 */
680 if (!ap->draining && !ap->probing) {
681 struct sockaddr_dl *sdl = SDL(rt->rt_gateway);
682 if (sdl != NULL)
683 sdl->sdl_alen = 0;
684 la->la_asked = 0;
685 rt->rt_flags &= ~RTF_REJECT;
686 }
687 RT_UNLOCK(rt);
688 } else if (!(rt->rt_flags & RTF_STATIC) && !ap->probing) {
689 /*
690 * ARP entry has no outstanding refcnt, and we're either
691 * draining or it has expired; delete it from the routing
692 * table. Safe to drop rt_lock and use rt_key, since holding
693 * rnh_lock here prevents another thread from calling
694 * rt_setgate() on this route.
695 */
696 RT_UNLOCK(rt);
697 rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
698 rt_mask(rt), 0, NULL);
699 arpstat.timeouts++;
700 ap->killed++;
701 } else {
702 /* ARP entry is static; let it linger */
703 RT_UNLOCK(rt);
704 }
705 }
706
707 void
708 in_arpdrain(void *arg)
709 {
710 #pragma unused(arg)
711 struct llinfo_arp *la, *ola;
712 struct arptf_arg farg;
713
714 if (arp_verbose)
715 log(LOG_DEBUG, "%s: draining ARP entries\n", __func__);
716
717 lck_mtx_lock(rnh_lock);
718 la = llinfo_arp.lh_first;
719 bzero(&farg, sizeof (farg));
720 farg.draining = TRUE;
721 while ((ola = la) != NULL) {
722 la = la->la_le.le_next;
723 arptfree(ola, &farg);
724 }
725 if (arp_verbose) {
726 log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u; "
727 "%u pkts held (%u bytes)\n", __func__, farg.found,
728 farg.aging, farg.sticky, farg.killed, farg.qlen,
729 farg.qsize);
730 }
731 lck_mtx_unlock(rnh_lock);
732 }
733
734 /*
735 * Timeout routine. Age arp_tab entries periodically.
736 */
737 static void
738 arp_timeout(thread_call_param_t arg0, thread_call_param_t arg1)
739 {
740 #pragma unused(arg0, arg1)
741 struct llinfo_arp *la, *ola;
742 struct timeval atv;
743 struct arptf_arg farg;
744
745 lck_mtx_lock(rnh_lock);
746 la = llinfo_arp.lh_first;
747 bzero(&farg, sizeof (farg));
748 while ((ola = la) != NULL) {
749 la = la->la_le.le_next;
750 arptfree(ola, &farg);
751 }
752 if (arp_verbose) {
753 log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u; "
754 "%u pkts held (%u bytes)\n", __func__, farg.found,
755 farg.aging, farg.sticky, farg.killed, farg.qlen,
756 farg.qsize);
757 }
758 atv.tv_usec = 0;
759 atv.tv_sec = MAX(arpt_prune, 5);
760 /* re-arm the timer if there's work to do */
761 arp_timeout_run = 0;
762 if (farg.aging > 0)
763 arp_sched_timeout(&atv);
764 else if (arp_verbose)
765 log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__);
766 lck_mtx_unlock(rnh_lock);
767 }
768
769 static void
770 arp_sched_timeout(struct timeval *atv)
771 {
772 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
773
774 if (!arp_timeout_run) {
775 struct timeval tv;
776 uint64_t deadline = 0;
777
778 if (arp_timeout_tcall == NULL) {
779 arp_timeout_tcall =
780 thread_call_allocate(arp_timeout, NULL);
781 VERIFY(arp_timeout_tcall != NULL);
782 }
783
784 if (atv == NULL) {
785 tv.tv_usec = 0;
786 tv.tv_sec = MAX(arpt_prune / 5, 1);
787 atv = &tv;
788 }
789 if (arp_verbose) {
790 log(LOG_DEBUG, "%s: timer scheduled in "
791 "T+%llus.%lluu\n", __func__,
792 (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
793 }
794 arp_timeout_run = 1;
795
796 clock_deadline_for_periodic_event(atv->tv_sec * NSEC_PER_SEC,
797 mach_absolute_time(), &deadline);
798 (void) thread_call_enter_delayed(arp_timeout_tcall, deadline);
799 }
800 }
801
802 /*
803 * Probe routine.
804 */
805 static void
806 arp_probe(thread_call_param_t arg0, thread_call_param_t arg1)
807 {
808 #pragma unused(arg0, arg1)
809 struct llinfo_arp *la, *ola;
810 struct timeval atv;
811 struct arptf_arg farg;
812
813 lck_mtx_lock(rnh_lock);
814 la = llinfo_arp.lh_first;
815 bzero(&farg, sizeof (farg));
816 farg.probing = TRUE;
817 while ((ola = la) != NULL) {
818 la = la->la_le.le_next;
819 arptfree(ola, &farg);
820 }
821 if (arp_verbose) {
822 log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u; "
823 "%u pkts held (%u bytes)\n", __func__, farg.found,
824 farg.aging, farg.sticky, farg.killed, farg.qlen,
825 farg.qsize);
826 }
827 atv.tv_usec = 0;
828 atv.tv_sec = MAX(arpt_probe, ARP_PROBE_TIME);
829 /* re-arm the probe if there's work to do */
830 arp_probe_run = 0;
831 if (farg.qlen > 0)
832 arp_sched_probe(&atv);
833 else if (arp_verbose)
834 log(LOG_DEBUG, "%s: not rescheduling probe\n", __func__);
835 lck_mtx_unlock(rnh_lock);
836 }
837
838 static void
839 arp_sched_probe(struct timeval *atv)
840 {
841 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
842
843 if (!arp_probe_run) {
844 struct timeval tv;
845 uint64_t deadline = 0;
846
847 if (arp_probe_tcall == NULL) {
848 arp_probe_tcall =
849 thread_call_allocate(arp_probe, NULL);
850 VERIFY(arp_probe_tcall != NULL);
851 }
852
853 if (atv == NULL) {
854 tv.tv_usec = 0;
855 tv.tv_sec = MAX(arpt_probe, ARP_PROBE_TIME);
856 atv = &tv;
857 }
858 if (arp_verbose) {
859 log(LOG_DEBUG, "%s: probe scheduled in "
860 "T+%llus.%lluu\n", __func__,
861 (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
862 }
863 arp_probe_run = 1;
864
865 clock_deadline_for_periodic_event(atv->tv_sec * NSEC_PER_SEC,
866 mach_absolute_time(), &deadline);
867 (void) thread_call_enter_delayed(arp_probe_tcall, deadline);
868 }
869 }
870
871 /*
872 * ifa_rtrequest() callback
873 */
874 static void
875 arp_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
876 {
877 #pragma unused(sa)
878 struct sockaddr *gate = rt->rt_gateway;
879 struct llinfo_arp *la = rt->rt_llinfo;
880 static struct sockaddr_dl null_sdl =
881 { .sdl_len = sizeof (null_sdl), .sdl_family = AF_LINK };
882 uint64_t timenow;
883 char buf[MAX_IPv4_STR_LEN];
884
885 VERIFY(arpinit_done);
886 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
887 RT_LOCK_ASSERT_HELD(rt);
888
889 if (rt->rt_flags & RTF_GATEWAY)
890 return;
891
892 timenow = net_uptime();
893 switch (req) {
894 case RTM_ADD:
895 /*
896 * XXX: If this is a manually added route to interface
897 * such as older version of routed or gated might provide,
898 * restore cloning bit.
899 */
900 if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL &&
901 SIN(rt_mask(rt))->sin_addr.s_addr != INADDR_BROADCAST)
902 rt->rt_flags |= RTF_CLONING;
903
904 if (rt->rt_flags & RTF_CLONING) {
905 /*
906 * Case 1: This route should come from a route to iface.
907 */
908 if (rt_setgate(rt, rt_key(rt), SA(&null_sdl)) == 0) {
909 gate = rt->rt_gateway;
910 SDL(gate)->sdl_type = rt->rt_ifp->if_type;
911 SDL(gate)->sdl_index = rt->rt_ifp->if_index;
912 /*
913 * In case we're called before 1.0 sec.
914 * has elapsed.
915 */
916 rt_setexpire(rt, MAX(timenow, 1));
917 }
918 break;
919 }
920 /* Announce a new entry if requested. */
921 if (rt->rt_flags & RTF_ANNOUNCE) {
922 if (la != NULL)
923 arp_llreach_use(la); /* Mark use timestamp */
924 RT_UNLOCK(rt);
925 dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST,
926 SDL(gate), rt_key(rt), NULL, rt_key(rt), 0);
927 RT_LOCK(rt);
928 arpstat.txannounces++;
929 }
930 /* FALLTHRU */
931 case RTM_RESOLVE:
932 if (gate->sa_family != AF_LINK ||
933 gate->sa_len < sizeof (null_sdl)) {
934 arpstat.invalidreqs++;
935 log(LOG_ERR, "%s: route to %s has bad gateway address "
936 "(sa_family %u sa_len %u) on %s\n",
937 __func__, inet_ntop(AF_INET,
938 &SIN(rt_key(rt))->sin_addr.s_addr, buf,
939 sizeof (buf)), gate->sa_family, gate->sa_len,
940 if_name(rt->rt_ifp));
941 break;
942 }
943 SDL(gate)->sdl_type = rt->rt_ifp->if_type;
944 SDL(gate)->sdl_index = rt->rt_ifp->if_index;
945
946 if (la != NULL)
947 break; /* This happens on a route change */
948
949 /*
950 * Case 2: This route may come from cloning, or a manual route
951 * add with a LL address.
952 */
953 rt->rt_llinfo = la = arp_llinfo_alloc(M_WAITOK);
954 if (la == NULL) {
955 arpstat.reqnobufs++;
956 break;
957 }
958 rt->rt_llinfo_get_ri = arp_llinfo_get_ri;
959 rt->rt_llinfo_get_iflri = arp_llinfo_get_iflri;
960 rt->rt_llinfo_purge = arp_llinfo_purge;
961 rt->rt_llinfo_free = arp_llinfo_free;
962 rt->rt_llinfo_refresh = arp_llinfo_refresh;
963 rt->rt_flags |= RTF_LLINFO;
964 la->la_rt = rt;
965 LIST_INSERT_HEAD(&llinfo_arp, la, la_le);
966 arpstat.inuse++;
967
968 /* We have at least one entry; arm the timer if not already */
969 arp_sched_timeout(NULL);
970
971 /*
972 * This keeps the multicast addresses from showing up
973 * in `arp -a' listings as unresolved. It's not actually
974 * functional. Then the same for broadcast. For IPv4
975 * link-local address, keep the entry around even after
976 * it has expired.
977 */
978 if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
979 RT_UNLOCK(rt);
980 dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate,
981 sizeof (struct sockaddr_dl));
982 RT_LOCK(rt);
983 rt_setexpire(rt, 0);
984 } else if (in_broadcast(SIN(rt_key(rt))->sin_addr,
985 rt->rt_ifp)) {
986 struct sockaddr_dl *gate_ll = SDL(gate);
987 size_t broadcast_len;
988 ifnet_llbroadcast_copy_bytes(rt->rt_ifp,
989 LLADDR(gate_ll), sizeof (gate_ll->sdl_data),
990 &broadcast_len);
991 gate_ll->sdl_alen = broadcast_len;
992 gate_ll->sdl_family = AF_LINK;
993 gate_ll->sdl_len = sizeof (struct sockaddr_dl);
994 /* In case we're called before 1.0 sec. has elapsed */
995 rt_setexpire(rt, MAX(timenow, 1));
996 } else if (IN_LINKLOCAL(ntohl(SIN(rt_key(rt))->
997 sin_addr.s_addr))) {
998 rt->rt_flags |= RTF_STATIC;
999 }
1000
1001 /* Set default maximum number of retries */
1002 la->la_maxtries = arp_maxtries;
1003
1004 /* Become a regular mutex, just in case */
1005 RT_CONVERT_LOCK(rt);
1006 IFA_LOCK_SPIN(rt->rt_ifa);
1007 if (SIN(rt_key(rt))->sin_addr.s_addr ==
1008 (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
1009 IFA_UNLOCK(rt->rt_ifa);
1010 /*
1011 * This test used to be
1012 * if (loif.if_flags & IFF_UP)
1013 * It allowed local traffic to be forced through the
1014 * hardware by configuring the loopback down. However,
1015 * it causes problems during network configuration
1016 * for boards that can't receive packets they send.
1017 * It is now necessary to clear "useloopback" and
1018 * remove the route to force traffic out to the
1019 * hardware.
1020 */
1021 rt_setexpire(rt, 0);
1022 ifnet_lladdr_copy_bytes(rt->rt_ifp, LLADDR(SDL(gate)),
1023 SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen);
1024 if (useloopback) {
1025 if (rt->rt_ifp != lo_ifp) {
1026 /*
1027 * Purge any link-layer info caching.
1028 */
1029 if (rt->rt_llinfo_purge != NULL)
1030 rt->rt_llinfo_purge(rt);
1031
1032 /*
1033 * Adjust route ref count for the
1034 * interfaces.
1035 */
1036 if (rt->rt_if_ref_fn != NULL) {
1037 rt->rt_if_ref_fn(lo_ifp, 1);
1038 rt->rt_if_ref_fn(rt->rt_ifp, -1);
1039 }
1040 }
1041 rt->rt_ifp = lo_ifp;
1042 /*
1043 * If rmx_mtu is not locked, update it
1044 * to the MTU used by the new interface.
1045 */
1046 if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
1047 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
1048 }
1049 } else {
1050 IFA_UNLOCK(rt->rt_ifa);
1051 }
1052 break;
1053
1054 case RTM_DELETE:
1055 if (la == NULL)
1056 break;
1057 /*
1058 * Unchain it but defer the actual freeing until the route
1059 * itself is to be freed. rt->rt_llinfo still points to
1060 * llinfo_arp, and likewise, la->la_rt still points to this
1061 * route entry, except that RTF_LLINFO is now cleared.
1062 */
1063 LIST_REMOVE(la, la_le);
1064 la->la_le.le_next = NULL;
1065 la->la_le.le_prev = NULL;
1066 arpstat.inuse--;
1067
1068 /*
1069 * Purge any link-layer info caching.
1070 */
1071 if (rt->rt_llinfo_purge != NULL)
1072 rt->rt_llinfo_purge(rt);
1073
1074 rt->rt_flags &= ~RTF_LLINFO;
1075 (void) arp_llinfo_flushq(la);
1076 }
1077 }
1078
1079 /*
1080 * convert hardware address to hex string for logging errors.
1081 */
1082 static const char *
1083 sdl_addr_to_hex(const struct sockaddr_dl *sdl, char *orig_buf, int buflen)
1084 {
1085 char *buf = orig_buf;
1086 int i;
1087 const u_char *lladdr = (u_char *)(size_t)sdl->sdl_data;
1088 int maxbytes = buflen / 3;
1089
1090 if (maxbytes > sdl->sdl_alen) {
1091 maxbytes = sdl->sdl_alen;
1092 }
1093 *buf = '\0';
1094 for (i = 0; i < maxbytes; i++) {
1095 snprintf(buf, 3, "%02x", lladdr[i]);
1096 buf += 2;
1097 *buf = (i == maxbytes - 1) ? '\0' : ':';
1098 buf++;
1099 }
1100 return (orig_buf);
1101 }
1102
1103 /*
1104 * arp_lookup_route will lookup the route for a given address.
1105 *
1106 * The address must be for a host on a local network on this interface.
1107 * If the returned route is non-NULL, the route is locked and the caller
1108 * is responsible for unlocking it and releasing its reference.
1109 */
1110 static errno_t
1111 arp_lookup_route(const struct in_addr *addr, int create, int proxy,
1112 route_t *route, unsigned int ifscope)
1113 {
1114 struct sockaddr_inarp sin =
1115 { sizeof (sin), AF_INET, 0, { 0 }, { 0 }, 0, 0 };
1116 const char *why = NULL;
1117 errno_t error = 0;
1118 route_t rt;
1119
1120 *route = NULL;
1121
1122 sin.sin_addr.s_addr = addr->s_addr;
1123 sin.sin_other = proxy ? SIN_PROXY : 0;
1124
1125 /*
1126 * If the destination is a link-local address, don't
1127 * constrain the lookup (don't scope it).
1128 */
1129 if (IN_LINKLOCAL(ntohl(addr->s_addr)))
1130 ifscope = IFSCOPE_NONE;
1131
1132 rt = rtalloc1_scoped((struct sockaddr *)&sin, create, 0, ifscope);
1133 if (rt == NULL)
1134 return (ENETUNREACH);
1135
1136 RT_LOCK(rt);
1137
1138 if (rt->rt_flags & RTF_GATEWAY) {
1139 why = "host is not on local network";
1140 error = ENETUNREACH;
1141 } else if (!(rt->rt_flags & RTF_LLINFO)) {
1142 why = "could not allocate llinfo";
1143 error = ENOMEM;
1144 } else if (rt->rt_gateway->sa_family != AF_LINK) {
1145 why = "gateway route is not ours";
1146 error = EPROTONOSUPPORT;
1147 }
1148
1149 if (error != 0) {
1150 if (create && (arp_verbose || log_arp_warnings)) {
1151 char tmp[MAX_IPv4_STR_LEN];
1152 log(LOG_DEBUG, "%s: link#%d %s failed: %s\n",
1153 __func__, ifscope, inet_ntop(AF_INET, addr, tmp,
1154 sizeof (tmp)), why);
1155 }
1156
1157 /*
1158 * If there are no references to this route, and it is
1159 * a cloned route, and not static, and ARP had created
1160 * the route, then purge it from the routing table as
1161 * it is probably bogus.
1162 */
1163 if (rt->rt_refcnt == 1 &&
1164 (rt->rt_flags & (RTF_WASCLONED | RTF_STATIC)) ==
1165 RTF_WASCLONED) {
1166 /*
1167 * Prevent another thread from modiying rt_key,
1168 * rt_gateway via rt_setgate() after rt_lock is
1169 * dropped by marking the route as defunct.
1170 */
1171 rt->rt_flags |= RTF_CONDEMNED;
1172 RT_UNLOCK(rt);
1173 rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1174 rt_mask(rt), rt->rt_flags, NULL);
1175 rtfree(rt);
1176 } else {
1177 RT_REMREF_LOCKED(rt);
1178 RT_UNLOCK(rt);
1179 }
1180 return (error);
1181 }
1182
1183 /*
1184 * Caller releases reference and does RT_UNLOCK(rt).
1185 */
1186 *route = rt;
1187 return (0);
1188 }
1189
1190 boolean_t
1191 arp_is_entry_probing (route_t p_route)
1192 {
1193 struct llinfo_arp *llinfo = p_route->rt_llinfo;
1194
1195 if (llinfo != NULL &&
1196 llinfo->la_llreach != NULL &&
1197 llinfo->la_llreach->lr_probes != 0)
1198 return (TRUE);
1199
1200 return (FALSE);
1201 }
1202
1203 /*
1204 * This is the ARP pre-output routine; care must be taken to ensure that
1205 * the "hint" route never gets freed via rtfree(), since the caller may
1206 * have stored it inside a struct route with a reference held for that
1207 * placeholder.
 *
 * Resolves the IPv4 destination net_dest on ifp to a link-layer address
 * written into ll_dest (at most ll_dest_len bytes).  hint, when non-NULL,
 * is passed to route_to_gwroute() to obtain the route to work on; packet,
 * when non-NULL, is checked for M_BCAST/M_MCAST and is queued on the
 * entry's la_holdq while resolution is still pending.
 *
 * Return values produced in this function:
 *   0             ll_dest was filled in from a usable entry (or the
 *                 broadcast/multicast helper that was called returned 0)
 *   EJUSTRETURN   resolution is pending; packet (if any) sits in la_holdq
 *   EINVAL        ifp or net_dest is NULL
 *   EAFNOSUPPORT  net_dest->sin_family is not AF_INET
 *   ENETDOWN      ifp does not have both IFF_UP and IFF_RUNNING set
 *   ENOTSUP       the entry is not usable and ifp has IFF_NOARP set
 *   EHOSTUNREACH  no route/llinfo could be obtained, or la_asked has
 *                 reached la_maxtries (the route is then marked
 *                 RTF_REJECT and the packet is left for the caller)
 *   other         whatever route_to_gwroute(), arp_lookup_route(),
 *                 ifnet_llbroadcast_copy_bytes() or dlil_resolve_multi()
 *                 returned
1208 */
1209 errno_t
1210 arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
1211 struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint,
1212 mbuf_t packet)
1213 {
1214 route_t route = NULL; /* output route */
1215 errno_t result = 0;
1216 struct sockaddr_dl *gateway;
1217 struct llinfo_arp *llinfo = NULL;
1218 boolean_t usable, probing = FALSE;
1219 uint64_t timenow;
1220 struct if_llreach *lr;
1221 struct ifaddr *rt_ifa;
1222 struct sockaddr *sa;
1223 uint32_t rtflags;
1224 struct sockaddr_dl sdl;
1225 boolean_t send_probe_notif = FALSE;
1226
1227 if (ifp == NULL || net_dest == NULL)
1228 return (EINVAL);
1229
1230 if (net_dest->sin_family != AF_INET)
1231 return (EAFNOSUPPORT);
1232
1233 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
1234 return (ENETDOWN);
1235
1236 /*
1237 * If we were given a route, verify the route and grab the gateway
1238 */
1239 if (hint != NULL) {
1240 /*
1241 * Callee holds a reference on the route and returns
1242 * with the route entry locked, upon success.
1243 */
1244 result = route_to_gwroute((const struct sockaddr *)
1245 net_dest, hint, &route);
1246 if (result != 0)
1247 return (result);
1248 if (route != NULL)
1249 RT_LOCK_ASSERT_HELD(route);
1250 }
1251
/*
 * Broadcast (flagged on the packet or determined from the address):
 * no ARP exchange is needed, ll_dest is filled from
 * ifnet_llbroadcast_copy_bytes() and we go straight to release.
 */
1252 if ((packet != NULL && (packet->m_flags & M_BCAST)) ||
1253 in_broadcast(net_dest->sin_addr, ifp)) {
1254 size_t broadcast_len;
1255 bzero(ll_dest, ll_dest_len);
1256 result = ifnet_llbroadcast_copy_bytes(ifp, LLADDR(ll_dest),
1257 ll_dest_len - offsetof(struct sockaddr_dl, sdl_data),
1258 &broadcast_len);
1259 if (result == 0) {
1260 ll_dest->sdl_alen = broadcast_len;
1261 ll_dest->sdl_family = AF_LINK;
1262 ll_dest->sdl_len = sizeof (struct sockaddr_dl);
1263 }
1264 goto release;
1265 }
/*
 * Multicast: dlil_resolve_multi() produces the link-layer address.
 * rt_lock is dropped around the call and re-taken afterwards because
 * the release path expects the route to be locked.
 */
1266 if ((packet != NULL && (packet->m_flags & M_MCAST)) ||
1267 ((ifp->if_flags & IFF_MULTICAST) &&
1268 IN_MULTICAST(ntohl(net_dest->sin_addr.s_addr)))) {
1269 if (route != NULL)
1270 RT_UNLOCK(route);
1271 result = dlil_resolve_multi(ifp,
1272 (const struct sockaddr *)net_dest,
1273 (struct sockaddr *)ll_dest, ll_dest_len);
1274 if (route != NULL)
1275 RT_LOCK(route);
1276 goto release;
1277 }
1278
1279 /*
1280 * If we didn't find a route, or the route doesn't have
1281 * link layer information, trigger the creation of the
1282 * route and link layer information.
1283 */
1284 if (route == NULL || route->rt_llinfo == NULL) {
1285 /* Clean up now while we can */
1286 if (route != NULL) {
/* The hint only gives back its reference; it is never rtfree()d */
1287 if (route == hint) {
1288 RT_REMREF_LOCKED(route);
1289 RT_UNLOCK(route);
1290 } else {
1291 RT_UNLOCK(route);
1292 rtfree(route);
1293 }
1294 }
1295 /*
1296 * Callee holds a reference on the route and returns
1297 * with the route entry locked, upon success.
1298 */
1299 result = arp_lookup_route(&net_dest->sin_addr, 1, 0, &route,
1300 ifp->if_index);
1301 if (result == 0)
1302 RT_LOCK_ASSERT_HELD(route);
1303 }
1304
1305 if (result || route == NULL || (llinfo = route->rt_llinfo) == NULL) {
1306 /* In case result is 0 but no route, return an error */
1307 if (result == 0)
1308 result = EHOSTUNREACH;
1309
1310 if (route != NULL && route->rt_llinfo == NULL) {
1311 char tmp[MAX_IPv4_STR_LEN];
1312 log(LOG_ERR, "%s: can't allocate llinfo for %s\n",
1313 __func__, inet_ntop(AF_INET, &net_dest->sin_addr,
1314 tmp, sizeof (tmp)));
1315 }
1316 goto release;
1317 }
1318
1319 /*
1320 * Now that we have the right route, is it filled in?
1321 */
1322 gateway = SDL(route->rt_gateway);
1323 timenow = net_uptime();
1324 VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
1325 VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);
1326
/*
 * Usable means: the entry is permanent (rt_expire == 0) or has not yet
 * expired, and its gateway is an AF_LINK address of non-zero length.
 */
1327 usable = ((route->rt_expire == 0 || route->rt_expire > timenow) &&
1328 gateway != NULL && gateway->sdl_family == AF_LINK &&
1329 gateway->sdl_alen != 0);
1330
1331 if (usable) {
1332 boolean_t unreachable = !arp_llreach_reachable(llinfo);
1333
1334 /* Entry is usable, so fill in info for caller */
1335 bcopy(gateway, ll_dest, MIN(gateway->sdl_len, ll_dest_len));
1336 result = 0;
1337 arp_llreach_use(llinfo); /* Mark use timestamp */
1338
1339 lr = llinfo->la_llreach;
1340 if (lr == NULL)
1341 goto release;
1342 rt_ifa = route->rt_ifa;
1343
1344 /* Become a regular mutex, just in case */
1345 RT_CONVERT_LOCK(route);
1346 IFLR_LOCK_SPIN(lr);
1347
1348 if ((unreachable || (llinfo->la_flags & LLINFO_PROBING)) &&
1349 lr->lr_probes < arp_unicast_lim) {
1350 /*
1351 * Thus mark the entry with la_probeexp deadline to
1352 * trigger the probe timer to be scheduled (if not
1353 * already). This gets cleared the moment we get
1354 * an ARP reply.
1355 */
1356 probing = TRUE;
1357 if (lr->lr_probes == 0) {
1358 llinfo->la_probeexp = (timenow + arpt_probe);
1359 llinfo->la_flags |= LLINFO_PROBING;
1360 /*
1361 * Provide notification that ARP unicast
1362 * probing has started.
1363 * We only do it for the first unicast probe
1364 * attempt.
1365 */
1366 send_probe_notif = TRUE;
1367 }
1368
1369 /*
1370 * Start the unicast probe and anticipate a reply;
1371 * afterwards, return existing entry to caller and
1372 * let it be used anyway. If peer is non-existent
1373 * we'll broadcast ARP next time around.
1374 */
1375 lr->lr_probes++;
/* Build the unicast target from the cached link-layer key */
1376 bzero(&sdl, sizeof (sdl));
1377 sdl.sdl_alen = ifp->if_addrlen;
1378 bcopy(&lr->lr_key.addr, LLADDR(&sdl),
1379 ifp->if_addrlen);
1380 IFLR_UNLOCK(lr);
/* Hold rt_ifa so ifa_addr stays valid once rt_lock is dropped */
1381 IFA_LOCK_SPIN(rt_ifa);
1382 IFA_ADDREF_LOCKED(rt_ifa);
1383 sa = rt_ifa->ifa_addr;
1384 IFA_UNLOCK(rt_ifa);
1385 rtflags = route->rt_flags;
1386 RT_UNLOCK(route);
1387 dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa,
1388 (const struct sockaddr_dl *)&sdl,
1389 (const struct sockaddr *)net_dest, rtflags);
1390 IFA_REMREF(rt_ifa);
1391 RT_LOCK(route);
1392 goto release;
1393 } else {
1394 IFLR_UNLOCK(lr);
1395 if (!unreachable &&
1396 !(llinfo->la_flags & LLINFO_PROBING)) {
1397 /*
1398 * Normal case where peer is still reachable,
1399 * we're not probing and if_addrlen is anything
1400 * but IF_LLREACH_MAXLEN.
1401 */
1402 goto release;
1403 }
/*
 * Otherwise fall out of the usable block and continue with the
 * request path below (unreachable or already probing, with
 * lr_probes at or above arp_unicast_lim).
 */
1404 }
1405 }
1406
1407 if (ifp->if_flags & IFF_NOARP) {
1408 result = ENOTSUP;
1409 goto release;
1410 }
1411
1412 /*
1413 * Route wasn't complete/valid; we need to send out ARP request.
1414 * If we've exceeded the limit of la_holdq, drop from the head
1415 * of queue and add this packet to the tail. If we end up with
1416 * RTF_REJECT below, we'll dequeue this from tail and have the
1417 * caller free the packet instead. It's safe to do that since
1418 * we still hold the route's rt_lock.
1419 */
1420 if (packet != NULL)
1421 arp_llinfo_addq(llinfo, packet);
1422 else
/* No packet to hold: just count the packet-less request */
1423 llinfo->la_prbreq_cnt++;
1424 /*
1425 * Regardless of permanent vs. expirable entry, we need to
1426 * avoid having packets sit in la_holdq forever; thus mark the
1427 * entry with la_probeexp deadline to trigger the probe timer
1428 * to be scheduled (if not already). This gets cleared the
1429 * moment we get an ARP reply.
1430 */
1431 probing = TRUE;
/* Arm the deadline only for the first held packet/request */
1432 if ((qlen(&llinfo->la_holdq) + llinfo->la_prbreq_cnt) == 1) {
1433 llinfo->la_probeexp = (timenow + arpt_probe);
1434 llinfo->la_flags |= LLINFO_PROBING;
1435 }
1436
/*
 * Requests are only sent from here for expirable entries.  rt_expire
 * is stamped with timenow below, so once la_asked is non-zero the
 * inner block is skipped by calls that observe the same net_uptime()
 * value.
 */
1437 if (route->rt_expire) {
1438 route->rt_flags &= ~RTF_REJECT;
1439 if (llinfo->la_asked == 0 || route->rt_expire != timenow) {
1440 rt_setexpire(route, timenow);
1441 if (llinfo->la_asked++ < llinfo->la_maxtries) {
1442 struct kev_msg ev_msg;
1443 struct kev_in_arpfailure in_arpfailure;
1444 boolean_t sendkev = FALSE;
1445
1446 rt_ifa = route->rt_ifa;
1447 lr = llinfo->la_llreach;
1448 /* Become a regular mutex, just in case */
1449 RT_CONVERT_LOCK(route);
1450 /* Update probe count, if applicable */
1451 if (lr != NULL) {
1452 IFLR_LOCK_SPIN(lr);
1453 lr->lr_probes++;
1454 IFLR_UNLOCK(lr);
1455 }
/*
 * A router entry that has already been asked more than once gets a
 * KEV_INET_ARPRTRFAILURE event (posted below, after rt_lock is
 * dropped); the flag records that it was sent.
 */
1456 if (ifp->if_addrlen == IF_LLREACH_MAXLEN &&
1457 route->rt_flags & RTF_ROUTER &&
1458 llinfo->la_asked > 1) {
1459 sendkev = TRUE;
1460 llinfo->la_flags |= LLINFO_RTRFAIL_EVTSENT;
1461 }
1462 IFA_LOCK_SPIN(rt_ifa);
1463 IFA_ADDREF_LOCKED(rt_ifa);
1464 sa = rt_ifa->ifa_addr;
1465 IFA_UNLOCK(rt_ifa);
1466 arp_llreach_use(llinfo); /* Mark use tstamp */
1467 rtflags = route->rt_flags;
1468 RT_UNLOCK(route);
1469 dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa,
1470 NULL, (const struct sockaddr *)net_dest,
1471 rtflags);
1472 IFA_REMREF(rt_ifa);
1473 if (sendkev) {
1474 bzero(&ev_msg, sizeof(ev_msg));
1475 bzero(&in_arpfailure,
1476 sizeof(in_arpfailure));
1477 in_arpfailure.link_data.if_family =
1478 ifp->if_family;
1479 in_arpfailure.link_data.if_unit =
1480 ifp->if_unit;
1481 strlcpy(in_arpfailure.link_data.if_name,
1482 ifp->if_name, IFNAMSIZ);
1483 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1484 ev_msg.kev_class = KEV_NETWORK_CLASS;
1485 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
1486 ev_msg.event_code =
1487 KEV_INET_ARPRTRFAILURE;
1488 ev_msg.dv[0].data_ptr = &in_arpfailure;
1489 ev_msg.dv[0].data_length =
1490 sizeof(struct
1491 kev_in_arpfailure);
1492 dlil_post_complete_msg(NULL, &ev_msg);
1493 }
1494 result = EJUSTRETURN;
1495 RT_LOCK(route);
1496 goto release;
1497 } else {
/*
 * Retry budget used up: reject the route, push its expiry out by
 * arpt_down and restart the ask counter.
 */
1498 route->rt_flags |= RTF_REJECT;
1499 rt_setexpire(route,
1500 route->rt_expire + arpt_down);
1501 llinfo->la_asked = 0;
1502 /*
1503 * Remove the packet that was just added above;
1504 * don't free it since we're not returning
1505 * EJUSTRETURN. The caller will handle the
1506 * freeing. Since we haven't dropped rt_lock
1507 * from the time of _addq() above, this packet
1508 * must be at the tail.
1509 */
1510 if (packet != NULL) {
1511 struct mbuf *_m =
1512 _getq_tail(&llinfo->la_holdq);
1513 atomic_add_32(&arpstat.held, -1);
1514 VERIFY(_m == packet);
1515 }
1516 result = EHOSTUNREACH;
1517
1518 /*
1519 * Enqueue work item to invoke callback for this route entry
1520 */
1521 route_event_enqueue_nwk_wq_entry(route, NULL,
1522 ROUTE_LLENTRY_UNREACH, NULL, TRUE);
1523 goto release;
1524 }
1525 }
1526 }
1527
1528 /* The packet is now held inside la_holdq */
1529 result = EJUSTRETURN;
1530
/*
 * Common exit.  Every "goto release" above arrives with route either
 * NULL or locked and referenced.
 */
1531 release:
1532 if (result == EHOSTUNREACH)
1533 atomic_add_32(&arpstat.dropped, 1);
1534
1535 if (route != NULL) {
1536 if (send_probe_notif) {
1537 route_event_enqueue_nwk_wq_entry(route, NULL,
1538 ROUTE_LLENTRY_PROBED, NULL, TRUE);
1539
1540 if (route->rt_flags & RTF_ROUTER) {
1541 struct radix_node_head *rnh = NULL;
1542 struct route_event rt_ev;
1543 route_event_init(&rt_ev, route, NULL, ROUTE_LLENTRY_PROBED);
1544 /*
1545 * We already have a reference on rt. The function
1546 * frees it before returning.
1547 */
/* rt_lock is dropped before rnh_lock is taken for the tree walk */
1548 RT_UNLOCK(route);
1549 lck_mtx_lock(rnh_lock);
1550 rnh = rt_tables[AF_INET];
1551
1552 if (rnh != NULL)
1553 (void) rnh->rnh_walktree(rnh,
1554 route_event_walktree, (void *)&rt_ev);
1555 lck_mtx_unlock(rnh_lock);
1556 RT_LOCK(route);
1557 }
1558 }
1559
/*
 * Give back our reference.  The hint is only dereferenced, never
 * passed to rtfree(); any other route goes through rtfree().
 */
1560 if (route == hint) {
1561 RT_REMREF_LOCKED(route);
1562 RT_UNLOCK(route);
1563 } else {
1564 RT_UNLOCK(route);
1565 rtfree(route);
1566 }
1567 }
1568 if (probing) {
1569 /* Do this after we drop rt_lock to preserve ordering */
1570 lck_mtx_lock(rnh_lock);
1571 arp_sched_probe(NULL);
1572 lck_mtx_unlock(rnh_lock);
1573 }
1574 return (result);
1575 }
1576
1577 errno_t
1578 arp_ip_handle_input(ifnet_t ifp, u_short arpop,
1579 const struct sockaddr_dl *sender_hw, const struct sockaddr_in *sender_ip,
1580 const struct sockaddr_in *target_ip)
1581 {
1582 char ipv4str[MAX_IPv4_STR_LEN];
1583 struct sockaddr_dl proxied;
1584 struct sockaddr_dl *gateway, *target_hw = NULL;
1585 struct ifaddr *ifa;
1586 struct in_ifaddr *ia;
1587 struct in_ifaddr *best_ia = NULL;
1588 struct sockaddr_in best_ia_sin;
1589 route_t route = NULL;
1590 char buf[3 * MAX_HW_LEN]; /* enough for MAX_HW_LEN byte hw address */
1591 struct llinfo_arp *llinfo;
1592 errno_t error;
1593 int created_announcement = 0;
1594 int bridged = 0, is_bridge = 0;
1595 uint32_t rt_evcode = 0;
1596
1597 /*
1598 * Here and other places within this routine where we don't hold
1599 * rnh_lock, trade accuracy for speed for the common scenarios
1600 * and avoid the use of atomic updates.
1601 */
1602 arpstat.received++;
1603
1604 /* Do not respond to requests for 0.0.0.0 */
1605 if (target_ip->sin_addr.s_addr == INADDR_ANY && arpop == ARPOP_REQUEST)
1606 goto done;
1607
1608 if (ifp->if_bridge)
1609 bridged = 1;
1610 if (ifp->if_type == IFT_BRIDGE)
1611 is_bridge = 1;
1612
1613 if (arpop == ARPOP_REPLY)
1614 arpstat.rxreplies++;
1615
1616 /*
1617 * Determine if this ARP is for us
1618 * For a bridge, we want to check the address irrespective
1619 * of the receive interface.
1620 */
1621 lck_rw_lock_shared(in_ifaddr_rwlock);
1622 TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) {
1623 IFA_LOCK_SPIN(&ia->ia_ifa);
1624 if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
1625 (ia->ia_ifp == ifp)) &&
1626 ia->ia_addr.sin_addr.s_addr == target_ip->sin_addr.s_addr) {
1627 best_ia = ia;
1628 best_ia_sin = best_ia->ia_addr;
1629 IFA_ADDREF_LOCKED(&ia->ia_ifa);
1630 IFA_UNLOCK(&ia->ia_ifa);
1631 lck_rw_done(in_ifaddr_rwlock);
1632 goto match;
1633 }
1634 IFA_UNLOCK(&ia->ia_ifa);
1635 }
1636
1637 TAILQ_FOREACH(ia, INADDR_HASH(sender_ip->sin_addr.s_addr), ia_hash) {
1638 IFA_LOCK_SPIN(&ia->ia_ifa);
1639 if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
1640 (ia->ia_ifp == ifp)) &&
1641 ia->ia_addr.sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
1642 best_ia = ia;
1643 best_ia_sin = best_ia->ia_addr;
1644 IFA_ADDREF_LOCKED(&ia->ia_ifa);
1645 IFA_UNLOCK(&ia->ia_ifa);
1646 lck_rw_done(in_ifaddr_rwlock);
1647 goto match;
1648 }
1649 IFA_UNLOCK(&ia->ia_ifa);
1650 }
1651
1652 #define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia) \
1653 (ia->ia_ifp->if_bridge == ifp->if_softc && \
1654 bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) == 0 && \
1655 addr == ia->ia_addr.sin_addr.s_addr)
1656 /*
1657 * Check the case when bridge shares its MAC address with
1658 * some of its children, so packets are claimed by bridge
1659 * itself (bridge_input() does it first), but they are really
1660 * meant to be destined to the bridge member.
1661 */
1662 if (is_bridge) {
1663 TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr),
1664 ia_hash) {
1665 IFA_LOCK_SPIN(&ia->ia_ifa);
1666 if (BDG_MEMBER_MATCHES_ARP(target_ip->sin_addr.s_addr,
1667 ifp, ia)) {
1668 ifp = ia->ia_ifp;
1669 best_ia = ia;
1670 best_ia_sin = best_ia->ia_addr;
1671 IFA_ADDREF_LOCKED(&ia->ia_ifa);
1672 IFA_UNLOCK(&ia->ia_ifa);
1673 lck_rw_done(in_ifaddr_rwlock);
1674 goto match;
1675 }
1676 IFA_UNLOCK(&ia->ia_ifa);
1677 }
1678 }
1679 #undef BDG_MEMBER_MATCHES_ARP
1680 lck_rw_done(in_ifaddr_rwlock);
1681
1682 /*
1683 * No match, use the first inet address on the receive interface
1684 * as a dummy address for the rest of the function; we may be
1685 * proxying for another address.
1686 */
1687 ifnet_lock_shared(ifp);
1688 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1689 IFA_LOCK_SPIN(ifa);
1690 if (ifa->ifa_addr->sa_family != AF_INET) {
1691 IFA_UNLOCK(ifa);
1692 continue;
1693 }
1694 best_ia = (struct in_ifaddr *)ifa;
1695 best_ia_sin = best_ia->ia_addr;
1696 IFA_ADDREF_LOCKED(ifa);
1697 IFA_UNLOCK(ifa);
1698 ifnet_lock_done(ifp);
1699 goto match;
1700 }
1701 ifnet_lock_done(ifp);
1702
1703 /*
1704 * If we're not a bridge member, or if we are but there's no
1705 * IPv4 address to use for the interface, drop the packet.
1706 */
1707 if (!bridged || best_ia == NULL)
1708 goto done;
1709
1710 match:
1711 /* If the packet is from this interface, ignore the packet */
1712 if (bcmp(CONST_LLADDR(sender_hw), IF_LLADDR(ifp),
1713 sender_hw->sdl_alen) == 0)
1714 goto done;
1715
1716 /* Check for a conflict */
1717 if (!bridged &&
1718 sender_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr) {
1719 struct kev_msg ev_msg;
1720 struct kev_in_collision *in_collision;
1721 u_char storage[sizeof (struct kev_in_collision) + MAX_HW_LEN];
1722
1723 bzero(&ev_msg, sizeof (struct kev_msg));
1724 bzero(storage, (sizeof (struct kev_in_collision) + MAX_HW_LEN));
1725 in_collision = (struct kev_in_collision *)(void *)storage;
1726 log(LOG_ERR, "%s duplicate IP address %s sent from "
1727 "address %s\n", if_name(ifp),
1728 inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str,
1729 sizeof (ipv4str)), sdl_addr_to_hex(sender_hw, buf,
1730 sizeof (buf)));
1731
1732 /* Send a kernel event so anyone can learn of the conflict */
1733 in_collision->link_data.if_family = ifp->if_family;
1734 in_collision->link_data.if_unit = ifp->if_unit;
1735 strlcpy(&in_collision->link_data.if_name[0],
1736 ifp->if_name, IFNAMSIZ);
1737 in_collision->ia_ipaddr = sender_ip->sin_addr;
1738 in_collision->hw_len = (sender_hw->sdl_alen < MAX_HW_LEN) ?
1739 sender_hw->sdl_alen : MAX_HW_LEN;
1740 bcopy(CONST_LLADDR(sender_hw), (caddr_t)in_collision->hw_addr,
1741 in_collision->hw_len);
1742 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1743 ev_msg.kev_class = KEV_NETWORK_CLASS;
1744 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
1745 ev_msg.event_code = KEV_INET_ARPCOLLISION;
1746 ev_msg.dv[0].data_ptr = in_collision;
1747 ev_msg.dv[0].data_length =
1748 sizeof (struct kev_in_collision) + in_collision->hw_len;
1749 ev_msg.dv[1].data_length = 0;
1750 dlil_post_complete_msg(NULL, &ev_msg);
1751 atomic_add_32(&arpstat.dupips, 1);
1752 goto respond;
1753 }
1754
1755 /*
1756 * Look up the routing entry. If it doesn't exist and we are the
1757 * target, and the sender isn't 0.0.0.0, go ahead and create one.
1758 * Callee holds a reference on the route and returns with the route
1759 * entry locked, upon success.
1760 */
1761 error = arp_lookup_route(&sender_ip->sin_addr,
1762 (target_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr &&
1763 sender_ip->sin_addr.s_addr != 0), 0, &route, ifp->if_index);
1764
1765 if (error == 0)
1766 RT_LOCK_ASSERT_HELD(route);
1767
1768 if (error || route == NULL || route->rt_gateway == NULL) {
1769 if (arpop != ARPOP_REQUEST)
1770 goto respond;
1771
1772 if (arp_sendllconflict && send_conflicting_probes != 0 &&
1773 (ifp->if_eflags & IFEF_ARPLL) &&
1774 IN_LINKLOCAL(ntohl(target_ip->sin_addr.s_addr)) &&
1775 sender_ip->sin_addr.s_addr == INADDR_ANY) {
1776 /*
1777 * Verify this ARP probe doesn't conflict with
1778 * an IPv4LL we know of on another interface.
1779 */
1780 if (route != NULL) {
1781 RT_REMREF_LOCKED(route);
1782 RT_UNLOCK(route);
1783 route = NULL;
1784 }
1785 /*
1786 * Callee holds a reference on the route and returns
1787 * with the route entry locked, upon success.
1788 */
1789 error = arp_lookup_route(&target_ip->sin_addr, 0, 0,
1790 &route, ifp->if_index);
1791
1792 if (error != 0 || route == NULL ||
1793 route->rt_gateway == NULL)
1794 goto respond;
1795
1796 RT_LOCK_ASSERT_HELD(route);
1797
1798 gateway = SDL(route->rt_gateway);
1799 if (route->rt_ifp != ifp && gateway->sdl_alen != 0 &&
1800 (gateway->sdl_alen != sender_hw->sdl_alen ||
1801 bcmp(CONST_LLADDR(gateway), CONST_LLADDR(sender_hw),
1802 gateway->sdl_alen) != 0)) {
1803 /*
1804 * A node is probing for an IPv4LL we know
1805 * exists on a different interface. We respond
1806 * with a conflicting probe to force the new
1807 * device to pick a different IPv4LL address.
1808 */
1809 if (arp_verbose || log_arp_warnings) {
1810 log(LOG_INFO, "arp: %s on %s sent "
1811 "probe for %s, already on %s\n",
1812 sdl_addr_to_hex(sender_hw, buf,
1813 sizeof (buf)), if_name(ifp),
1814 inet_ntop(AF_INET,
1815 &target_ip->sin_addr, ipv4str,
1816 sizeof (ipv4str)),
1817 if_name(route->rt_ifp));
1818 log(LOG_INFO, "arp: sending "
1819 "conflicting probe to %s on %s\n",
1820 sdl_addr_to_hex(sender_hw, buf,
1821 sizeof (buf)), if_name(ifp));
1822 }
1823 /* Mark use timestamp */
1824 if (route->rt_llinfo != NULL)
1825 arp_llreach_use(route->rt_llinfo);
1826 /* We're done with the route */
1827 RT_REMREF_LOCKED(route);
1828 RT_UNLOCK(route);
1829 route = NULL;
1830 /*
1831 * Send a conservative unicast "ARP probe".
1832 * This should force the other device to pick
1833 * a new number. This will not force the
1834 * device to pick a new number if the device
1835 * has already assigned that number. This will
1836 * not imply to the device that we own that
1837 * address. The link address is always
1838 * present; it's never freed.
1839 */
1840 ifnet_lock_shared(ifp);
1841 ifa = ifp->if_lladdr;
1842 IFA_ADDREF(ifa);
1843 ifnet_lock_done(ifp);
1844 dlil_send_arp_internal(ifp, ARPOP_REQUEST,
1845 SDL(ifa->ifa_addr),
1846 (const struct sockaddr *)sender_ip,
1847 sender_hw,
1848 (const struct sockaddr *)target_ip);
1849 IFA_REMREF(ifa);
1850 ifa = NULL;
1851 atomic_add_32(&arpstat.txconflicts, 1);
1852 }
1853 goto respond;
1854 } else if (keep_announcements != 0 &&
1855 target_ip->sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
1856 /*
1857 * Don't create entry if link-local address and
1858 * link-local is disabled
1859 */
1860 if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) ||
1861 (ifp->if_eflags & IFEF_ARPLL)) {
1862 if (route != NULL) {
1863 RT_REMREF_LOCKED(route);
1864 RT_UNLOCK(route);
1865 route = NULL;
1866 }
1867 /*
1868 * Callee holds a reference on the route and
1869 * returns with the route entry locked, upon
1870 * success.
1871 */
1872 error = arp_lookup_route(&sender_ip->sin_addr,
1873 1, 0, &route, ifp->if_index);
1874
1875 if (error == 0)
1876 RT_LOCK_ASSERT_HELD(route);
1877
1878 if (error == 0 && route != NULL &&
1879 route->rt_gateway != NULL)
1880 created_announcement = 1;
1881 }
1882 if (created_announcement == 0)
1883 goto respond;
1884 } else {
1885 goto respond;
1886 }
1887 }
1888
1889 RT_LOCK_ASSERT_HELD(route);
1890 VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
1891 VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);
1892
1893 gateway = SDL(route->rt_gateway);
1894 if (!bridged && route->rt_ifp != ifp) {
1895 if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) ||
1896 !(ifp->if_eflags & IFEF_ARPLL)) {
1897 if (arp_verbose || log_arp_warnings)
1898 log(LOG_ERR, "arp: %s is on %s but got "
1899 "reply from %s on %s\n",
1900 inet_ntop(AF_INET, &sender_ip->sin_addr,
1901 ipv4str, sizeof (ipv4str)),
1902 if_name(route->rt_ifp),
1903 sdl_addr_to_hex(sender_hw, buf,
1904 sizeof (buf)), if_name(ifp));
1905 goto respond;
1906 } else {
1907 /* Don't change a permanent address */
1908 if (route->rt_expire == 0)
1909 goto respond;
1910
1911 /*
1912 * We're about to check and/or change the route's ifp
1913 * and ifa, so do the lock dance: drop rt_lock, hold
1914 * rnh_lock and re-hold rt_lock to avoid violating the
1915 * lock ordering. We have an extra reference on the
1916 * route, so it won't go away while we do this.
1917 */
1918 RT_UNLOCK(route);
1919 lck_mtx_lock(rnh_lock);
1920 RT_LOCK(route);
1921 /*
1922 * Don't change the cloned route away from the
1923 * parent's interface if the address did resolve
1924 * or if the route is defunct. rt_ifp on both
1925 * the parent and the clone can now be freely
1926 * accessed now that we have acquired rnh_lock.
1927 */
1928 gateway = SDL(route->rt_gateway);
1929 if ((gateway->sdl_alen != 0 &&
1930 route->rt_parent != NULL &&
1931 route->rt_parent->rt_ifp == route->rt_ifp) ||
1932 (route->rt_flags & RTF_CONDEMNED)) {
1933 RT_REMREF_LOCKED(route);
1934 RT_UNLOCK(route);
1935 route = NULL;
1936 lck_mtx_unlock(rnh_lock);
1937 goto respond;
1938 }
1939 if (route->rt_ifp != ifp) {
1940 /*
1941 * Purge any link-layer info caching.
1942 */
1943 if (route->rt_llinfo_purge != NULL)
1944 route->rt_llinfo_purge(route);
1945
1946 /* Adjust route ref count for the interfaces */
1947 if (route->rt_if_ref_fn != NULL) {
1948 route->rt_if_ref_fn(ifp, 1);
1949 route->rt_if_ref_fn(route->rt_ifp, -1);
1950 }
1951 }
1952 /* Change the interface when the existing route is on */
1953 route->rt_ifp = ifp;
1954 /*
1955 * If rmx_mtu is not locked, update it
1956 * to the MTU used by the new interface.
1957 */
1958 if (!(route->rt_rmx.rmx_locks & RTV_MTU))
1959 route->rt_rmx.rmx_mtu = route->rt_ifp->if_mtu;
1960
1961 rtsetifa(route, &best_ia->ia_ifa);
1962 gateway->sdl_index = ifp->if_index;
1963 RT_UNLOCK(route);
1964 lck_mtx_unlock(rnh_lock);
1965 RT_LOCK(route);
1966 /* Don't bother if the route is down */
1967 if (!(route->rt_flags & RTF_UP))
1968 goto respond;
1969 /* Refresh gateway pointer */
1970 gateway = SDL(route->rt_gateway);
1971 }
1972 RT_LOCK_ASSERT_HELD(route);
1973 }
1974
1975 if (gateway->sdl_alen != 0 && bcmp(LLADDR(gateway),
1976 CONST_LLADDR(sender_hw), gateway->sdl_alen) != 0) {
1977 if (route->rt_expire != 0 &&
1978 (arp_verbose || log_arp_warnings)) {
1979 char buf2[3 * MAX_HW_LEN];
1980 log(LOG_INFO, "arp: %s moved from %s to %s on %s\n",
1981 inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str,
1982 sizeof (ipv4str)),
1983 sdl_addr_to_hex(gateway, buf, sizeof (buf)),
1984 sdl_addr_to_hex(sender_hw, buf2, sizeof (buf2)),
1985 if_name(ifp));
1986 } else if (route->rt_expire == 0) {
1987 if (arp_verbose || log_arp_warnings) {
1988 log(LOG_ERR, "arp: %s attempts to modify "
1989 "permanent entry for %s on %s\n",
1990 sdl_addr_to_hex(sender_hw, buf,
1991 sizeof (buf)),
1992 inet_ntop(AF_INET, &sender_ip->sin_addr,
1993 ipv4str, sizeof (ipv4str)),
1994 if_name(ifp));
1995 }
1996 goto respond;
1997 }
1998 }
1999
2000 /* Copy the sender hardware address in to the route's gateway address */
2001 gateway->sdl_alen = sender_hw->sdl_alen;
2002 bcopy(CONST_LLADDR(sender_hw), LLADDR(gateway), gateway->sdl_alen);
2003
2004 /* Update the expire time for the route and clear the reject flag */
2005 if (route->rt_expire != 0)
2006 rt_setexpire(route, net_uptime() + arpt_keep);
2007 route->rt_flags &= ~RTF_REJECT;
2008
2009 /* cache the gateway (sender HW) address */
2010 arp_llreach_alloc(route, ifp, LLADDR(gateway), gateway->sdl_alen,
2011 (arpop == ARPOP_REPLY), &rt_evcode);
2012
2013 llinfo = route->rt_llinfo;
2014 /* send a notification that the route is back up */
2015 if (ifp->if_addrlen == IF_LLREACH_MAXLEN &&
2016 route->rt_flags & RTF_ROUTER &&
2017 llinfo->la_flags & LLINFO_RTRFAIL_EVTSENT) {
2018 struct kev_msg ev_msg;
2019 struct kev_in_arpalive in_arpalive;
2020
2021 llinfo->la_flags &= ~LLINFO_RTRFAIL_EVTSENT;
2022 RT_UNLOCK(route);
2023 bzero(&ev_msg, sizeof(ev_msg));
2024 bzero(&in_arpalive, sizeof(in_arpalive));
2025 in_arpalive.link_data.if_family = ifp->if_family;
2026 in_arpalive.link_data.if_unit = ifp->if_unit;
2027 strlcpy(in_arpalive.link_data.if_name, ifp->if_name, IFNAMSIZ);
2028 ev_msg.vendor_code = KEV_VENDOR_APPLE;
2029 ev_msg.kev_class = KEV_NETWORK_CLASS;
2030 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
2031 ev_msg.event_code = KEV_INET_ARPRTRALIVE;
2032 ev_msg.dv[0].data_ptr = &in_arpalive;
2033 ev_msg.dv[0].data_length = sizeof(struct kev_in_arpalive);
2034 dlil_post_complete_msg(NULL, &ev_msg);
2035 RT_LOCK(route);
2036 }
2037 /* Update the llinfo, send out all queued packets at once */
2038 llinfo->la_asked = 0;
2039 llinfo->la_flags &= ~LLINFO_PROBING;
2040 llinfo->la_prbreq_cnt = 0;
2041
2042 if (rt_evcode) {
2043 /*
2044 * Enqueue work item to invoke callback for this route entry
2045 */
2046 route_event_enqueue_nwk_wq_entry(route, NULL, rt_evcode, NULL, TRUE);
2047
2048 if (route->rt_flags & RTF_ROUTER) {
2049 struct radix_node_head *rnh = NULL;
2050 struct route_event rt_ev;
2051 route_event_init(&rt_ev, route, NULL, rt_evcode);
2052 /*
2053 * We already have a reference on rt. The function
2054 * frees it before returning.
2055 */
2056 RT_UNLOCK(route);
2057 lck_mtx_lock(rnh_lock);
2058 rnh = rt_tables[AF_INET];
2059
2060 if (rnh != NULL)
2061 (void) rnh->rnh_walktree(rnh, route_event_walktree,
2062 (void *)&rt_ev);
2063 lck_mtx_unlock(rnh_lock);
2064 RT_LOCK(route);
2065 }
2066 }
2067
2068 if (!qempty(&llinfo->la_holdq)) {
2069 uint32_t held;
2070 struct mbuf *m0 =
2071 _getq_all(&llinfo->la_holdq, NULL, &held, NULL);
2072 if (arp_verbose) {
2073 log(LOG_DEBUG, "%s: sending %u held packets\n",
2074 __func__, held);
2075 }
2076 atomic_add_32(&arpstat.held, -held);
2077 VERIFY(qempty(&llinfo->la_holdq));
2078 RT_UNLOCK(route);
2079 dlil_output(ifp, PF_INET, m0, (caddr_t)route,
2080 rt_key(route), 0, NULL);
2081 RT_REMREF(route);
2082 route = NULL;
2083 }
2084
2085 respond:
2086 if (route != NULL) {
2087 /* Mark use timestamp if we're going to send a reply */
2088 if (arpop == ARPOP_REQUEST && route->rt_llinfo != NULL)
2089 arp_llreach_use(route->rt_llinfo);
2090 RT_REMREF_LOCKED(route);
2091 RT_UNLOCK(route);
2092 route = NULL;
2093 }
2094
2095 if (arpop != ARPOP_REQUEST)
2096 goto done;
2097
2098 /* See comments at the beginning of this routine */
2099 arpstat.rxrequests++;
2100
2101 /* If we are not the target, check if we should proxy */
2102 if (target_ip->sin_addr.s_addr != best_ia_sin.sin_addr.s_addr) {
2103 /*
2104 * Find a proxy route; callee holds a reference on the
2105 * route and returns with the route entry locked, upon
2106 * success.
2107 */
2108 error = arp_lookup_route(&target_ip->sin_addr, 0, SIN_PROXY,
2109 &route, ifp->if_index);
2110
2111 if (error == 0) {
2112 RT_LOCK_ASSERT_HELD(route);
2113 /*
2114 * Return proxied ARP replies only on the interface
2115 * or bridge cluster where this network resides.
2116 * Otherwise we may conflict with the host we are
2117 * proxying for.
2118 */
2119 if (route->rt_ifp != ifp &&
2120 (route->rt_ifp->if_bridge != ifp->if_bridge ||
2121 ifp->if_bridge == NULL)) {
2122 RT_REMREF_LOCKED(route);
2123 RT_UNLOCK(route);
2124 goto done;
2125 }
2126 proxied = *SDL(route->rt_gateway);
2127 target_hw = &proxied;
2128 } else {
2129 /*
2130 * We don't have a route entry indicating we should
2131 * use proxy. If we aren't supposed to proxy all,
2132 * we are done.
2133 */
2134 if (!arp_proxyall)
2135 goto done;
2136
2137 /*
2138 * See if we have a route to the target ip before
2139 * we proxy it.
2140 */
2141 route = rtalloc1_scoped((struct sockaddr *)
2142 (size_t)target_ip, 0, 0, ifp->if_index);
2143 if (!route)
2144 goto done;
2145
2146 /*
2147 * Don't proxy for hosts already on the same interface.
2148 */
2149 RT_LOCK(route);
2150 if (route->rt_ifp == ifp) {
2151 RT_UNLOCK(route);
2152 rtfree(route);
2153 goto done;
2154 }
2155 }
2156 /* Mark use timestamp */
2157 if (route->rt_llinfo != NULL)
2158 arp_llreach_use(route->rt_llinfo);
2159 RT_REMREF_LOCKED(route);
2160 RT_UNLOCK(route);
2161 }
2162
2163 dlil_send_arp(ifp, ARPOP_REPLY,
2164 target_hw, (const struct sockaddr *)target_ip,
2165 sender_hw, (const struct sockaddr *)sender_ip, 0);
2166
2167 done:
2168 if (best_ia != NULL)
2169 IFA_REMREF(&best_ia->ia_ifa);
2170 return (0);
2171 }
2172
2173 void
2174 arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
2175 {
2176 struct sockaddr *sa;
2177
2178 IFA_LOCK(ifa);
2179 ifa->ifa_rtrequest = arp_rtrequest;
2180 ifa->ifa_flags |= RTF_CLONING;
2181 sa = ifa->ifa_addr;
2182 IFA_UNLOCK(ifa);
2183 dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, NULL, sa, 0);
2184 }
2185
2186 static int
2187 arp_getstat SYSCTL_HANDLER_ARGS
2188 {
2189 #pragma unused(oidp, arg1, arg2)
2190 if (req->oldptr == USER_ADDR_NULL)
2191 req->oldlen = (size_t)sizeof (struct arpstat);
2192
2193 return (SYSCTL_OUT(req, &arpstat, MIN(sizeof (arpstat), req->oldlen)));
2194 }