]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/if_bond.c
xnu-3248.20.55.tar.gz
[apple/xnu.git] / bsd / net / if_bond.c
1 /*
2 * Copyright (c) 2004-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * if_bond.c
31 * - bond/failover interface
32 * - implements IEEE 802.3ad Link Aggregation
33 */
34
35 /*
36 * Modification History:
37 *
38 * April 29, 2004 Dieter Siegmund (dieter@apple.com)
39 * - created
40 */
41
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/queue.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/kern_event.h>
52
53 #include <net/bpf.h>
54 #include <net/ethernet.h>
55 #include <net/if.h>
56 #include <net/kpi_interface.h>
57 #include <net/if_arp.h>
58 #include <net/if_dl.h>
59 #include <net/if_ether.h>
60 #include <net/if_types.h>
61 #include <net/if_bond_var.h>
62 #include <net/ieee8023ad.h>
63 #include <net/lacp.h>
64 #include <net/dlil.h>
65 #include <sys/time.h>
66 #include <net/devtimer.h>
67 #include <net/if_vlan_var.h>
68 #include <net/kpi_protocol.h>
69
70 #include <kern/locks.h>
71 #include <libkern/OSAtomic.h>
72
73 #include <netinet/in.h>
74 #include <netinet/if_ether.h>
75 #include <netinet/in_systm.h>
76 #include <netinet/ip.h>
77 #include <netinet/ip6.h>
78
79 #include <net/if_media.h>
80 #include <net/multicast_list.h>
81
82 static struct ether_addr slow_proto_multicast = {
83 IEEE8023AD_SLOW_PROTO_MULTICAST
84 };
85
86 #define BOND_MAXUNIT 128
87 #define BONDNAME "bond"
88 #define M_BOND M_DEVBUF
89
90 #define EA_FORMAT "%x:%x:%x:%x:%x:%x"
91 #define EA_CH(e, i) ((u_char)((u_char *)(e))[(i)])
92 #define EA_LIST(ea) EA_CH(ea,0),EA_CH(ea,1),EA_CH(ea,2),EA_CH(ea,3),EA_CH(ea,4),EA_CH(ea,5)
93
94 #define timestamp_printf printf
95
96 /**
97 ** bond locks
98 **/
99 static __inline__ lck_grp_t *
100 my_lck_grp_alloc_init(const char * grp_name)
101 {
102 lck_grp_t * grp;
103 lck_grp_attr_t * grp_attrs;
104
105 grp_attrs = lck_grp_attr_alloc_init();
106 grp = lck_grp_alloc_init(grp_name, grp_attrs);
107 lck_grp_attr_free(grp_attrs);
108 return (grp);
109 }
110
111 static __inline__ lck_mtx_t *
112 my_lck_mtx_alloc_init(lck_grp_t * lck_grp)
113 {
114 lck_attr_t * lck_attrs;
115 lck_mtx_t * lck_mtx;
116
117 lck_attrs = lck_attr_alloc_init();
118 lck_mtx = lck_mtx_alloc_init(lck_grp, lck_attrs);
119 lck_attr_free(lck_attrs);
120 return (lck_mtx);
121 }
122
123 static lck_mtx_t * bond_lck_mtx;
124
125 static __inline__ void
126 bond_lock_init(void)
127 {
128 lck_grp_t * bond_lck_grp;
129
130 bond_lck_grp = my_lck_grp_alloc_init("if_bond");
131 bond_lck_mtx = my_lck_mtx_alloc_init(bond_lck_grp);
132 }
133
134 static __inline__ void
135 bond_assert_lock_held(void)
136 {
137 lck_mtx_assert(bond_lck_mtx, LCK_MTX_ASSERT_OWNED);
138 return;
139 }
140
141 static __inline__ void
142 bond_assert_lock_not_held(void)
143 {
144 lck_mtx_assert(bond_lck_mtx, LCK_MTX_ASSERT_NOTOWNED);
145 return;
146 }
147
148 static __inline__ void
149 bond_lock(void)
150 {
151 lck_mtx_lock(bond_lck_mtx);
152 return;
153 }
154
155 static __inline__ void
156 bond_unlock(void)
157 {
158 lck_mtx_unlock(bond_lck_mtx);
159 return;
160 }
161
162 /**
163 ** bond structures, types
164 **/
165
166 struct LAG_info_s {
167 lacp_system li_system;
168 lacp_system_priority li_system_priority;
169 lacp_key li_key;
170 };
171 typedef struct LAG_info_s LAG_info, * LAG_info_ref;
172
/* forward declarations so the list heads can be declared first */
struct bondport_s;
struct ifbond_s;
struct LAG_s;

/* tail-queue head types for ports, bonds and LAGs */
TAILQ_HEAD(port_list, bondport_s);
TAILQ_HEAD(ifbond_list, ifbond_s);
TAILQ_HEAD(lag_list, LAG_s);

typedef struct ifbond_s     ifbond;
typedef struct ifbond_s *   ifbond_ref;
typedef struct bondport_s   bondport;
typedef struct bondport_s * bondport_ref;
182
183 struct LAG_s {
184 TAILQ_ENTRY(LAG_s) lag_list;
185 struct port_list lag_port_list;
186 short lag_port_count;
187 short lag_selected_port_count;
188 int lag_active_media;
189 LAG_info lag_info;
190 };
191 typedef struct LAG_s LAG, * LAG_ref;
192
193 typedef struct partner_state_s {
194 LAG_info ps_lag_info;
195 lacp_port ps_port;
196 lacp_port_priority ps_port_priority;
197 lacp_actor_partner_state ps_state;
198 } partner_state, * partner_state_ref;
199
200 struct ifbond_s {
201 TAILQ_ENTRY(ifbond_s) ifb_bond_list;
202 int ifb_flags;
203 SInt32 ifb_retain_count;
204 char ifb_name[IFNAMSIZ];
205 struct ifnet * ifb_ifp;
206 bpf_packet_func ifb_bpf_input;
207 bpf_packet_func ifb_bpf_output;
208 int ifb_altmtu;
209 struct port_list ifb_port_list;
210 short ifb_port_count;
211 struct lag_list ifb_lag_list;
212 lacp_key ifb_key;
213 short ifb_max_active; /* 0 == unlimited */
214 LAG_ref ifb_active_lag;
215 struct ifmultiaddr * ifb_ifma_slow_proto;
216 bondport_ref * ifb_distributing_array;
217 int ifb_distributing_count;
218 int ifb_last_link_event;
219 int ifb_mode; /* LACP, STATIC */
220 };
221
/*
 * Snapshot of an interface's media state; filled in from a SIOCGIFMEDIA
 * request by interface_media_info().
 */
struct media_info {
    int     mi_active;      /* copy of ifm_active */
    int     mi_status;      /* copy of ifm_status */
};
226
227 enum {
228 ReceiveState_none = 0,
229 ReceiveState_INITIALIZE = 1,
230 ReceiveState_PORT_DISABLED = 2,
231 ReceiveState_EXPIRED = 3,
232 ReceiveState_LACP_DISABLED = 4,
233 ReceiveState_DEFAULTED = 5,
234 ReceiveState_CURRENT = 6,
235 };
236
237 typedef u_char ReceiveState;
238
239 enum {
240 SelectedState_UNSELECTED = IF_BOND_STATUS_SELECTED_STATE_UNSELECTED,
241 SelectedState_SELECTED = IF_BOND_STATUS_SELECTED_STATE_SELECTED,
242 SelectedState_STANDBY = IF_BOND_STATUS_SELECTED_STATE_STANDBY
243 };
244 typedef u_char SelectedState;
245
246 static __inline__ const char *
247 SelectedStateString(SelectedState s)
248 {
249 static const char * names[] = { "UNSELECTED", "SELECTED", "STANDBY" };
250
251 if (s <= SelectedState_STANDBY) {
252 return (names[s]);
253 }
254 return ("<unknown>");
255 }
256
257 enum {
258 MuxState_none = 0,
259 MuxState_DETACHED = 1,
260 MuxState_WAITING = 2,
261 MuxState_ATTACHED = 3,
262 MuxState_COLLECTING_DISTRIBUTING = 4,
263 };
264
265 typedef u_char MuxState;
266
267 struct bondport_s {
268 TAILQ_ENTRY(bondport_s) po_port_list;
269 ifbond_ref po_bond;
270 struct multicast_list po_multicast;
271 struct ifnet * po_ifp;
272 struct ether_addr po_saved_addr;
273 int po_enabled;
274 char po_name[IFNAMSIZ];
275 struct ifdevmtu po_devmtu;
276
277 /* LACP */
278 TAILQ_ENTRY(bondport_s) po_lag_port_list;
279 devtimer_ref po_current_while_timer;
280 devtimer_ref po_periodic_timer;
281 devtimer_ref po_wait_while_timer;
282 devtimer_ref po_transmit_timer;
283 partner_state po_partner_state;
284 lacp_port_priority po_priority;
285 lacp_actor_partner_state po_actor_state;
286 u_char po_flags;
287 u_char po_periodic_interval;
288 u_char po_n_transmit;
289 ReceiveState po_receive_state;
290 MuxState po_mux_state;
291 SelectedState po_selected;
292 int32_t po_last_transmit_secs;
293 struct media_info po_media_info;
294 LAG_ref po_lag;
295 };
296
297 #define IFBF_PROMISC 0x1 /* promiscuous mode */
298 #define IFBF_IF_DETACHING 0x2 /* interface is detaching */
299 #define IFBF_LLADDR 0x4 /* specific link address requested */
300 #define IFBF_CHANGE_IN_PROGRESS 0x8 /* interface add/remove in progress */
301
302 static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p,
303 user_addr_t datap);
304
305 static __inline__ int
306 ifbond_flags_if_detaching(ifbond_ref ifb)
307 {
308 return ((ifb->ifb_flags & IFBF_IF_DETACHING) != 0);
309 }
310
311 static __inline__ void
312 ifbond_flags_set_if_detaching(ifbond_ref ifb)
313 {
314 ifb->ifb_flags |= IFBF_IF_DETACHING;
315 return;
316 }
317
318 static __inline__ int
319 ifbond_flags_lladdr(ifbond_ref ifb)
320 {
321 return ((ifb->ifb_flags & IFBF_LLADDR) != 0);
322 }
323
324 static __inline__ int
325 ifbond_flags_change_in_progress(ifbond_ref ifb)
326 {
327 return ((ifb->ifb_flags & IFBF_CHANGE_IN_PROGRESS) != 0);
328 }
329
330 static __inline__ void
331 ifbond_flags_set_change_in_progress(ifbond_ref ifb)
332 {
333 ifb->ifb_flags |= IFBF_CHANGE_IN_PROGRESS;
334 return;
335 }
336
337 static __inline__ void
338 ifbond_flags_clear_change_in_progress(ifbond_ref ifb)
339 {
340 ifb->ifb_flags &= ~IFBF_CHANGE_IN_PROGRESS;
341 return;
342 }
343
344 /*
345 * bondport_ref->po_flags bits
346 */
/* one bit per flag; po_flags is a u_char so there is room for eight */
#define BONDPORT_FLAGS_NTT                  0x01
#define BONDPORT_FLAGS_READY                0x02
#define BONDPORT_FLAGS_SELECTED_CHANGED     0x04
#define BONDPORT_FLAGS_MUX_ATTACHED         0x08
#define BONDPORT_FLAGS_DISTRIBUTING         0x10
#define BONDPORT_FLAGS_UNUSED2              0x20
#define BONDPORT_FLAGS_UNUSED3              0x40
#define BONDPORT_FLAGS_UNUSED4              0x80
355
356 static __inline__ void
357 bondport_flags_set_ntt(bondport_ref p)
358 {
359 p->po_flags |= BONDPORT_FLAGS_NTT;
360 return;
361 }
362
363 static __inline__ void
364 bondport_flags_clear_ntt(bondport_ref p)
365 {
366 p->po_flags &= ~BONDPORT_FLAGS_NTT;
367 return;
368 }
369
370 static __inline__ int
371 bondport_flags_ntt(bondport_ref p)
372 {
373 return ((p->po_flags & BONDPORT_FLAGS_NTT) != 0);
374 }
375
376 static __inline__ void
377 bondport_flags_set_ready(bondport_ref p)
378 {
379 p->po_flags |= BONDPORT_FLAGS_READY;
380 return;
381 }
382
383 static __inline__ void
384 bondport_flags_clear_ready(bondport_ref p)
385 {
386 p->po_flags &= ~BONDPORT_FLAGS_READY;
387 return;
388 }
389
390 static __inline__ int
391 bondport_flags_ready(bondport_ref p)
392 {
393 return ((p->po_flags & BONDPORT_FLAGS_READY) != 0);
394 }
395
396 static __inline__ void
397 bondport_flags_set_selected_changed(bondport_ref p)
398 {
399 p->po_flags |= BONDPORT_FLAGS_SELECTED_CHANGED;
400 return;
401 }
402
403 static __inline__ void
404 bondport_flags_clear_selected_changed(bondport_ref p)
405 {
406 p->po_flags &= ~BONDPORT_FLAGS_SELECTED_CHANGED;
407 return;
408 }
409
410 static __inline__ int
411 bondport_flags_selected_changed(bondport_ref p)
412 {
413 return ((p->po_flags & BONDPORT_FLAGS_SELECTED_CHANGED) != 0);
414 }
415
416 static __inline__ void
417 bondport_flags_set_mux_attached(bondport_ref p)
418 {
419 p->po_flags |= BONDPORT_FLAGS_MUX_ATTACHED;
420 return;
421 }
422
423 static __inline__ void
424 bondport_flags_clear_mux_attached(bondport_ref p)
425 {
426 p->po_flags &= ~BONDPORT_FLAGS_MUX_ATTACHED;
427 return;
428 }
429
430 static __inline__ int
431 bondport_flags_mux_attached(bondport_ref p)
432 {
433 return ((p->po_flags & BONDPORT_FLAGS_MUX_ATTACHED) != 0);
434 }
435
436 static __inline__ void
437 bondport_flags_set_distributing(bondport_ref p)
438 {
439 p->po_flags |= BONDPORT_FLAGS_DISTRIBUTING;
440 return;
441 }
442
443 static __inline__ void
444 bondport_flags_clear_distributing(bondport_ref p)
445 {
446 p->po_flags &= ~BONDPORT_FLAGS_DISTRIBUTING;
447 return;
448 }
449
450 static __inline__ int
451 bondport_flags_distributing(bondport_ref p)
452 {
453 return ((p->po_flags & BONDPORT_FLAGS_DISTRIBUTING) != 0);
454 }
455
456 typedef struct bond_globals_s {
457 struct ifbond_list ifbond_list;
458 lacp_system system;
459 lacp_system_priority system_priority;
460 int verbose;
461 } * bond_globals_ref;
462
463 static bond_globals_ref g_bond;
464
465 /**
466 ** packet_buffer routines
467 ** - thin wrapper for mbuf
468 **/
469
470 typedef struct mbuf * packet_buffer_ref;
471
472 static packet_buffer_ref
473 packet_buffer_allocate(int length)
474 {
475 packet_buffer_ref m;
476 int size;
477
478 /* leave room for ethernet header */
479 size = length + sizeof(struct ether_header);
480 if (size > (int)MHLEN) {
481 if (size > (int)MCLBYTES) {
482 printf("bond: packet_buffer_allocate size %d > max %u\n",
483 size, MCLBYTES);
484 return (NULL);
485 }
486 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
487 } else {
488 m = m_gethdr(M_WAITOK, MT_DATA);
489 }
490 if (m == NULL) {
491 return (NULL);
492 }
493 m->m_len = size;
494 m->m_pkthdr.len = size;
495 return (m);
496 }
497
498 static void *
499 packet_buffer_byteptr(packet_buffer_ref buf)
500 {
501 return (buf->m_data + sizeof(struct ether_header));
502 }
503
/* Event codes passed as the `event' argument of the bondport_*_machine() functions. */
typedef enum {
    LAEventStart,
    LAEventTimeout,
    LAEventPacket,
    LAEventMediaChange,
    LAEventSelectedChange,
    LAEventPortMoved,
    LAEventReady
} LAEvent;
513
514 /**
515 ** Receive machine
516 **/
517 static void
518 bondport_receive_machine(bondport_ref p, LAEvent event,
519 void * event_data);
520 /**
521 ** Periodic Transmission machine
522 **/
523 static void
524 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
525 void * event_data);
526
527 /**
528 ** Transmit machine
529 **/
530 #define TRANSMIT_MACHINE_TX_IMMEDIATE ((void *)1)
531
532 static void
533 bondport_transmit_machine(bondport_ref p, LAEvent event,
534 void * event_data);
535
536 /**
537 ** Mux machine
538 **/
539 static void
540 bondport_mux_machine(bondport_ref p, LAEvent event,
541 void * event_data);
542
543 /**
544 ** bond, LAG
545 **/
546 static void
547 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media);
548
549 static void
550 ifbond_deactivate_LAG(ifbond_ref bond, LAG_ref lag);
551
552 static int
553 ifbond_all_ports_ready(ifbond_ref bond);
554
555 static LAG_ref
556 ifbond_find_best_LAG(ifbond_ref bond, int * active_media);
557
558 static int
559 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media);
560
561 static int
562 ifbond_selection(ifbond_ref bond);
563
564
565 /**
566 ** bondport
567 **/
568
569 static void
570 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p);
571
572 static void
573 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf);
574
575 static bondport_ref
576 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
577 int active, int short_timeout, int * error);
578 static void
579 bondport_start(bondport_ref p);
580
581 static void
582 bondport_free(bondport_ref p);
583
584 static int
585 bondport_aggregatable(bondport_ref p);
586
587 static int
588 bondport_remove_from_LAG(bondport_ref p);
589
590 static void
591 bondport_set_selected(bondport_ref p, SelectedState s);
592
593 static int
594 bondport_matches_LAG(bondport_ref p, LAG_ref lag);
595
596 static void
597 bondport_link_status_changed(bondport_ref p);
598
599 static void
600 bondport_enable_distributing(bondport_ref p);
601
602 static void
603 bondport_disable_distributing(bondport_ref p);
604
605 static __inline__ int
606 bondport_collecting(bondport_ref p)
607 {
608 if (p->po_bond->ifb_mode == IF_BOND_MODE_LACP) {
609 return (lacp_actor_partner_state_collecting(p->po_actor_state));
610 }
611 return (TRUE);
612 }
613
614 /**
615 ** bond interface/dlil specific routines
616 **/
617 static int bond_clone_create(struct if_clone *, u_int32_t, void *);
618 static int bond_clone_destroy(struct ifnet *);
619 static int bond_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t m,
620 char *frame_header);
621 static int bond_output(struct ifnet *ifp, struct mbuf *m);
622 static int bond_ioctl(struct ifnet *ifp, u_long cmd, void * addr);
623 static int bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode,
624 bpf_packet_func func);
625 static int bond_attach_protocol(struct ifnet *ifp);
626 static int bond_detach_protocol(struct ifnet *ifp);
627 static int bond_setmulti(struct ifnet *ifp);
628 static int bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp);
629 static int bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp);
630 static void bond_if_free(struct ifnet * ifp);
631
632 static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME,
633 bond_clone_create,
634 bond_clone_destroy,
635 0,
636 BOND_MAXUNIT);
637 static void interface_link_event(struct ifnet * ifp, u_int32_t event_code);
638
639 static int
640 siocsifmtu(struct ifnet * ifp, int mtu)
641 {
642 struct ifreq ifr;
643
644 bzero(&ifr, sizeof(ifr));
645 ifr.ifr_mtu = mtu;
646 return (ifnet_ioctl(ifp, 0, SIOCSIFMTU, &ifr));
647 }
648
649 static int
650 siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
651 {
652 struct ifreq ifr;
653 int error;
654
655 bzero(&ifr, sizeof(ifr));
656 error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
657 if (error == 0) {
658 *ifdm_p = ifr.ifr_devmtu;
659 }
660 return (error);
661 }
662
663 static __inline__ void
664 ether_addr_copy(void * dest, const void * source)
665 {
666 bcopy(source, dest, ETHER_ADDR_LEN);
667 return;
668 }
669
670 static __inline__ void
671 ifbond_retain(ifbond_ref ifb)
672 {
673 OSIncrementAtomic(&ifb->ifb_retain_count);
674 }
675
676 static __inline__ void
677 ifbond_release(ifbond_ref ifb)
678 {
679 UInt32 old_retain_count;
680
681 old_retain_count = OSDecrementAtomic(&ifb->ifb_retain_count);
682 switch (old_retain_count) {
683 case 0:
684 panic("ifbond_release: retain count is 0\n");
685 break;
686 case 1:
687 if (g_bond->verbose) {
688 printf("ifbond_release(%s)\n", ifb->ifb_name);
689 }
690 if (ifb->ifb_ifma_slow_proto != NULL) {
691 if (g_bond->verbose) {
692 printf("ifbond_release(%s) removing multicast\n",
693 ifb->ifb_name);
694 }
695 (void) if_delmulti_anon(ifb->ifb_ifma_slow_proto->ifma_ifp,
696 ifb->ifb_ifma_slow_proto->ifma_addr);
697 IFMA_REMREF(ifb->ifb_ifma_slow_proto);
698 }
699 if (ifb->ifb_distributing_array != NULL) {
700 FREE(ifb->ifb_distributing_array, M_BOND);
701 }
702 FREE(ifb, M_BOND);
703 break;
704 default:
705 break;
706 }
707 return;
708 }
709
710 /*
711 * Function: ifbond_wait
712 * Purpose:
713 * Allows a single thread to gain exclusive access to the ifbond
714 * data structure. Some operations take a long time to complete,
715 * and some have side-effects that we can't predict. Holding the
716 * bond_lock() across such operations is not possible.
717 *
718 * For example:
719 * 1) The SIOCSIFLLADDR ioctl takes a long time (several seconds) to
720 * complete. Simply holding the bond_lock() would freeze all other
721 * data structure accesses during that time.
722 * 2) When we attach our protocol to the interface, a dlil event is
723 * generated and invokes our bond_event() function. bond_event()
724 * needs to take the bond_lock(), but we're already holding it, so
725 * we're deadlocked against ourselves.
726 * Notes:
727 * Before calling, you must be holding the bond_lock and have taken
728 * a reference on the ifbond_ref.
729 */
730 static void
731 ifbond_wait(ifbond_ref ifb, const char * msg)
732 {
733 int waited = 0;
734
735 /* other add/remove in progress */
736 while (ifbond_flags_change_in_progress(ifb)) {
737 if (g_bond->verbose) {
738 printf("%s: %s msleep\n", ifb->ifb_name, msg);
739 }
740 waited = 1;
741 (void)msleep(ifb, bond_lck_mtx, PZERO, msg, 0);
742 }
743 /* prevent other bond list remove/add from taking place */
744 ifbond_flags_set_change_in_progress(ifb);
745 if (g_bond->verbose && waited) {
746 printf("%s: %s woke up\n", ifb->ifb_name, msg);
747 }
748 return;
749 }
750
751 /*
752 * Function: ifbond_signal
753 * Purpose:
754 * Allows the thread that previously invoked ifbond_wait() to
755 * give up exclusive access to the ifbond data structure, and wake up
756 * any other threads waiting to access
757 * Notes:
758 * Before calling, you must be holding the bond_lock and have taken
759 * a reference on the ifbond_ref.
760 */
761 static void
762 ifbond_signal(ifbond_ref ifb, const char * msg)
763 {
764 ifbond_flags_clear_change_in_progress(ifb);
765 wakeup((caddr_t)ifb);
766 if (g_bond->verbose) {
767 printf("%s: %s wakeup\n", ifb->ifb_name, msg);
768 }
769 return;
770 }
771
772 /**
773 ** Media information
774 **/
775
776 static int
777 link_speed(int active)
778 {
779 switch (IFM_SUBTYPE(active)) {
780 case IFM_10_T:
781 case IFM_10_2:
782 case IFM_10_5:
783 case IFM_10_STP:
784 case IFM_10_FL:
785 return (10);
786 case IFM_100_TX:
787 case IFM_100_FX:
788 case IFM_100_T4:
789 case IFM_100_VG:
790 case IFM_100_T2:
791 return (100);
792 case IFM_1000_SX:
793 case IFM_1000_LX:
794 case IFM_1000_CX:
795 case IFM_1000_TX:
796 return (1000);
797 case IFM_HPNA_1:
798 return (0);
799 default:
800 /* assume that new defined types are going to be at least 10GigE */
801 case IFM_10G_SR:
802 case IFM_10G_LR:
803 return (10000);
804 case IFM_2500_T:
805 return (2500);
806 case IFM_5000_T:
807 return (5000);
808 }
809 }
810
811 static __inline__ int
812 media_active(const struct media_info * mi)
813 {
814 if ((mi->mi_status & IFM_AVALID) == 0) {
815 return (1);
816 }
817 return ((mi->mi_status & IFM_ACTIVE) != 0);
818 }
819
820 static __inline__ int
821 media_full_duplex(const struct media_info * mi)
822 {
823 return ((mi->mi_active & IFM_FDX) != 0);
824 }
825
826 static __inline__ int
827 media_speed(const struct media_info * mi)
828 {
829 return (link_speed(mi->mi_active));
830 }
831
832 static struct media_info
833 interface_media_info(struct ifnet * ifp)
834 {
835 struct ifmediareq ifmr;
836 struct media_info mi;
837
838 bzero(&mi, sizeof(mi));
839 bzero(&ifmr, sizeof(ifmr));
840 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
841 if (ifmr.ifm_count != 0) {
842 mi.mi_status = ifmr.ifm_status;
843 mi.mi_active = ifmr.ifm_active;
844 }
845 }
846 return (mi);
847 }
848
849 static int
850 if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p)
851 {
852 struct ifreq ifr;
853
854 /*
855 * XXX setting the sa_len to ETHER_ADDR_LEN is wrong, but the driver
856 * currently expects it that way
857 */
858 ifr.ifr_addr.sa_family = AF_UNSPEC;
859 ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
860 ether_addr_copy(ifr.ifr_addr.sa_data, ea_p);
861 return (ifnet_ioctl(ifp, 0, SIOCSIFLLADDR, &ifr));
862 }
863
864 /**
865 ** bond_globals
866 **/
867 static bond_globals_ref
868 bond_globals_create(lacp_system_priority sys_pri,
869 lacp_system_ref sys)
870 {
871 bond_globals_ref b;
872
873 b = _MALLOC(sizeof(*b), M_BOND, M_WAITOK | M_ZERO);
874 if (b == NULL) {
875 return (NULL);
876 }
877 TAILQ_INIT(&b->ifbond_list);
878 b->system = *sys;
879 b->system_priority = sys_pri;
880 return (b);
881 }
882
883 static int
884 bond_globals_init(void)
885 {
886 bond_globals_ref b;
887 int i;
888 struct ifnet * ifp;
889
890 bond_assert_lock_not_held();
891
892 if (g_bond != NULL) {
893 return (0);
894 }
895
896 /*
897 * use en0's ethernet address as the system identifier, and if it's not
898 * there, use en1 .. en3
899 */
900 ifp = NULL;
901 for (i = 0; i < 4; i++) {
902 char ifname[IFNAMSIZ+1];
903 snprintf(ifname, sizeof(ifname), "en%d", i);
904 ifp = ifunit(ifname);
905 if (ifp != NULL) {
906 break;
907 }
908 }
909 b = NULL;
910 if (ifp != NULL) {
911 b = bond_globals_create(0x8000, (lacp_system_ref)IF_LLADDR(ifp));
912 }
913 bond_lock();
914 if (g_bond != NULL) {
915 bond_unlock();
916 _FREE(b, M_BOND);
917 return (0);
918 }
919 g_bond = b;
920 bond_unlock();
921 if (ifp == NULL) {
922 return (ENXIO);
923 }
924 if (b == NULL) {
925 return (ENOMEM);
926 }
927 return (0);
928 }
929
930 static void
931 bond_bpf_vlan(struct ifnet * ifp, struct mbuf * m,
932 const struct ether_header * eh_p,
933 u_int16_t vlan_tag, bpf_packet_func func)
934 {
935 struct ether_vlan_header * vlh_p;
936 struct mbuf * vl_m;
937
938 vl_m = m_get(M_DONTWAIT, MT_DATA);
939 if (vl_m == NULL) {
940 return;
941 }
942 /* populate a new mbuf containing the vlan ethernet header */
943 vl_m->m_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
944 vlh_p = mtod(vl_m, struct ether_vlan_header *);
945 bcopy(eh_p, vlh_p, offsetof(struct ether_header, ether_type));
946 vlh_p->evl_encap_proto = htons(ETHERTYPE_VLAN);
947 vlh_p->evl_tag = htons(vlan_tag);
948 vlh_p->evl_proto = eh_p->ether_type;
949 vl_m->m_next = m;
950 (*func)(ifp, vl_m);
951 vl_m->m_next = NULL;
952 m_free(vl_m);
953 return;
954 }
955
956 static __inline__ void
957 bond_bpf_output(struct ifnet * ifp, struct mbuf * m,
958 bpf_packet_func func)
959 {
960 if (func != NULL) {
961 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
962 const struct ether_header * eh_p;
963 eh_p = mtod(m, const struct ether_header *);
964 m->m_data += ETHER_HDR_LEN;
965 m->m_len -= ETHER_HDR_LEN;
966 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
967 m->m_data -= ETHER_HDR_LEN;
968 m->m_len += ETHER_HDR_LEN;
969 } else {
970 (*func)(ifp, m);
971 }
972 }
973 return;
974 }
975
976 static __inline__ void
977 bond_bpf_input(ifnet_t ifp, mbuf_t m, const struct ether_header * eh_p,
978 bpf_packet_func func)
979 {
980 if (func != NULL) {
981 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
982 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
983 } else {
984 /* restore the header */
985 m->m_data -= ETHER_HDR_LEN;
986 m->m_len += ETHER_HDR_LEN;
987 (*func)(ifp, m);
988 m->m_data += ETHER_HDR_LEN;
989 m->m_len -= ETHER_HDR_LEN;
990 }
991 }
992 return;
993 }
994
995 /*
996 * Function: bond_setmulti
997 * Purpose:
998 * Enable multicast reception on "our" interface by enabling multicasts on
999 * each of the member ports.
1000 */
1001 static int
1002 bond_setmulti(struct ifnet * ifp)
1003 {
1004 ifbond_ref ifb;
1005 int error;
1006 int result = 0;
1007 bondport_ref p;
1008
1009 bond_lock();
1010 ifb = ifnet_softc(ifp);
1011 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1012 || TAILQ_EMPTY(&ifb->ifb_port_list)) {
1013 bond_unlock();
1014 return (0);
1015 }
1016 ifbond_retain(ifb);
1017 ifbond_wait(ifb, "bond_setmulti");
1018
1019 if (ifbond_flags_if_detaching(ifb)) {
1020 /* someone destroyed the bond while we were waiting */
1021 result = EBUSY;
1022 goto signal_done;
1023 }
1024 bond_unlock();
1025
1026 /* ifbond_wait() let's us safely walk the list without holding the lock */
1027 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1028 struct ifnet * port_ifp = p->po_ifp;
1029
1030 error = multicast_list_program(&p->po_multicast,
1031 ifp, port_ifp);
1032 if (error != 0) {
1033 printf("bond_setmulti(%s): "
1034 "multicast_list_program(%s%d) failed, %d\n",
1035 ifb->ifb_name, ifnet_name(port_ifp),
1036 ifnet_unit(port_ifp), error);
1037 result = error;
1038 }
1039 }
1040 bond_lock();
1041 signal_done:
1042 ifbond_signal(ifb, "bond_setmulti");
1043 bond_unlock();
1044 ifbond_release(ifb);
1045 return (result);
1046 }
1047
1048 static int
1049 bond_clone_attach(void)
1050 {
1051 int error;
1052
1053 if ((error = if_clone_attach(&bond_cloner)) != 0)
1054 return error;
1055 bond_lock_init();
1056 return 0;
1057 }
1058
1059 static int
1060 ifbond_add_slow_proto_multicast(ifbond_ref ifb)
1061 {
1062 int error;
1063 struct ifmultiaddr * ifma = NULL;
1064 struct sockaddr_dl sdl;
1065
1066 bond_assert_lock_not_held();
1067
1068 bzero(&sdl, sizeof(sdl));
1069 sdl.sdl_len = sizeof(sdl);
1070 sdl.sdl_family = AF_LINK;
1071 sdl.sdl_type = IFT_ETHER;
1072 sdl.sdl_nlen = 0;
1073 sdl.sdl_alen = sizeof(slow_proto_multicast);
1074 bcopy(&slow_proto_multicast, sdl.sdl_data, sizeof(slow_proto_multicast));
1075 error = if_addmulti_anon(ifb->ifb_ifp, (struct sockaddr *)&sdl, &ifma);
1076 if (error == 0) {
1077 ifb->ifb_ifma_slow_proto = ifma;
1078 }
1079 return (error);
1080 }
1081
1082 static int
1083 bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params)
1084 {
1085 int error;
1086 ifbond_ref ifb;
1087 ifnet_t ifp;
1088 struct ifnet_init_eparams bond_init;
1089
1090 error = bond_globals_init();
1091 if (error != 0) {
1092 return (error);
1093 }
1094
1095 ifb = _MALLOC(sizeof(ifbond), M_BOND, M_WAITOK | M_ZERO);
1096 if (ifb == NULL) {
1097 return (ENOMEM);
1098 }
1099
1100 ifbond_retain(ifb);
1101 TAILQ_INIT(&ifb->ifb_port_list);
1102 TAILQ_INIT(&ifb->ifb_lag_list);
1103 ifb->ifb_key = unit + 1;
1104
1105 /* use the interface name as the unique id for ifp recycle */
1106 if ((u_int32_t)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d",
1107 ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) {
1108 ifbond_release(ifb);
1109 return (EINVAL);
1110 }
1111
1112 bzero(&bond_init, sizeof(bond_init));
1113 bond_init.ver = IFNET_INIT_CURRENT_VERSION;
1114 bond_init.len = sizeof (bond_init);
1115 bond_init.flags = IFNET_INIT_LEGACY;
1116 bond_init.uniqueid = ifb->ifb_name;
1117 bond_init.uniqueid_len = strlen(ifb->ifb_name);
1118 bond_init.name = ifc->ifc_name;
1119 bond_init.unit = unit;
1120 bond_init.family = IFNET_FAMILY_BOND;
1121 bond_init.type = IFT_IEEE8023ADLAG;
1122 bond_init.output = bond_output;
1123 bond_init.demux = ether_demux;
1124 bond_init.add_proto = ether_add_proto;
1125 bond_init.del_proto = ether_del_proto;
1126 bond_init.check_multi = ether_check_multi;
1127 bond_init.framer_extended = ether_frameout_extended;
1128 bond_init.ioctl = bond_ioctl;
1129 bond_init.set_bpf_tap = bond_set_bpf_tap;
1130 bond_init.detach = bond_if_free;
1131 bond_init.broadcast_addr = etherbroadcastaddr;
1132 bond_init.broadcast_len = ETHER_ADDR_LEN;
1133 bond_init.softc = ifb;
1134 error = ifnet_allocate_extended(&bond_init, &ifp);
1135
1136 if (error) {
1137 ifbond_release(ifb);
1138 return (error);
1139 }
1140
1141 ifb->ifb_ifp = ifp;
1142 ifnet_set_offload(ifp, 0);
1143 ifnet_set_addrlen(ifp, ETHER_ADDR_LEN); /* XXX ethernet specific */
1144 ifnet_set_flags(ifp, IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, 0xffff);
1145 ifnet_set_baudrate(ifp, 0);
1146 ifnet_set_mtu(ifp, 0);
1147
1148 error = ifnet_attach(ifp, NULL);
1149 if (error != 0) {
1150 ifnet_release(ifp);
1151 ifbond_release(ifb);
1152 return (error);
1153 }
1154 error = ifbond_add_slow_proto_multicast(ifb);
1155 if (error != 0) {
1156 printf("bond_clone_create(%s): "
1157 "failed to add slow_proto multicast, %d\n",
1158 ifb->ifb_name, error);
1159 }
1160
1161 /* attach as ethernet */
1162 bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1163
1164 bond_lock();
1165 TAILQ_INSERT_HEAD(&g_bond->ifbond_list, ifb, ifb_bond_list);
1166 bond_unlock();
1167
1168 return (0);
1169 }
1170
1171 static void
1172 bond_remove_all_interfaces(ifbond_ref ifb)
1173 {
1174 bondport_ref p;
1175
1176 bond_assert_lock_held();
1177
1178 /*
1179 * do this in reverse order to avoid re-programming the mac address
1180 * as each head interface is removed
1181 */
1182 while ((p = TAILQ_LAST(&ifb->ifb_port_list, port_list)) != NULL) {
1183 bond_remove_interface(ifb, p->po_ifp);
1184 }
1185 return;
1186 }
1187
1188 static void
1189 bond_remove(ifbond_ref ifb)
1190 {
1191 bond_assert_lock_held();
1192 ifbond_flags_set_if_detaching(ifb);
1193 TAILQ_REMOVE(&g_bond->ifbond_list, ifb, ifb_bond_list);
1194 bond_remove_all_interfaces(ifb);
1195 return;
1196 }
1197
/*
 * Detach ifp via ifnet_detach(); a failure is logged but not reported
 * to the caller.
 */
static void
bond_if_detach(struct ifnet * ifp)
{
    int     status = ifnet_detach(ifp);

    if (status != 0) {
        printf("bond_if_detach %s%d: ifnet_detach failed, %d\n",
               ifnet_name(ifp), ifnet_unit(ifp), status);
    }
}
1211
1212 static int
1213 bond_clone_destroy(struct ifnet * ifp)
1214 {
1215 ifbond_ref ifb;
1216
1217 bond_lock();
1218 ifb = ifnet_softc(ifp);
1219 if (ifb == NULL || ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
1220 bond_unlock();
1221 return 0;
1222 }
1223 if (ifbond_flags_if_detaching(ifb)) {
1224 bond_unlock();
1225 return 0;
1226 }
1227 bond_remove(ifb);
1228 bond_unlock();
1229 bond_if_detach(ifp);
1230 return 0;
1231 }
1232
1233 static int
1234 bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode, bpf_packet_func func)
1235 {
1236 ifbond_ref ifb;
1237
1238 bond_lock();
1239 ifb = ifnet_softc(ifp);
1240 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
1241 bond_unlock();
1242 return (ENODEV);
1243 }
1244 switch (mode) {
1245 case BPF_TAP_DISABLE:
1246 ifb->ifb_bpf_input = ifb->ifb_bpf_output = NULL;
1247 break;
1248
1249 case BPF_TAP_INPUT:
1250 ifb->ifb_bpf_input = func;
1251 break;
1252
1253 case BPF_TAP_OUTPUT:
1254 ifb->ifb_bpf_output = func;
1255 break;
1256
1257 case BPF_TAP_INPUT_OUTPUT:
1258 ifb->ifb_bpf_input = ifb->ifb_bpf_output = func;
1259 break;
1260 default:
1261 break;
1262 }
1263 bond_unlock();
1264 return 0;
1265 }
1266
/*
 * Function: ether_header_hash
 * Purpose:
 *   Compute a 32-bit hash from the ethernet header: the last two bytes of
 *   the destination address form the upper 16 bits, the ethernet type is
 *   OR-ed into the lower bits, and the result is XOR-ed with the first
 *   four bytes of the destination address.  All fields are used as they
 *   sit in memory (no byte-order conversion).
 *
 * NOTE(review): the header fields are read through uint16_t/uint32_t
 * pointer casts, which presumes the platform tolerates such (possibly
 * misaligned) loads -- confirm for the supported architectures.
 */
static uint32_t
ether_header_hash(struct ether_header * eh_p)
{
    uint32_t    h;

    /*
     * Widen to uint32_t before shifting: a uint16_t operand is promoted
     * to (signed) int, and shifting a value >= 0x8000 left by 16 would
     * overflow it, which is undefined behavior.
     */
    h = ((uint32_t)*((uint16_t *)&eh_p->ether_dhost[4]) << 16)
        | eh_p->ether_type;
    h ^= *((uint32_t *)&eh_p->ether_dhost[0]);
    return (h);
}
1278
1279 static struct mbuf *
1280 S_mbuf_skip_to_offset(struct mbuf * m, int32_t * offset)
1281 {
1282 int len;
1283
1284 len = m->m_len;
1285 while (*offset >= len) {
1286 *offset -= len;
1287 m = m->m_next;
1288 if (m == NULL) {
1289 break;
1290 }
1291 len = m->m_len;
1292 }
1293 return (m);
1294 }
1295
/*
 * Function: make_uint32
 * Purpose:
 *   Assemble four bytes, given in memory order (c0 first), into the
 *   uint32_t that a native load of those four bytes would produce.
 */
static __inline__ uint32_t
make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
{
#if BYTE_ORDER == BIG_ENDIAN
    /* first byte in memory is the most significant */
    return (((uint32_t)c0 << 24) | ((uint32_t)c1 << 16)
            | ((uint32_t)c2 << 8) | (uint32_t)c3);
#else /* BYTE_ORDER == LITTLE_ENDIAN */
    /* first byte in memory is the least significant */
    return (((uint32_t)c3 << 24) | ((uint32_t)c2 << 16)
            | ((uint32_t)c1 << 8) | (uint32_t)c0);
#endif /* BYTE_ORDER == LITTLE_ENDIAN */
}
1311
1312 static int
1313 S_mbuf_copy_uint32(struct mbuf * m, int32_t offset, uint32_t * val)
1314 {
1315 struct mbuf * current;
1316 u_char * current_data;
1317 struct mbuf * next;
1318 u_char * next_data;
1319 int space_current;
1320
1321 current = S_mbuf_skip_to_offset(m, &offset);
1322 if (current == NULL) {
1323 return (1);
1324 }
1325 current_data = mtod(current, u_char *) + offset;
1326 space_current = current->m_len - offset;
1327 if (space_current >= (int)sizeof(uint32_t)) {
1328 *val = *((uint32_t *)current_data);
1329 return (0);
1330 }
1331 next = current->m_next;
1332 if (next == NULL || (next->m_len + space_current) < (int)sizeof(uint32_t)) {
1333 return (1);
1334 }
1335 next_data = mtod(next, u_char *);
1336 switch (space_current) {
1337 case 1:
1338 *val = make_uint32(current_data[0], next_data[0],
1339 next_data[1], next_data[2]);
1340 break;
1341 case 2:
1342 *val = make_uint32(current_data[0], current_data[1],
1343 next_data[0], next_data[1]);
1344 break;
1345 default:
1346 *val = make_uint32(current_data[0], current_data[1],
1347 current_data[2], next_data[0]);
1348 break;
1349 }
1350 return (0);
1351 }
1352
1353 #define IP_SRC_OFFSET (offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p))
1354 #define IP_DST_OFFSET (offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p))
1355
1356 static uint32_t
1357 ip_header_hash(struct mbuf * m)
1358 {
1359 u_char * data;
1360 struct in_addr ip_dst;
1361 struct in_addr ip_src;
1362 u_char ip_p;
1363 int32_t offset;
1364 struct mbuf * orig_m = m;
1365
1366 /* find the IP protocol field relative to the start of the packet */
1367 offset = offsetof(struct ip, ip_p) + sizeof(struct ether_header);
1368 m = S_mbuf_skip_to_offset(m, &offset);
1369 if (m == NULL || m->m_len < 1) {
1370 goto bad_ip_packet;
1371 }
1372 data = mtod(m, u_char *) + offset;
1373 ip_p = *data;
1374
1375 /* find the IP src relative to the IP protocol */
1376 if ((m->m_len - offset)
1377 >= (int)(IP_SRC_OFFSET + sizeof(struct in_addr) * 2)) {
1378 /* this should be the normal case */
1379 ip_src = *(struct in_addr *)(data + IP_SRC_OFFSET);
1380 ip_dst = *(struct in_addr *)(data + IP_DST_OFFSET);
1381 }
1382 else {
1383 if (S_mbuf_copy_uint32(m, offset + IP_SRC_OFFSET,
1384 (uint32_t *)&ip_src.s_addr)) {
1385 goto bad_ip_packet;
1386 }
1387 if (S_mbuf_copy_uint32(m, offset + IP_DST_OFFSET,
1388 (uint32_t *)&ip_dst.s_addr)) {
1389 goto bad_ip_packet;
1390 }
1391 }
1392 return (ntohl(ip_dst.s_addr) ^ ntohl(ip_src.s_addr) ^ ((uint32_t)ip_p));
1393
1394 bad_ip_packet:
1395 return (ether_header_hash(mtod(orig_m, struct ether_header *)));
1396 }
1397
1398 #define IP6_ADDRS_LEN (sizeof(struct in6_addr) * 2)
1399 static uint32_t
1400 ipv6_header_hash(struct mbuf * m)
1401 {
1402 u_char * data;
1403 int i;
1404 int32_t offset;
1405 struct mbuf * orig_m = m;
1406 uint32_t * scan;
1407 uint32_t val;
1408
1409 /* find the IP protocol field relative to the start of the packet */
1410 offset = offsetof(struct ip6_hdr, ip6_src) + sizeof(struct ether_header);
1411 m = S_mbuf_skip_to_offset(m, &offset);
1412 if (m == NULL) {
1413 goto bad_ipv6_packet;
1414 }
1415 data = mtod(m, u_char *) + offset;
1416 val = 0;
1417 if ((m->m_len - offset) >= (int)IP6_ADDRS_LEN) {
1418 /* this should be the normal case */
1419 for (i = 0, scan = (uint32_t *)data;
1420 i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t));
1421 i++, scan++) {
1422 val ^= *scan;
1423 }
1424 }
1425 else {
1426 for (i = 0; i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); i++) {
1427 uint32_t tmp;
1428 if (S_mbuf_copy_uint32(m, offset + i * sizeof(uint32_t),
1429 (uint32_t *)&tmp)) {
1430 goto bad_ipv6_packet;
1431 }
1432 val ^= tmp;
1433 }
1434 }
1435 return (ntohl(val));
1436
1437 bad_ipv6_packet:
1438 return (ether_header_hash(mtod(orig_m, struct ether_header *)));
1439 }
1440
1441 static int
1442 bond_output(struct ifnet * ifp, struct mbuf * m)
1443 {
1444 bpf_packet_func bpf_func;
1445 uint32_t h;
1446 ifbond_ref ifb;
1447 struct ifnet * port_ifp = NULL;
1448 int err;
1449 struct flowadv adv = { FADV_SUCCESS };
1450
1451 if (m == 0) {
1452 return (0);
1453 }
1454 if ((m->m_flags & M_PKTHDR) == 0) {
1455 m_freem(m);
1456 return (0);
1457 }
1458 if (m->m_pkthdr.pkt_flowid != 0) {
1459 h = m->m_pkthdr.pkt_flowid;
1460 }
1461 else {
1462 struct ether_header * eh_p;
1463
1464 eh_p = mtod(m, struct ether_header *);
1465 switch (ntohs(eh_p->ether_type)) {
1466 case ETHERTYPE_IP:
1467 h = ip_header_hash(m);
1468 break;
1469 case ETHERTYPE_IPV6:
1470 h = ipv6_header_hash(m);
1471 break;
1472 default:
1473 h = ether_header_hash(eh_p);
1474 break;
1475 }
1476 }
1477 bond_lock();
1478 ifb = ifnet_softc(ifp);
1479 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1480 || ifb->ifb_distributing_count == 0) {
1481 goto done;
1482 }
1483 h %= ifb->ifb_distributing_count;
1484 port_ifp = ifb->ifb_distributing_array[h]->po_ifp;
1485 bpf_func = ifb->ifb_bpf_output;
1486 bond_unlock();
1487
1488 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1489 (void)ifnet_stat_increment_out(ifp, 1,
1490 m->m_pkthdr.len + ETHER_VLAN_ENCAP_LEN,
1491 0);
1492 } else {
1493 (void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
1494 }
1495 bond_bpf_output(ifp, m, bpf_func);
1496
1497 err = dlil_output(port_ifp, PF_BOND, m, NULL, NULL, 1, &adv);
1498
1499 if (err == 0) {
1500 if (adv.code == FADV_FLOW_CONTROLLED) {
1501 err = EQFULL;
1502 } else if (adv.code == FADV_SUSPENDED) {
1503 err = EQSUSPENDED;
1504 }
1505 }
1506
1507 return (err);
1508
1509 done:
1510 bond_unlock();
1511 m_freem(m);
1512 return (0);
1513 }
1514
1515 static bondport_ref
1516 ifbond_lookup_port(ifbond_ref ifb, struct ifnet * port_ifp)
1517 {
1518 bondport_ref p;
1519 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1520 if (p->po_ifp == port_ifp) {
1521 return (p);
1522 }
1523 }
1524 return (NULL);
1525 }
1526
1527 static bondport_ref
1528 bond_lookup_port(struct ifnet * port_ifp)
1529 {
1530 ifbond_ref ifb;
1531 bondport_ref port;
1532
1533 TAILQ_FOREACH(ifb, &g_bond->ifbond_list, ifb_bond_list) {
1534 port = ifbond_lookup_port(ifb, port_ifp);
1535 if (port != NULL) {
1536 return (port);
1537 }
1538 }
1539 return (NULL);
1540 }
1541
1542 static void
1543 bond_receive_lacpdu(struct mbuf * m, struct ifnet * port_ifp)
1544 {
1545 struct ifnet * bond_ifp = NULL;
1546 ifbond_ref ifb;
1547 int event_code = 0;
1548 bondport_ref p;
1549
1550 bond_lock();
1551 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1552 goto done;
1553 }
1554 p = bond_lookup_port(port_ifp);
1555 if (p == NULL) {
1556 goto done;
1557 }
1558 if (p->po_enabled == 0) {
1559 goto done;
1560 }
1561 ifb = p->po_bond;
1562 if (ifb->ifb_mode != IF_BOND_MODE_LACP) {
1563 goto done;
1564 }
1565 bondport_receive_lacpdu(p, (lacpdu_ref)m->m_data);
1566 if (ifbond_selection(ifb)) {
1567 event_code = (ifb->ifb_active_lag == NULL)
1568 ? KEV_DL_LINK_OFF
1569 : KEV_DL_LINK_ON;
1570 /* XXX need to take a reference on bond_ifp */
1571 bond_ifp = ifb->ifb_ifp;
1572 ifb->ifb_last_link_event = event_code;
1573 }
1574 else {
1575 event_code = (ifb->ifb_active_lag == NULL)
1576 ? KEV_DL_LINK_OFF
1577 : KEV_DL_LINK_ON;
1578 if (event_code != ifb->ifb_last_link_event) {
1579 if (g_bond->verbose) {
1580 timestamp_printf("%s: (receive) generating LINK event\n",
1581 ifb->ifb_name);
1582 }
1583 bond_ifp = ifb->ifb_ifp;
1584 ifb->ifb_last_link_event = event_code;
1585 }
1586 }
1587
1588 done:
1589 bond_unlock();
1590 if (bond_ifp != NULL) {
1591 interface_link_event(bond_ifp, event_code);
1592 }
1593 m_freem(m);
1594 return;
1595 }
1596
1597 static void
1598 bond_receive_la_marker_pdu(struct mbuf * m, struct ifnet * port_ifp)
1599 {
1600 la_marker_pdu_ref marker_p;
1601 bondport_ref p;
1602
1603 marker_p = (la_marker_pdu_ref)(m->m_data + ETHER_HDR_LEN);
1604 if (marker_p->lm_marker_tlv_type != LA_MARKER_TLV_TYPE_MARKER) {
1605 goto failed;
1606 }
1607 bond_lock();
1608 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1609 bond_unlock();
1610 goto failed;
1611 }
1612 p = bond_lookup_port(port_ifp);
1613 if (p == NULL || p->po_enabled == 0
1614 || p->po_bond->ifb_mode != IF_BOND_MODE_LACP) {
1615 bond_unlock();
1616 goto failed;
1617 }
1618 /* echo back the same packet as a marker response */
1619 marker_p->lm_marker_tlv_type = LA_MARKER_TLV_TYPE_MARKER_RESPONSE;
1620 bondport_slow_proto_transmit(p, (packet_buffer_ref)m);
1621 bond_unlock();
1622 return;
1623
1624 failed:
1625 m_freem(m);
1626 return;
1627 }
1628
1629 static int
1630 bond_input(ifnet_t port_ifp, __unused protocol_family_t protocol, mbuf_t m,
1631 char * frame_header)
1632 {
1633 bpf_packet_func bpf_func;
1634 const struct ether_header * eh_p;
1635 ifbond_ref ifb;
1636 struct ifnet * ifp;
1637 bondport_ref p;
1638
1639 eh_p = (const struct ether_header *)frame_header;
1640 if ((m->m_flags & M_MCAST) != 0
1641 && bcmp(eh_p->ether_dhost, &slow_proto_multicast,
1642 sizeof(eh_p->ether_dhost)) == 0
1643 && ntohs(eh_p->ether_type) == IEEE8023AD_SLOW_PROTO_ETHERTYPE) {
1644 u_char subtype = *mtod(m, u_char *);
1645
1646 if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP) {
1647 if (m->m_pkthdr.len < (int)offsetof(lacpdu, la_reserved)) {
1648 m_freem(m);
1649 return (0);
1650 }
1651 /* send to lacp */
1652 if (m->m_len < (int)offsetof(lacpdu, la_reserved)) {
1653 m = m_pullup(m, offsetof(lacpdu, la_reserved));
1654 if (m == NULL) {
1655 return (0);
1656 }
1657 }
1658 bond_receive_lacpdu(m, port_ifp);
1659 return (0);
1660 }
1661 else if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL) {
1662 int min_size;
1663
1664 /* restore the ethernet header pointer in the mbuf */
1665 m->m_pkthdr.len += ETHER_HDR_LEN;
1666 m->m_data -= ETHER_HDR_LEN;
1667 m->m_len += ETHER_HDR_LEN;
1668 min_size = ETHER_HDR_LEN + offsetof(la_marker_pdu, lm_reserved);
1669 if (m->m_pkthdr.len < min_size) {
1670 m_freem(m);
1671 return (0);
1672 }
1673 /* send to lacp */
1674 if (m->m_len < min_size) {
1675 m = m_pullup(m, min_size);
1676 if (m == NULL) {
1677 return (0);
1678 }
1679 }
1680 /* send to marker responder */
1681 bond_receive_la_marker_pdu(m, port_ifp);
1682 return (0);
1683 }
1684 else if (subtype == 0
1685 || subtype > IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END) {
1686 /* invalid subtype, discard the frame */
1687 m_freem(m);
1688 return (0);
1689 }
1690 }
1691 bond_lock();
1692 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1693 goto done;
1694 }
1695 p = bond_lookup_port(port_ifp);
1696 if (p == NULL || bondport_collecting(p) == 0) {
1697 goto done;
1698 }
1699
1700 /* make the packet appear as if it arrived on the bonded interface */
1701 ifb = p->po_bond;
1702 ifp = ifb->ifb_ifp;
1703 bpf_func = ifb->ifb_bpf_input;
1704 bond_unlock();
1705
1706 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1707 (void)ifnet_stat_increment_in(ifp, 1,
1708 (m->m_pkthdr.len + ETHER_HDR_LEN
1709 + ETHER_VLAN_ENCAP_LEN), 0);
1710 }
1711 else {
1712 (void)ifnet_stat_increment_in(ifp, 1,
1713 (m->m_pkthdr.len + ETHER_HDR_LEN), 0);
1714 }
1715 m->m_pkthdr.rcvif = ifp;
1716 bond_bpf_input(ifp, m, eh_p, bpf_func);
1717 m->m_pkthdr.pkt_hdr = frame_header;
1718 dlil_input_packet_list(ifp, m);
1719 return 0;
1720
1721 done:
1722 bond_unlock();
1723 m_freem(m);
1724 return (0);
1725 }
1726
1727 static __inline__ const char *
1728 bondport_get_name(bondport_ref p)
1729 {
1730 return (p->po_name);
1731 }
1732
1733 static __inline__ int
1734 bondport_get_index(bondport_ref p)
1735 {
1736 return (ifnet_index(p->po_ifp));
1737 }
1738
1739 static void
1740 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf)
1741 {
1742 struct ether_header * eh_p;
1743 int error;
1744
1745 /* packet_buffer_allocate leaves room for ethernet header */
1746 eh_p = mtod(buf, struct ether_header *);
1747 bcopy(&slow_proto_multicast, &eh_p->ether_dhost, sizeof(eh_p->ether_dhost));
1748 bcopy(&p->po_saved_addr, eh_p->ether_shost, sizeof(eh_p->ether_shost));
1749 eh_p->ether_type = htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE);
1750 error = ifnet_output_raw(p->po_ifp, PF_BOND, buf);
1751 if (error != 0) {
1752 printf("bondport_slow_proto_transmit(%s) failed %d\n",
1753 bondport_get_name(p), error);
1754 }
1755 return;
1756 }
1757
1758 static void
1759 bondport_timer_process_func(devtimer_ref timer,
1760 devtimer_process_func_event event)
1761 {
1762 bondport_ref p;
1763
1764 switch (event) {
1765 case devtimer_process_func_event_lock:
1766 bond_lock();
1767 devtimer_retain(timer);
1768 break;
1769 case devtimer_process_func_event_unlock:
1770 if (devtimer_valid(timer)) {
1771 /* as long as the devtimer is valid, we can look at arg0 */
1772 int event_code = 0;
1773 struct ifnet * bond_ifp = NULL;
1774
1775 p = (bondport_ref)devtimer_arg0(timer);
1776 if (ifbond_selection(p->po_bond)) {
1777 event_code = (p->po_bond->ifb_active_lag == NULL)
1778 ? KEV_DL_LINK_OFF
1779 : KEV_DL_LINK_ON;
1780 /* XXX need to take a reference on bond_ifp */
1781 bond_ifp = p->po_bond->ifb_ifp;
1782 p->po_bond->ifb_last_link_event = event_code;
1783 }
1784 else {
1785 event_code = (p->po_bond->ifb_active_lag == NULL)
1786 ? KEV_DL_LINK_OFF
1787 : KEV_DL_LINK_ON;
1788 if (event_code != p->po_bond->ifb_last_link_event) {
1789 if (g_bond->verbose) {
1790 timestamp_printf("%s: (timer) generating LINK event\n",
1791 p->po_bond->ifb_name);
1792 }
1793 bond_ifp = p->po_bond->ifb_ifp;
1794 p->po_bond->ifb_last_link_event = event_code;
1795 }
1796 }
1797 devtimer_release(timer);
1798 bond_unlock();
1799 if (bond_ifp != NULL) {
1800 interface_link_event(bond_ifp, event_code);
1801 }
1802 }
1803 else {
1804 /* timer is going away */
1805 devtimer_release(timer);
1806 bond_unlock();
1807 }
1808 break;
1809 default:
1810 break;
1811 }
1812 }
1813
1814 static bondport_ref
1815 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
1816 int active, int short_timeout, int * ret_error)
1817 {
1818 int error = 0;
1819 bondport_ref p = NULL;
1820 lacp_actor_partner_state s;
1821
1822 *ret_error = 0;
1823 p = _MALLOC(sizeof(*p), M_BOND, M_WAITOK | M_ZERO);
1824 if (p == NULL) {
1825 *ret_error = ENOMEM;
1826 return (NULL);
1827 }
1828 multicast_list_init(&p->po_multicast);
1829 if ((u_int32_t)snprintf(p->po_name, sizeof(p->po_name), "%s%d",
1830 ifnet_name(port_ifp), ifnet_unit(port_ifp))
1831 >= sizeof(p->po_name)) {
1832 printf("if_bond: name too large\n");
1833 *ret_error = EINVAL;
1834 goto failed;
1835 }
1836 error = siocgifdevmtu(port_ifp, &p->po_devmtu);
1837 if (error != 0) {
1838 printf("if_bond: SIOCGIFDEVMTU %s failed, %d\n",
1839 bondport_get_name(p), error);
1840 goto failed;
1841 }
1842 /* remember the current interface MTU so it can be restored */
1843 p->po_devmtu.ifdm_current = ifnet_mtu(port_ifp);
1844 p->po_ifp = port_ifp;
1845 p->po_media_info = interface_media_info(port_ifp);
1846 p->po_current_while_timer = devtimer_create(bondport_timer_process_func, p);
1847 if (p->po_current_while_timer == NULL) {
1848 *ret_error = ENOMEM;
1849 goto failed;
1850 }
1851 p->po_periodic_timer = devtimer_create(bondport_timer_process_func, p);
1852 if (p->po_periodic_timer == NULL) {
1853 *ret_error = ENOMEM;
1854 goto failed;
1855 }
1856 p->po_wait_while_timer = devtimer_create(bondport_timer_process_func, p);
1857 if (p->po_wait_while_timer == NULL) {
1858 *ret_error = ENOMEM;
1859 goto failed;
1860 }
1861 p->po_transmit_timer = devtimer_create(bondport_timer_process_func, p);
1862 if (p->po_transmit_timer == NULL) {
1863 *ret_error = ENOMEM;
1864 goto failed;
1865 }
1866 p->po_receive_state = ReceiveState_none;
1867 p->po_mux_state = MuxState_none;
1868 p->po_priority = priority;
1869 s = 0;
1870 s = lacp_actor_partner_state_set_aggregatable(s);
1871 if (short_timeout) {
1872 s = lacp_actor_partner_state_set_short_timeout(s);
1873 }
1874 if (active) {
1875 s = lacp_actor_partner_state_set_active_lacp(s);
1876 }
1877 p->po_actor_state = s;
1878 return (p);
1879
1880 failed:
1881 bondport_free(p);
1882 return (NULL);
1883 }
1884
1885 static void
1886 bondport_start(bondport_ref p)
1887 {
1888 bondport_receive_machine(p, LAEventStart, NULL);
1889 bondport_mux_machine(p, LAEventStart, NULL);
1890 bondport_periodic_transmit_machine(p, LAEventStart, NULL);
1891 bondport_transmit_machine(p, LAEventStart, NULL);
1892 return;
1893 }
1894
1895 /*
1896 * Function: bondport_invalidate_timers
1897 * Purpose:
1898 * Invalidate all of the timers for the bondport.
1899 */
1900 static void
1901 bondport_invalidate_timers(bondport_ref p)
1902 {
1903 devtimer_invalidate(p->po_current_while_timer);
1904 devtimer_invalidate(p->po_periodic_timer);
1905 devtimer_invalidate(p->po_wait_while_timer);
1906 devtimer_invalidate(p->po_transmit_timer);
1907 }
1908
1909 /*
1910 * Function: bondport_cancel_timers
1911 * Purpose:
1912 * Cancel all of the timers for the bondport.
1913 */
1914 static void
1915 bondport_cancel_timers(bondport_ref p)
1916 {
1917 devtimer_cancel(p->po_current_while_timer);
1918 devtimer_cancel(p->po_periodic_timer);
1919 devtimer_cancel(p->po_wait_while_timer);
1920 devtimer_cancel(p->po_transmit_timer);
1921 }
1922
1923 static void
1924 bondport_free(bondport_ref p)
1925 {
1926 multicast_list_remove(&p->po_multicast);
1927 devtimer_release(p->po_current_while_timer);
1928 devtimer_release(p->po_periodic_timer);
1929 devtimer_release(p->po_wait_while_timer);
1930 devtimer_release(p->po_transmit_timer);
1931 FREE(p, M_BOND);
1932 return;
1933 }
1934
/*
 * Progress bits recorded by bond_add_interface() so that its failure
 * path knows which steps have to be undone.
 */
#define BOND_ADD_PROGRESS_IN_LIST           (1 << 0)    /* port is on the bond's port list */
#define BOND_ADD_PROGRESS_PROTO_ATTACHED    (1 << 1)    /* bond protocol attached to the port */
#define BOND_ADD_PROGRESS_LLADDR_SET        (1 << 2)    /* port link address re-programmed */
#define BOND_ADD_PROGRESS_MTU_SET           (1 << 3)    /* port MTU changed */
1939
1940 static __inline__ int
1941 bond_device_mtu(struct ifnet * ifp, ifbond_ref ifb)
1942 {
1943 return (((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
1944 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu);
1945 }
1946
1947 static int
1948 bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp)
1949 {
1950 int devmtu;
1951 int error = 0;
1952 int event_code = 0;
1953 int first = FALSE;
1954 ifbond_ref ifb;
1955 bondport_ref * new_array = NULL;
1956 bondport_ref * old_array = NULL;
1957 bondport_ref p;
1958 int progress = 0;
1959
1960 /* pre-allocate space for new port */
1961 p = bondport_create(port_ifp, 0x8000, 1, 0, &error);
1962 if (p == NULL) {
1963 return (error);
1964 }
1965 bond_lock();
1966 ifb = (ifbond_ref)ifnet_softc(ifp);
1967 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
1968 bond_unlock();
1969 bondport_free(p);
1970 return ((ifb == NULL ? EOPNOTSUPP : EBUSY));
1971 }
1972
1973 /* make sure this interface can handle our current MTU */
1974 devmtu = bond_device_mtu(ifp, ifb);
1975 if (devmtu != 0
1976 && (devmtu > p->po_devmtu.ifdm_max || devmtu < p->po_devmtu.ifdm_min)) {
1977 bond_unlock();
1978 printf("if_bond: interface %s doesn't support mtu %d",
1979 bondport_get_name(p), devmtu);
1980 bondport_free(p);
1981 return (EINVAL);
1982 }
1983
1984 /* make sure ifb doesn't get de-allocated while we wait */
1985 ifbond_retain(ifb);
1986
1987 /* wait for other add or remove to complete */
1988 ifbond_wait(ifb, "bond_add_interface");
1989
1990 if (ifbond_flags_if_detaching(ifb)) {
1991 /* someone destroyed the bond while we were waiting */
1992 error = EBUSY;
1993 goto signal_done;
1994 }
1995 if (bond_lookup_port(port_ifp) != NULL) {
1996 /* port is already part of a bond */
1997 error = EBUSY;
1998 goto signal_done;
1999 }
2000 ifnet_lock_exclusive(port_ifp);
2001 if ((ifnet_eflags(port_ifp) & (IFEF_VLAN | IFEF_BOND)) != 0) {
2002 /* interface already has VLAN's, or is part of bond */
2003 ifnet_lock_done(port_ifp);
2004 error = EBUSY;
2005 goto signal_done;
2006 }
2007
2008 /* mark the interface busy */
2009 /* can't use ifnet_set_eflags because that takes the lock */
2010 port_ifp->if_eflags |= IFEF_BOND;
2011 ifnet_lock_done(port_ifp);
2012
2013 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2014 ifnet_set_offload(ifp, ifnet_offload(port_ifp));
2015 ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
2016 if (ifbond_flags_lladdr(ifb) == FALSE) {
2017 first = TRUE;
2018 }
2019 } else {
2020 ifnet_offload_t ifp_offload;
2021 ifnet_offload_t port_ifp_offload;
2022
2023 ifp_offload = ifnet_offload(ifp);
2024 port_ifp_offload = ifnet_offload(port_ifp);
2025 if (ifp_offload != port_ifp_offload) {
2026 ifnet_offload_t offload;
2027
2028 offload = ifp_offload & port_ifp_offload;
2029 printf("bond_add_interface(%s, %s) "
2030 "hwassist values don't match 0x%x != 0x%x, using 0x%x instead\n",
2031 ifb->ifb_name, bondport_get_name(p),
2032 ifp_offload, port_ifp_offload, offload);
2033 /*
2034 * XXX
2035 * if the bond has VLAN's, we can't simply change the hwassist
2036 * field behind its back: this needs work
2037 */
2038 ifnet_set_offload(ifp, offload);
2039 }
2040 }
2041 p->po_bond = ifb;
2042
2043 /* remember the port's ethernet address so it can be restored */
2044 ether_addr_copy(&p->po_saved_addr, IF_LLADDR(port_ifp));
2045
2046 /* add it to the list of ports */
2047 TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list);
2048 ifb->ifb_port_count++;
2049
2050 /* set the default MTU */
2051 if (ifnet_mtu(ifp) == 0) {
2052 ifnet_set_mtu(ifp, ETHERMTU);
2053 }
2054 bond_unlock();
2055
2056
2057 /* first port added to bond determines bond's ethernet address */
2058 if (first) {
2059 ifnet_set_lladdr_and_type(ifp, IF_LLADDR(port_ifp), ETHER_ADDR_LEN,
2060 IFT_ETHER);
2061 }
2062
2063 progress |= BOND_ADD_PROGRESS_IN_LIST;
2064
2065 /* allocate a larger distributing array */
2066 new_array = (bondport_ref *)
2067 _MALLOC(sizeof(*new_array) * ifb->ifb_port_count, M_BOND, M_WAITOK);
2068 if (new_array == NULL) {
2069 error = ENOMEM;
2070 goto failed;
2071 }
2072
2073 /* attach our BOND "protocol" to the interface */
2074 error = bond_attach_protocol(port_ifp);
2075 if (error) {
2076 goto failed;
2077 }
2078 progress |= BOND_ADD_PROGRESS_PROTO_ATTACHED;
2079
2080 /* set the interface MTU */
2081 devmtu = bond_device_mtu(ifp, ifb);
2082 error = siocsifmtu(port_ifp, devmtu);
2083 if (error != 0) {
2084 printf("bond_add_interface(%s, %s):"
2085 " SIOCSIFMTU %d failed %d\n",
2086 ifb->ifb_name, bondport_get_name(p), devmtu, error);
2087 goto failed;
2088 }
2089 progress |= BOND_ADD_PROGRESS_MTU_SET;
2090
2091 /* program the port with our multicast addresses */
2092 error = multicast_list_program(&p->po_multicast, ifp, port_ifp);
2093 if (error) {
2094 printf("bond_add_interface(%s, %s):"
2095 " multicast_list_program failed %d\n",
2096 ifb->ifb_name, bondport_get_name(p), error);
2097 goto failed;
2098 }
2099
2100 /* mark the interface up */
2101 ifnet_set_flags(port_ifp, IFF_UP, IFF_UP);
2102
2103 error = ifnet_ioctl(port_ifp, 0, SIOCSIFFLAGS, NULL);
2104 if (error != 0) {
2105 printf("bond_add_interface(%s, %s): SIOCSIFFLAGS failed %d\n",
2106 ifb->ifb_name, bondport_get_name(p), error);
2107 goto failed;
2108 }
2109
2110 /* re-program the port's ethernet address */
2111 error = if_siflladdr(port_ifp,
2112 (const struct ether_addr *)IF_LLADDR(ifp));
2113 if (error != 0) {
2114 /* port doesn't support setting the link address */
2115 printf("bond_add_interface(%s, %s): if_siflladdr failed %d\n",
2116 ifb->ifb_name, bondport_get_name(p), error);
2117 goto failed;
2118 }
2119 progress |= BOND_ADD_PROGRESS_LLADDR_SET;
2120
2121 bond_lock();
2122
2123 /* no failures past this point */
2124 p->po_enabled = 1;
2125
2126 /* copy the contents of the existing distributing array */
2127 if (ifb->ifb_distributing_count) {
2128 bcopy(ifb->ifb_distributing_array, new_array,
2129 sizeof(*new_array) * ifb->ifb_distributing_count);
2130 }
2131 old_array = ifb->ifb_distributing_array;
2132 ifb->ifb_distributing_array = new_array;
2133
2134 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2135 bondport_start(p);
2136
2137 /* check if we need to generate a link status event */
2138 if (ifbond_selection(ifb)) {
2139 event_code = (ifb->ifb_active_lag == NULL)
2140 ? KEV_DL_LINK_OFF
2141 : KEV_DL_LINK_ON;
2142 ifb->ifb_last_link_event = event_code;
2143 }
2144 }
2145 else {
2146 /* are we adding the first distributing interface? */
2147 if (media_active(&p->po_media_info)) {
2148 if (ifb->ifb_distributing_count == 0) {
2149 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_ON;
2150 }
2151 bondport_enable_distributing(p);
2152 }
2153 else {
2154 bondport_disable_distributing(p);
2155 }
2156 }
2157 /* clear the busy state, and wakeup anyone waiting */
2158 ifbond_signal(ifb, "bond_add_interface");
2159 bond_unlock();
2160 if (event_code != 0) {
2161 interface_link_event(ifp, event_code);
2162 }
2163 if (old_array != NULL) {
2164 FREE(old_array, M_BOND);
2165 }
2166 return 0;
2167
2168 failed:
2169 bond_assert_lock_not_held();
2170
2171 /* if this was the first port to be added, clear our address */
2172 if (first) {
2173 ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2174 }
2175
2176 if (new_array != NULL) {
2177 FREE(new_array, M_BOND);
2178 }
2179 if ((progress & BOND_ADD_PROGRESS_LLADDR_SET) != 0) {
2180 int error1;
2181
2182 error1 = if_siflladdr(port_ifp, &p->po_saved_addr);
2183 if (error1 != 0) {
2184 printf("bond_add_interface(%s, %s): if_siflladdr failed %d\n",
2185 ifb->ifb_name, bondport_get_name(p), error1);
2186 }
2187 }
2188 if ((progress & BOND_ADD_PROGRESS_PROTO_ATTACHED) != 0) {
2189 (void)bond_detach_protocol(port_ifp);
2190 }
2191 if ((progress & BOND_ADD_PROGRESS_MTU_SET) != 0) {
2192 int error1;
2193
2194 error1 = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2195 if (error1 != 0) {
2196 printf("bond_add_interface(%s, %s): SIOCSIFMTU %d failed %d\n",
2197 ifb->ifb_name, bondport_get_name(p),
2198 p->po_devmtu.ifdm_current, error1);
2199 }
2200 }
2201 bond_lock();
2202 if ((progress & BOND_ADD_PROGRESS_IN_LIST) != 0) {
2203 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2204 ifb->ifb_port_count--;
2205 }
2206 ifnet_set_eflags(ifp, 0, IFEF_BOND);
2207 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2208 ifb->ifb_altmtu = 0;
2209 ifnet_set_mtu(ifp, 0);
2210 ifnet_set_offload(ifp, 0);
2211 }
2212
2213 signal_done:
2214 ifbond_signal(ifb, "bond_add_interface");
2215 bond_unlock();
2216 ifbond_release(ifb);
2217 bondport_free(p);
2218 return (error);
2219 }
2220
2221 static int
2222 bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp)
2223 {
2224 int active_lag = 0;
2225 int error = 0;
2226 int event_code = 0;
2227 bondport_ref head_port;
2228 struct ifnet * ifp;
2229 int last = FALSE;
2230 int new_link_address = FALSE;
2231 bondport_ref p;
2232 lacp_actor_partner_state s;
2233 int was_distributing;
2234
2235 bond_assert_lock_held();
2236
2237 ifbond_retain(ifb);
2238 ifbond_wait(ifb, "bond_remove_interface");
2239
2240 p = ifbond_lookup_port(ifb, port_ifp);
2241 if (p == NULL) {
2242 error = ENXIO;
2243 /* it got removed by another thread */
2244 goto signal_done;
2245 }
2246
2247 /* de-select it and remove it from the lists */
2248 was_distributing = bondport_flags_distributing(p);
2249 bondport_disable_distributing(p);
2250 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2251 bondport_set_selected(p, SelectedState_UNSELECTED);
2252 active_lag = bondport_remove_from_LAG(p);
2253 /* invalidate timers here while holding the bond_lock */
2254 bondport_invalidate_timers(p);
2255
2256 /* announce that we're Individual now */
2257 s = p->po_actor_state;
2258 s = lacp_actor_partner_state_set_individual(s);
2259 s = lacp_actor_partner_state_set_not_collecting(s);
2260 s = lacp_actor_partner_state_set_not_distributing(s);
2261 s = lacp_actor_partner_state_set_out_of_sync(s);
2262 p->po_actor_state = s;
2263 bondport_flags_set_ntt(p);
2264 }
2265
2266 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2267 ifb->ifb_port_count--;
2268
2269 ifp = ifb->ifb_ifp;
2270 head_port = TAILQ_FIRST(&ifb->ifb_port_list);
2271 if (head_port == NULL) {
2272 ifnet_set_flags(ifp, 0, IFF_RUNNING);
2273 if (ifbond_flags_lladdr(ifb) == FALSE) {
2274 last = TRUE;
2275 }
2276 ifnet_set_offload(ifp, 0);
2277 ifnet_set_mtu(ifp, 0);
2278 ifb->ifb_altmtu = 0;
2279 } else if (ifbond_flags_lladdr(ifb) == FALSE
2280 && bcmp(&p->po_saved_addr, IF_LLADDR(ifp),
2281 ETHER_ADDR_LEN) == 0) {
2282 new_link_address = TRUE;
2283 }
2284 /* check if we need to generate a link status event */
2285 if (ifb->ifb_mode == IF_BOND_MODE_LACP ) {
2286 if (ifbond_selection(ifb) || active_lag) {
2287 event_code = (ifb->ifb_active_lag == NULL)
2288 ? KEV_DL_LINK_OFF
2289 : KEV_DL_LINK_ON;
2290 ifb->ifb_last_link_event = event_code;
2291 }
2292 bondport_transmit_machine(p, LAEventStart,
2293 TRANSMIT_MACHINE_TX_IMMEDIATE);
2294 }
2295 else {
2296 /* are we removing the last distributing interface? */
2297 if (was_distributing && ifb->ifb_distributing_count == 0) {
2298 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_OFF;
2299 }
2300 }
2301
2302 bond_unlock();
2303
2304 if (last) {
2305 ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2306 }
2307 else if (new_link_address) {
2308 struct ifnet * scan_ifp;
2309 bondport_ref scan_port;
2310
2311 /* ifbond_wait() allows port list traversal without holding the lock */
2312
2313 /* this port gave the bond its ethernet address, switch to new one */
2314 ifnet_set_lladdr_and_type(ifp,
2315 &head_port->po_saved_addr, ETHER_ADDR_LEN,
2316 IFT_ETHER);
2317
2318 /* re-program each port with the new link address */
2319 TAILQ_FOREACH(scan_port, &ifb->ifb_port_list, po_port_list) {
2320 scan_ifp = scan_port->po_ifp;
2321
2322 error = if_siflladdr(scan_ifp,
2323 (const struct ether_addr *) IF_LLADDR(ifp));
2324 if (error != 0) {
2325 printf("bond_remove_interface(%s, %s): "
2326 "if_siflladdr (%s) failed %d\n",
2327 ifb->ifb_name, bondport_get_name(p),
2328 bondport_get_name(scan_port), error);
2329 }
2330 }
2331 }
2332
2333 /* restore the port's ethernet address */
2334 error = if_siflladdr(port_ifp, &p->po_saved_addr);
2335 if (error != 0) {
2336 printf("bond_remove_interface(%s, %s): if_siflladdr failed %d\n",
2337 ifb->ifb_name, bondport_get_name(p), error);
2338 }
2339
2340 /* restore the port's MTU */
2341 error = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2342 if (error != 0) {
2343 printf("bond_remove_interface(%s, %s): SIOCSIFMTU %d failed %d\n",
2344 ifb->ifb_name, bondport_get_name(p),
2345 p->po_devmtu.ifdm_current, error);
2346 }
2347
2348 /* remove the bond "protocol" */
2349 bond_detach_protocol(port_ifp);
2350
2351 /* generate link event */
2352 if (event_code != 0) {
2353 interface_link_event(ifp, event_code);
2354 }
2355
2356 bond_lock();
2357 bondport_free(p);
2358 ifnet_set_eflags(port_ifp, 0, IFEF_BOND);
2359 /* release this bondport's reference to the ifbond */
2360 ifbond_release(ifb);
2361
2362 signal_done:
2363 ifbond_signal(ifb, "bond_remove_interface");
2364 ifbond_release(ifb);
2365 return (error);
2366 }
2367
2368 static void
2369 bond_set_lacp_mode(ifbond_ref ifb)
2370 {
2371 bondport_ref p;
2372
2373 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2374 bondport_disable_distributing(p);
2375 bondport_start(p);
2376 }
2377 return;
2378 }
2379
2380 static void
2381 bond_set_static_mode(ifbond_ref ifb)
2382 {
2383 bondport_ref p;
2384 lacp_actor_partner_state s;
2385
2386 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2387 bondport_disable_distributing(p);
2388 bondport_set_selected(p, SelectedState_UNSELECTED);
2389 (void)bondport_remove_from_LAG(p);
2390 bondport_cancel_timers(p);
2391
2392 /* announce that we're Individual now */
2393 s = p->po_actor_state;
2394 s = lacp_actor_partner_state_set_individual(s);
2395 s = lacp_actor_partner_state_set_not_collecting(s);
2396 s = lacp_actor_partner_state_set_not_distributing(s);
2397 s = lacp_actor_partner_state_set_out_of_sync(s);
2398 p->po_actor_state = s;
2399 bondport_flags_set_ntt(p);
2400 bondport_transmit_machine(p, LAEventStart,
2401 TRANSMIT_MACHINE_TX_IMMEDIATE);
2402 /* clear state */
2403 p->po_actor_state = 0;
2404 bzero(&p->po_partner_state, sizeof(p->po_partner_state));
2405
2406 if (media_active(&p->po_media_info)) {
2407 bondport_enable_distributing(p);
2408 }
2409 else {
2410 bondport_disable_distributing(p);
2411 }
2412 }
2413 return;
2414 }
2415
2416 static int
2417 bond_set_mode(struct ifnet * ifp, int mode)
2418 {
2419 int error = 0;
2420 int event_code = 0;
2421 ifbond_ref ifb;
2422
2423 bond_lock();
2424 ifb = (ifbond_ref)ifnet_softc(ifp);
2425 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2426 bond_unlock();
2427 return ((ifb == NULL) ? EOPNOTSUPP : EBUSY);
2428 }
2429 if (ifb->ifb_mode == mode) {
2430 bond_unlock();
2431 return (0);
2432 }
2433
2434 ifbond_retain(ifb);
2435 ifbond_wait(ifb, "bond_set_mode");
2436
2437 /* verify (again) that the mode is actually different */
2438 if (ifb->ifb_mode == mode) {
2439 /* nothing to do */
2440 goto signal_done;
2441 }
2442
2443 ifb->ifb_mode = mode;
2444 if (mode == IF_BOND_MODE_LACP) {
2445 bond_set_lacp_mode(ifb);
2446
2447 /* check if we need to generate a link status event */
2448 if (ifbond_selection(ifb)) {
2449 event_code = (ifb->ifb_active_lag == NULL)
2450 ? KEV_DL_LINK_OFF
2451 : KEV_DL_LINK_ON;
2452 }
2453 } else {
2454 bond_set_static_mode(ifb);
2455 event_code = (ifb->ifb_distributing_count == 0)
2456 ? KEV_DL_LINK_OFF
2457 : KEV_DL_LINK_ON;
2458 }
2459 ifb->ifb_last_link_event = event_code;
2460
2461 signal_done:
2462 ifbond_signal(ifb, "bond_set_mode");
2463 bond_unlock();
2464 ifbond_release(ifb);
2465
2466 if (event_code != 0) {
2467 interface_link_event(ifp, event_code);
2468 }
2469 return (error);
2470 }
2471
2472 static int
2473 bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap)
2474 {
2475 int count;
2476 user_addr_t dst;
2477 int error = 0;
2478 struct if_bond_status_req * ibsr;
2479 struct if_bond_status ibs;
2480 bondport_ref port;
2481
2482 ibsr = &(ibr_p->ibr_ibru.ibru_status);
2483 if (ibsr->ibsr_version != IF_BOND_STATUS_REQ_VERSION) {
2484 return (EINVAL);
2485 }
2486 ibsr->ibsr_key = ifb->ifb_key;
2487 ibsr->ibsr_mode = ifb->ifb_mode;
2488 ibsr->ibsr_total = ifb->ifb_port_count;
2489 dst = proc_is64bit(current_proc())
2490 ? ibsr->ibsr_ibsru.ibsru_buffer64
2491 : CAST_USER_ADDR_T(ibsr->ibsr_ibsru.ibsru_buffer);
2492 if (dst == USER_ADDR_NULL) {
2493 /* just want to know how many there are */
2494 goto done;
2495 }
2496 if (ibsr->ibsr_count < 0) {
2497 return (EINVAL);
2498 }
2499 count = (ifb->ifb_port_count < ibsr->ibsr_count)
2500 ? ifb->ifb_port_count : ibsr->ibsr_count;
2501 TAILQ_FOREACH(port, &ifb->ifb_port_list, po_port_list) {
2502 struct if_bond_partner_state * ibps_p;
2503 partner_state_ref ps;
2504
2505 if (count == 0) {
2506 break;
2507 }
2508 bzero(&ibs, sizeof(ibs));
2509 strlcpy(ibs.ibs_if_name, port->po_name, sizeof(ibs.ibs_if_name));
2510 ibs.ibs_port_priority = port->po_priority;
2511 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2512 ibs.ibs_state = port->po_actor_state;
2513 ibs.ibs_selected_state = port->po_selected;
2514 ps = &port->po_partner_state;
2515 ibps_p = &ibs.ibs_partner_state;
2516 ibps_p->ibps_system = ps->ps_lag_info.li_system;
2517 ibps_p->ibps_system_priority = ps->ps_lag_info.li_system_priority;
2518 ibps_p->ibps_key = ps->ps_lag_info.li_key;
2519 ibps_p->ibps_port = ps->ps_port;
2520 ibps_p->ibps_port_priority = ps->ps_port_priority;
2521 ibps_p->ibps_state = ps->ps_state;
2522 }
2523 else {
2524 /* fake the selected information */
2525 ibs.ibs_selected_state = bondport_flags_distributing(port)
2526 ? SelectedState_SELECTED : SelectedState_UNSELECTED;
2527 }
2528 error = copyout(&ibs, dst, sizeof(ibs));
2529 if (error != 0) {
2530 break;
2531 }
2532 dst += sizeof(ibs);
2533 count--;
2534 }
2535
2536 done:
2537 if (error == 0) {
2538 error = copyout(ibr_p, datap, sizeof(*ibr_p));
2539 }
2540 else {
2541 (void)copyout(ibr_p, datap, sizeof(*ibr_p));
2542 }
2543 return (error);
2544 }
2545
2546 static int
2547 bond_set_promisc(__unused struct ifnet *ifp)
2548 {
2549 int error = 0;
2550 return (error);
2551 }
2552
2553 static void
2554 bond_get_mtu_values(ifbond_ref ifb, int * ret_min, int * ret_max)
2555 {
2556 int mtu_min = 0;
2557 int mtu_max = 0;
2558 bondport_ref p;
2559
2560 if (TAILQ_FIRST(&ifb->ifb_port_list) != NULL) {
2561 mtu_min = IF_MINMTU;
2562 }
2563 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2564 struct ifdevmtu * devmtu_p = &p->po_devmtu;
2565
2566 if (devmtu_p->ifdm_min > mtu_min) {
2567 mtu_min = devmtu_p->ifdm_min;
2568 }
2569 if (mtu_max == 0 || devmtu_p->ifdm_max < mtu_max) {
2570 mtu_max = devmtu_p->ifdm_max;
2571 }
2572 }
2573 *ret_min = mtu_min;
2574 *ret_max = mtu_max;
2575 return;
2576 }
2577
2578 static int
2579 bond_set_mtu_on_ports(ifbond_ref ifb, int mtu)
2580 {
2581 int error = 0;
2582 bondport_ref p;
2583
2584 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2585 error = siocsifmtu(p->po_ifp, mtu);
2586 if (error != 0) {
2587 printf("if_bond(%s): SIOCSIFMTU %s failed, %d\n",
2588 ifb->ifb_name, bondport_get_name(p), error);
2589 break;
2590 }
2591 }
2592 return (error);
2593 }
2594
2595 static int
2596 bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu)
2597 {
2598 int error = 0;
2599 ifbond_ref ifb;
2600 int mtu_min;
2601 int mtu_max;
2602 int new_max;
2603 int old_max;
2604
2605 bond_lock();
2606 ifb = (ifbond_ref)ifnet_softc(ifp);
2607 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2608 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2609 goto done;
2610 }
2611 ifbond_retain(ifb);
2612 ifbond_wait(ifb, "bond_set_mtu");
2613
2614 /* check again */
2615 if (ifnet_softc(ifp) == NULL || ifbond_flags_if_detaching(ifb)) {
2616 error = EBUSY;
2617 goto signal_done;
2618 }
2619 bond_get_mtu_values(ifb, &mtu_min, &mtu_max);
2620 if (mtu > mtu_max) {
2621 error = EINVAL;
2622 goto signal_done;
2623 }
2624 if (mtu < mtu_min && (isdevmtu == 0 || mtu != 0)) {
2625 /* allow SIOCSIFALTMTU to set the mtu to 0 */
2626 error = EINVAL;
2627 goto signal_done;
2628 }
2629 if (isdevmtu) {
2630 new_max = (mtu > (int)ifnet_mtu(ifp)) ? mtu : (int)ifnet_mtu(ifp);
2631 }
2632 else {
2633 new_max = (mtu > ifb->ifb_altmtu) ? mtu : ifb->ifb_altmtu;
2634 }
2635 old_max = ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2636 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2637 if (new_max != old_max) {
2638 /* we can safely walk the list of port without the lock held */
2639 bond_unlock();
2640 error = bond_set_mtu_on_ports(ifb, new_max);
2641 if (error != 0) {
2642 /* try our best to back out of it */
2643 (void)bond_set_mtu_on_ports(ifb, old_max);
2644 }
2645 bond_lock();
2646 }
2647 if (error == 0) {
2648 if (isdevmtu) {
2649 ifb->ifb_altmtu = mtu;
2650 }
2651 else {
2652 ifnet_set_mtu(ifp, mtu);
2653 }
2654 }
2655
2656 signal_done:
2657 ifbond_signal(ifb, "bond_set_mtu");
2658 ifbond_release(ifb);
2659
2660 done:
2661 bond_unlock();
2662 return (error);
2663 }
2664
2665 static int
2666 bond_ioctl(struct ifnet *ifp, u_long cmd, void * data)
2667 {
2668 int error = 0;
2669 struct if_bond_req ibr;
2670 struct ifaddr * ifa;
2671 ifbond_ref ifb;
2672 struct ifreq * ifr;
2673 struct ifmediareq *ifmr;
2674 struct ifnet * port_ifp = NULL;
2675 user_addr_t user_addr;
2676
2677 if (ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
2678 return (EOPNOTSUPP);
2679 }
2680 ifr = (struct ifreq *)data;
2681 ifa = (struct ifaddr *)data;
2682
2683 switch (cmd) {
2684 case SIOCSIFADDR:
2685 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
2686 break;
2687
2688 case SIOCGIFMEDIA32:
2689 case SIOCGIFMEDIA64:
2690 bond_lock();
2691 ifb = (ifbond_ref)ifnet_softc(ifp);
2692 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2693 bond_unlock();
2694 return (ifb == NULL ? EOPNOTSUPP : EBUSY);
2695 }
2696 ifmr = (struct ifmediareq *)data;
2697 ifmr->ifm_current = IFM_ETHER;
2698 ifmr->ifm_mask = 0;
2699 ifmr->ifm_status = IFM_AVALID;
2700 ifmr->ifm_active = IFM_ETHER;
2701 ifmr->ifm_count = 1;
2702 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2703 if (ifb->ifb_active_lag != NULL) {
2704 ifmr->ifm_active = ifb->ifb_active_lag->lag_active_media;
2705 ifmr->ifm_status |= IFM_ACTIVE;
2706 }
2707 }
2708 else if (ifb->ifb_distributing_count > 0) {
2709 ifmr->ifm_active
2710 = ifb->ifb_distributing_array[0]->po_media_info.mi_active;
2711 ifmr->ifm_status |= IFM_ACTIVE;
2712 }
2713 bond_unlock();
2714 user_addr = (cmd == SIOCGIFMEDIA64) ?
2715 ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
2716 CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
2717 if (user_addr != USER_ADDR_NULL) {
2718 error = copyout(&ifmr->ifm_current,
2719 user_addr,
2720 sizeof(int));
2721 }
2722 break;
2723
2724 case SIOCSIFMEDIA:
2725 /* XXX send the SIFMEDIA to all children? Or force autoselect? */
2726 error = EINVAL;
2727 break;
2728
2729 case SIOCGIFDEVMTU:
2730 bond_lock();
2731 ifb = (ifbond_ref)ifnet_softc(ifp);
2732 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2733 bond_unlock();
2734 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2735 break;
2736 }
2737 ifr->ifr_devmtu.ifdm_current = bond_device_mtu(ifp, ifb);
2738 bond_get_mtu_values(ifb, &ifr->ifr_devmtu.ifdm_min,
2739 &ifr->ifr_devmtu.ifdm_max);
2740 bond_unlock();
2741 break;
2742
2743 case SIOCGIFALTMTU:
2744 bond_lock();
2745 ifb = (ifbond_ref)ifnet_softc(ifp);
2746 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2747 bond_unlock();
2748 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2749 break;
2750 }
2751 ifr->ifr_mtu = ifb->ifb_altmtu;
2752 bond_unlock();
2753 break;
2754
2755 case SIOCSIFALTMTU:
2756 error = bond_set_mtu(ifp, ifr->ifr_mtu, 1);
2757 break;
2758
2759 case SIOCSIFMTU:
2760 error = bond_set_mtu(ifp, ifr->ifr_mtu, 0);
2761 break;
2762
2763 case SIOCSIFBOND:
2764 user_addr = proc_is64bit(current_proc())
2765 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
2766 error = copyin(user_addr, &ibr, sizeof(ibr));
2767 if (error) {
2768 break;
2769 }
2770 switch (ibr.ibr_op) {
2771 case IF_BOND_OP_ADD_INTERFACE:
2772 case IF_BOND_OP_REMOVE_INTERFACE:
2773 port_ifp = ifunit(ibr.ibr_ibru.ibru_if_name);
2774 if (port_ifp == NULL) {
2775 error = ENXIO;
2776 break;
2777 }
2778 if (ifnet_type(port_ifp) != IFT_ETHER) {
2779 error = EPROTONOSUPPORT;
2780 break;
2781 }
2782 break;
2783 case IF_BOND_OP_SET_VERBOSE:
2784 case IF_BOND_OP_SET_MODE:
2785 break;
2786 default:
2787 error = EOPNOTSUPP;
2788 break;
2789 }
2790 if (error != 0) {
2791 break;
2792 }
2793 switch (ibr.ibr_op) {
2794 case IF_BOND_OP_ADD_INTERFACE:
2795 error = bond_add_interface(ifp, port_ifp);
2796 break;
2797 case IF_BOND_OP_REMOVE_INTERFACE:
2798 bond_lock();
2799 ifb = (ifbond_ref)ifnet_softc(ifp);
2800 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2801 bond_unlock();
2802 return (ifb == NULL ? EOPNOTSUPP : EBUSY);
2803 }
2804 error = bond_remove_interface(ifb, port_ifp);
2805 bond_unlock();
2806 break;
2807 case IF_BOND_OP_SET_VERBOSE:
2808 bond_lock();
2809 if (g_bond == NULL) {
2810 bond_unlock();
2811 error = ENXIO;
2812 break;
2813 }
2814 g_bond->verbose = ibr.ibr_ibru.ibru_int_val;
2815 bond_unlock();
2816 break;
2817 case IF_BOND_OP_SET_MODE:
2818 switch (ibr.ibr_ibru.ibru_int_val) {
2819 case IF_BOND_MODE_LACP:
2820 case IF_BOND_MODE_STATIC:
2821 break;
2822 default:
2823 error = EINVAL;
2824 break;
2825 }
2826 if (error != 0) {
2827 break;
2828 }
2829 error = bond_set_mode(ifp, ibr.ibr_ibru.ibru_int_val);
2830 break;
2831 }
2832 break; /* SIOCSIFBOND */
2833
2834 case SIOCGIFBOND:
2835 user_addr = proc_is64bit(current_proc())
2836 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
2837 error = copyin(user_addr, &ibr, sizeof(ibr));
2838 if (error) {
2839 break;
2840 }
2841 switch (ibr.ibr_op) {
2842 case IF_BOND_OP_GET_STATUS:
2843 break;
2844 default:
2845 error = EOPNOTSUPP;
2846 break;
2847 }
2848 if (error != 0) {
2849 break;
2850 }
2851 bond_lock();
2852 ifb = (ifbond_ref)ifnet_softc(ifp);
2853 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2854 bond_unlock();
2855 return (ifb == NULL ? EOPNOTSUPP : EBUSY);
2856 }
2857 switch (ibr.ibr_op) {
2858 case IF_BOND_OP_GET_STATUS:
2859 error = bond_get_status(ifb, &ibr, user_addr);
2860 break;
2861 }
2862 bond_unlock();
2863 break; /* SIOCGIFBOND */
2864
2865 case SIOCSIFLLADDR:
2866 error = EOPNOTSUPP;
2867 break;
2868
2869 case SIOCSIFFLAGS:
2870 /* enable/disable promiscuous mode */
2871 bond_lock();
2872 error = bond_set_promisc(ifp);
2873 bond_unlock();
2874 break;
2875
2876 case SIOCADDMULTI:
2877 case SIOCDELMULTI:
2878 error = bond_setmulti(ifp);
2879 break;
2880 default:
2881 error = EOPNOTSUPP;
2882 }
2883 return error;
2884 }
2885
2886 static void
2887 bond_if_free(struct ifnet * ifp)
2888 {
2889 ifbond_ref ifb;
2890
2891 if (ifp == NULL) {
2892 return;
2893 }
2894 bond_lock();
2895 ifb = (ifbond_ref)ifnet_softc(ifp);
2896 if (ifb == NULL) {
2897 bond_unlock();
2898 return;
2899 }
2900 ifbond_release(ifb);
2901 bond_unlock();
2902 ifnet_release(ifp);
2903 return;
2904 }
2905
2906 static void
2907 bond_handle_event(struct ifnet * port_ifp, int event_code)
2908 {
2909 struct ifnet * bond_ifp = NULL;
2910 ifbond_ref ifb;
2911 int old_distributing_count;
2912 bondport_ref p;
2913 struct media_info media_info = { 0, 0};
2914
2915 switch (event_code) {
2916 case KEV_DL_IF_DETACHED:
2917 break;
2918 case KEV_DL_LINK_OFF:
2919 case KEV_DL_LINK_ON:
2920 media_info = interface_media_info(port_ifp);
2921 break;
2922 default:
2923 return;
2924 }
2925 bond_lock();
2926 p = bond_lookup_port(port_ifp);
2927 if (p == NULL) {
2928 bond_unlock();
2929 return;
2930 }
2931 ifb = p->po_bond;
2932 old_distributing_count = ifb->ifb_distributing_count;
2933 switch (event_code) {
2934 case KEV_DL_IF_DETACHED:
2935 bond_remove_interface(ifb, p->po_ifp);
2936 break;
2937 case KEV_DL_LINK_OFF:
2938 case KEV_DL_LINK_ON:
2939 p->po_media_info = media_info;
2940 if (p->po_enabled) {
2941 bondport_link_status_changed(p);
2942 }
2943 break;
2944 }
2945 /* generate a link-event */
2946 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2947 if (ifbond_selection(ifb)) {
2948 event_code = (ifb->ifb_active_lag == NULL)
2949 ? KEV_DL_LINK_OFF
2950 : KEV_DL_LINK_ON;
2951 /* XXX need to take a reference on bond_ifp */
2952 bond_ifp = ifb->ifb_ifp;
2953 ifb->ifb_last_link_event = event_code;
2954 }
2955 else {
2956 event_code = (ifb->ifb_active_lag == NULL)
2957 ? KEV_DL_LINK_OFF
2958 : KEV_DL_LINK_ON;
2959 if (event_code != ifb->ifb_last_link_event) {
2960 if (g_bond->verbose) {
2961 timestamp_printf("%s: (event) generating LINK event\n",
2962 ifb->ifb_name);
2963 }
2964 bond_ifp = ifb->ifb_ifp;
2965 ifb->ifb_last_link_event = event_code;
2966 }
2967 }
2968 }
2969 else {
2970 /*
2971 * if the distributing array membership changed from 0 <-> !0
2972 * generate a link event
2973 */
2974 if (old_distributing_count == 0
2975 && ifb->ifb_distributing_count != 0) {
2976 event_code = KEV_DL_LINK_ON;
2977 }
2978 else if (old_distributing_count != 0
2979 && ifb->ifb_distributing_count == 0) {
2980 event_code = KEV_DL_LINK_OFF;
2981 }
2982 if (event_code != 0 && event_code != ifb->ifb_last_link_event) {
2983 bond_ifp = ifb->ifb_ifp;
2984 ifb->ifb_last_link_event = event_code;
2985 }
2986 }
2987
2988 bond_unlock();
2989 if (bond_ifp != NULL) {
2990 interface_link_event(bond_ifp, event_code);
2991 }
2992 return;
2993 }
2994
2995 static void
2996 bond_event(struct ifnet * port_ifp, __unused protocol_family_t protocol,
2997 const struct kev_msg * event)
2998 {
2999 int event_code;
3000
3001 if (event->vendor_code != KEV_VENDOR_APPLE
3002 || event->kev_class != KEV_NETWORK_CLASS
3003 || event->kev_subclass != KEV_DL_SUBCLASS) {
3004 return;
3005 }
3006 event_code = event->event_code;
3007 switch (event_code) {
3008 case KEV_DL_LINK_OFF:
3009 case KEV_DL_LINK_ON:
3010 /* we only care about link status changes */
3011 bond_handle_event(port_ifp, event_code);
3012 break;
3013 default:
3014 break;
3015 }
3016 return;
3017 }
3018
3019 static errno_t
3020 bond_detached(ifnet_t port_ifp, __unused protocol_family_t protocol)
3021 {
3022 bond_handle_event(port_ifp, KEV_DL_IF_DETACHED);
3023 return (0);
3024 }
3025
3026 static void
3027 interface_link_event(struct ifnet * ifp, u_int32_t event_code)
3028 {
3029 struct {
3030 struct kern_event_msg header;
3031 u_int32_t unit;
3032 char if_name[IFNAMSIZ];
3033 } event;
3034
3035 bzero(&event, sizeof(event));
3036 event.header.total_size = sizeof(event);
3037 event.header.vendor_code = KEV_VENDOR_APPLE;
3038 event.header.kev_class = KEV_NETWORK_CLASS;
3039 event.header.kev_subclass = KEV_DL_SUBCLASS;
3040 event.header.event_code = event_code;
3041 event.header.event_data[0] = ifnet_family(ifp);
3042 event.unit = (u_int32_t) ifnet_unit(ifp);
3043 strlcpy(event.if_name, ifnet_name(ifp), IFNAMSIZ);
3044 ifnet_event(ifp, &event.header);
3045 return;
3046 }
3047
3048 /*
3049 * Function: bond_attach_protocol
3050 * Purpose:
3051 * Attach a DLIL protocol to the interface.
3052 *
3053 * The ethernet demux special cases to always return PF_BOND if the
3054 * interface is bonded. That means we receive all traffic from that
3055 * interface without passing any of the traffic to any other attached
3056 * protocol.
3057 */
3058 static int
3059 bond_attach_protocol(struct ifnet *ifp)
3060 {
3061 int error;
3062 struct ifnet_attach_proto_param reg;
3063
3064 bzero(&reg, sizeof(reg));
3065 reg.input = bond_input;
3066 reg.event = bond_event;
3067 reg.detached = bond_detached;
3068
3069 error = ifnet_attach_protocol(ifp, PF_BOND, &reg);
3070 if (error) {
3071 printf("bond over %s%d: ifnet_attach_protocol failed, %d\n",
3072 ifnet_name(ifp), ifnet_unit(ifp), error);
3073 }
3074 return (error);
3075 }
3076
3077 /*
3078 * Function: bond_detach_protocol
3079 * Purpose:
3080 * Detach our DLIL protocol from an interface
3081 */
3082 static int
3083 bond_detach_protocol(struct ifnet *ifp)
3084 {
3085 int error;
3086
3087 error = ifnet_detach_protocol(ifp, PF_BOND);
3088 if (error) {
3089 printf("bond over %s%d: ifnet_detach_protocol failed, %d\n",
3090 ifnet_name(ifp), ifnet_unit(ifp), error);
3091 }
3092 return (error);
3093 }
3094
3095 /*
3096 * DLIL interface family functions
3097 */
3098 extern int ether_attach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3099 extern void ether_detach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3100 extern int ether_attach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3101 extern void ether_detach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3102 extern int ether_attach_at(ifnet_t ifp, protocol_family_t protocol_family);
3103 extern void ether_detach_at(ifnet_t ifp, protocol_family_t protocol_family);
3104
3105 __private_extern__ int
3106 bond_family_init(void)
3107 {
3108 int error=0;
3109
3110 error = proto_register_plumber(PF_INET, APPLE_IF_FAM_BOND,
3111 ether_attach_inet,
3112 ether_detach_inet);
3113 if (error != 0) {
3114 printf("bond: proto_register_plumber failed for AF_INET error=%d\n",
3115 error);
3116 goto done;
3117 }
3118 #if INET6
3119 error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_BOND,
3120 ether_attach_inet6,
3121 ether_detach_inet6);
3122 if (error != 0) {
3123 printf("bond: proto_register_plumber failed for AF_INET6 error=%d\n",
3124 error);
3125 goto done;
3126 }
3127 #endif
3128 error = bond_clone_attach();
3129 if (error != 0) {
3130 printf("bond: proto_register_plumber failed bond_clone_attach error=%d\n",
3131 error);
3132 goto done;
3133 }
3134
3135 done:
3136 return (error);
3137 }
3138 /**
3139 **
3140 ** LACP routines:
3141 **
3142 **/
3143
3144 /**
3145 ** LACP ifbond_list routines
3146 **/
3147 static bondport_ref
3148 ifbond_list_find_moved_port(bondport_ref rx_port,
3149 const lacp_actor_partner_tlv_ref atlv)
3150 {
3151 ifbond_ref bond;
3152 bondport_ref p;
3153 partner_state_ref ps;
3154 LAG_info_ref ps_li;
3155
3156 TAILQ_FOREACH(bond, &g_bond->ifbond_list, ifb_bond_list) {
3157 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3158
3159 if (rx_port == p) {
3160 /* no point in comparing against ourselves */
3161 continue;
3162 }
3163 if (p->po_receive_state != ReceiveState_PORT_DISABLED) {
3164 /* it's not clear that we should be checking this */
3165 continue;
3166 }
3167 ps = &p->po_partner_state;
3168 if (lacp_actor_partner_state_defaulted(ps->ps_state)) {
3169 continue;
3170 }
3171 ps_li = &ps->ps_lag_info;
3172 if (ps->ps_port == lacp_actor_partner_tlv_get_port(atlv)
3173 && bcmp(&ps_li->li_system, atlv->lap_system,
3174 sizeof(ps_li->li_system)) == 0) {
3175 if (g_bond->verbose) {
3176 timestamp_printf("System " EA_FORMAT
3177 " Port 0x%x moved from %s to %s\n",
3178 EA_LIST(&ps_li->li_system), ps->ps_port,
3179 bondport_get_name(p),
3180 bondport_get_name(rx_port));
3181 }
3182 return (p);
3183 }
3184 }
3185 }
3186 return (NULL);
3187 }
3188
3189 /**
3190 ** LACP ifbond, LAG routines
3191 **/
3192
3193 static int
3194 ifbond_selection(ifbond_ref bond)
3195 {
3196 int all_ports_ready = 0;
3197 int active_media = 0;
3198 LAG_ref lag = NULL;
3199 int lag_changed = 0;
3200 bondport_ref p;
3201 int port_speed = 0;
3202
3203 lag = ifbond_find_best_LAG(bond, &active_media);
3204 if (lag != bond->ifb_active_lag) {
3205 if (bond->ifb_active_lag != NULL) {
3206 ifbond_deactivate_LAG(bond, bond->ifb_active_lag);
3207 bond->ifb_active_lag = NULL;
3208 }
3209 bond->ifb_active_lag = lag;
3210 if (lag != NULL) {
3211 ifbond_activate_LAG(bond, lag, active_media);
3212 }
3213 lag_changed = 1;
3214 }
3215 else if (lag != NULL) {
3216 if (lag->lag_active_media != active_media) {
3217 if (g_bond->verbose) {
3218 timestamp_printf("LAG PORT SPEED CHANGED from %d to %d\n",
3219 link_speed(lag->lag_active_media),
3220 link_speed(active_media));
3221 }
3222 ifbond_deactivate_LAG(bond, lag);
3223 ifbond_activate_LAG(bond, lag, active_media);
3224 lag_changed = 1;
3225 }
3226 }
3227 if (lag != NULL) {
3228 port_speed = link_speed(active_media);
3229 all_ports_ready = ifbond_all_ports_ready(bond);
3230 }
3231 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3232 if (lag != NULL && p->po_lag == lag
3233 && media_speed(&p->po_media_info) == port_speed
3234 && (p->po_mux_state == MuxState_DETACHED
3235 || p->po_selected == SelectedState_SELECTED
3236 || p->po_selected == SelectedState_STANDBY)
3237 && bondport_aggregatable(p)) {
3238 if (bond->ifb_max_active > 0) {
3239 if (lag->lag_selected_port_count < bond->ifb_max_active) {
3240 if (p->po_selected == SelectedState_STANDBY
3241 || p->po_selected == SelectedState_UNSELECTED) {
3242 bondport_set_selected(p, SelectedState_SELECTED);
3243 }
3244 }
3245 else if (p->po_selected == SelectedState_UNSELECTED) {
3246 bondport_set_selected(p, SelectedState_STANDBY);
3247 }
3248 }
3249 else {
3250 bondport_set_selected(p, SelectedState_SELECTED);
3251 }
3252 }
3253 if (bondport_flags_selected_changed(p)) {
3254 bondport_flags_clear_selected_changed(p);
3255 bondport_mux_machine(p, LAEventSelectedChange, NULL);
3256 }
3257 if (all_ports_ready
3258 && bondport_flags_ready(p)
3259 && p->po_mux_state == MuxState_WAITING) {
3260 bondport_mux_machine(p, LAEventReady, NULL);
3261 }
3262 bondport_transmit_machine(p, LAEventStart, NULL);
3263 }
3264 return (lag_changed);
3265 }
3266
3267 static LAG_ref
3268 ifbond_find_best_LAG(ifbond_ref bond, int * active_media)
3269 {
3270 int best_active = 0;
3271 LAG_ref best_lag = NULL;
3272 int best_count = 0;
3273 int best_speed = 0;
3274 LAG_ref lag;
3275
3276 if (bond->ifb_active_lag != NULL) {
3277 best_lag = bond->ifb_active_lag;
3278 best_count = LAG_get_aggregatable_port_count(best_lag, &best_active);
3279 if (bond->ifb_max_active > 0
3280 && best_count > bond->ifb_max_active) {
3281 best_count = bond->ifb_max_active;
3282 }
3283 best_speed = link_speed(best_active);
3284 }
3285 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3286 int active;
3287 int count;
3288 int speed;
3289
3290 if (lag == bond->ifb_active_lag) {
3291 /* we've already computed it */
3292 continue;
3293 }
3294 count = LAG_get_aggregatable_port_count(lag, &active);
3295 if (count == 0) {
3296 continue;
3297 }
3298 if (bond->ifb_max_active > 0
3299 && count > bond->ifb_max_active) {
3300 /* if there's a limit, don't count extra links */
3301 count = bond->ifb_max_active;
3302 }
3303 speed = link_speed(active);
3304 if ((count * speed) > (best_count * best_speed)) {
3305 best_count = count;
3306 best_speed = speed;
3307 best_active = active;
3308 best_lag = lag;
3309 }
3310 }
3311 if (best_count == 0) {
3312 return (NULL);
3313 }
3314 *active_media = best_active;
3315 return (best_lag);
3316 }
3317
3318 static void
3319 ifbond_deactivate_LAG(__unused ifbond_ref bond, LAG_ref lag)
3320 {
3321 bondport_ref p;
3322
3323 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3324 bondport_set_selected(p, SelectedState_UNSELECTED);
3325 }
3326 return;
3327 }
3328
3329 static void
3330 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media)
3331 {
3332 int need = 0;
3333 bondport_ref p;
3334
3335 if (bond->ifb_max_active > 0) {
3336 need = bond->ifb_max_active;
3337 }
3338 lag->lag_active_media = active_media;
3339 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3340 if (bondport_aggregatable(p) == 0) {
3341 bondport_set_selected(p, SelectedState_UNSELECTED);
3342 }
3343 else if (media_speed(&p->po_media_info) != link_speed(active_media)) {
3344 bondport_set_selected(p, SelectedState_UNSELECTED);
3345 }
3346 else if (p->po_mux_state == MuxState_DETACHED) {
3347 if (bond->ifb_max_active > 0) {
3348 if (need > 0) {
3349 bondport_set_selected(p, SelectedState_SELECTED);
3350 need--;
3351 }
3352 else {
3353 bondport_set_selected(p, SelectedState_STANDBY);
3354 }
3355 }
3356 else {
3357 bondport_set_selected(p, SelectedState_SELECTED);
3358 }
3359 }
3360 else {
3361 bondport_set_selected(p, SelectedState_UNSELECTED);
3362 }
3363 }
3364 return;
3365 }
3366
#if 0
/*
 * Function: ifbond_set_max_active
 * Purpose:
 *   (compiled out) Set the limit on active ports and, if the active
 *   LAG currently has more SELECTED ports than the limit allows,
 *   unselect the excess ones in list order.
 */
static void
ifbond_set_max_active(ifbond_ref bond, int max_active)
{
    LAG_ref		active = bond->ifb_active_lag;
    int			excess;
    bondport_ref	port;

    bond->ifb_max_active = max_active;
    if (bond->ifb_max_active <= 0 || active == NULL) {
        return;
    }
    if (active->lag_selected_port_count <= bond->ifb_max_active) {
        return;
    }
    excess = active->lag_selected_port_count - bond->ifb_max_active;
    TAILQ_FOREACH(port, &active->lag_port_list, po_lag_port_list) {
        if (port->po_selected != SelectedState_SELECTED) {
            continue;
        }
        bondport_set_selected(port, SelectedState_UNSELECTED);
        excess--;
        if (excess == 0) {
            break;
        }
    }
}
#endif
3395
3396 static int
3397 ifbond_all_ports_ready(ifbond_ref bond)
3398 {
3399 int ready = 0;
3400 bondport_ref p;
3401
3402 if (bond->ifb_active_lag == NULL) {
3403 return (0);
3404 }
3405 TAILQ_FOREACH(p, &bond->ifb_active_lag->lag_port_list, po_lag_port_list) {
3406 if (p->po_mux_state == MuxState_WAITING
3407 && p->po_selected == SelectedState_SELECTED) {
3408 if (bondport_flags_ready(p) == 0) {
3409 return (0);
3410 }
3411 }
3412 /* note that there was at least one ready port */
3413 ready = 1;
3414 }
3415 return (ready);
3416 }
3417
3418 static int
3419 ifbond_all_ports_attached(ifbond_ref bond, bondport_ref this_port)
3420 {
3421 bondport_ref p;
3422
3423 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3424 if (this_port == p) {
3425 continue;
3426 }
3427 if (bondport_flags_mux_attached(p) == 0) {
3428 return (0);
3429 }
3430 }
3431 return (1);
3432 }
3433
3434 static LAG_ref
3435 ifbond_get_LAG_matching_port(ifbond_ref bond, bondport_ref p)
3436 {
3437 LAG_ref lag;
3438
3439 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3440 if (bcmp(&lag->lag_info, &p->po_partner_state.ps_lag_info,
3441 sizeof(lag->lag_info)) == 0) {
3442 return (lag);
3443 }
3444 }
3445 return (NULL);
3446 }
3447
3448 static int
3449 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media)
3450 {
3451 int active;
3452 int count;
3453 bondport_ref p;
3454 int speed;
3455
3456 active = 0;
3457 count = 0;
3458 speed = 0;
3459 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3460 if (bondport_aggregatable(p)) {
3461 int this_speed;
3462
3463 this_speed = media_speed(&p->po_media_info);
3464 if (this_speed == 0) {
3465 continue;
3466 }
3467 if (this_speed > speed) {
3468 active = p->po_media_info.mi_active;
3469 speed = this_speed;
3470 count = 1;
3471 }
3472 else if (this_speed == speed) {
3473 count++;
3474 }
3475 }
3476 }
3477 *active_media = active;
3478 return (count);
3479 }
3480
3481
3482 /**
3483 ** LACP bondport routines
3484 **/
3485 static void
3486 bondport_link_status_changed(bondport_ref p)
3487 {
3488 ifbond_ref bond = p->po_bond;
3489
3490 if (g_bond->verbose) {
3491 if (media_active(&p->po_media_info)) {
3492 timestamp_printf("[%s] Link UP %d Mbit/s %s duplex\n",
3493 bondport_get_name(p),
3494 media_speed(&p->po_media_info),
3495 media_full_duplex(&p->po_media_info)
3496 ? "full" : "half");
3497 }
3498 else {
3499 timestamp_printf("[%s] Link DOWN\n", bondport_get_name(p));
3500 }
3501 }
3502 if (bond->ifb_mode == IF_BOND_MODE_LACP) {
3503 if (media_active(&p->po_media_info)
3504 && bond->ifb_active_lag != NULL
3505 && p->po_lag == bond->ifb_active_lag
3506 && p->po_selected != SelectedState_UNSELECTED) {
3507 if (media_speed(&p->po_media_info) != p->po_lag->lag_active_media) {
3508 if (g_bond->verbose) {
3509 timestamp_printf("[%s] Port speed %d differs from LAG %d\n",
3510 bondport_get_name(p),
3511 media_speed(&p->po_media_info),
3512 link_speed(p->po_lag->lag_active_media));
3513 }
3514 bondport_set_selected(p, SelectedState_UNSELECTED);
3515 }
3516 }
3517 bondport_receive_machine(p, LAEventMediaChange, NULL);
3518 bondport_mux_machine(p, LAEventMediaChange, NULL);
3519 bondport_periodic_transmit_machine(p, LAEventMediaChange, NULL);
3520 }
3521 else {
3522 if (media_active(&p->po_media_info)) {
3523 bondport_enable_distributing(p);
3524 }
3525 else {
3526 bondport_disable_distributing(p);
3527 }
3528 }
3529 return;
3530 }
3531
3532 static int
3533 bondport_aggregatable(bondport_ref p)
3534 {
3535 partner_state_ref ps = &p->po_partner_state;
3536
3537 if (lacp_actor_partner_state_aggregatable(p->po_actor_state) == 0
3538 || lacp_actor_partner_state_aggregatable(ps->ps_state) == 0) {
3539 /* we and/or our partner are individual */
3540 return (0);
3541 }
3542 if (p->po_lag == NULL) {
3543 return (0);
3544 }
3545 switch (p->po_receive_state) {
3546 default:
3547 if (g_bond->verbose) {
3548 timestamp_printf("[%s] Port is not selectable\n",
3549 bondport_get_name(p));
3550 }
3551 return (0);
3552 case ReceiveState_CURRENT:
3553 case ReceiveState_EXPIRED:
3554 break;
3555 }
3556 return (1);
3557 }
3558
3559 static int
3560 bondport_matches_LAG(bondport_ref p, LAG_ref lag)
3561 {
3562 LAG_info_ref lag_li;
3563 partner_state_ref ps;
3564 LAG_info_ref ps_li;
3565
3566 ps = &p->po_partner_state;
3567 ps_li = &ps->ps_lag_info;
3568 lag_li = &lag->lag_info;
3569 if (ps_li->li_system_priority == lag_li->li_system_priority
3570 && ps_li->li_key == lag_li->li_key
3571 && (bcmp(&ps_li->li_system, &lag_li->li_system,
3572 sizeof(lag_li->li_system))
3573 == 0)) {
3574 return (1);
3575 }
3576 return (0);
3577 }
3578
3579 static int
3580 bondport_remove_from_LAG(bondport_ref p)
3581 {
3582 int active_lag = 0;
3583 ifbond_ref bond = p->po_bond;
3584 LAG_ref lag = p->po_lag;
3585
3586 if (lag == NULL) {
3587 return (0);
3588 }
3589 TAILQ_REMOVE(&lag->lag_port_list, p, po_lag_port_list);
3590 if (g_bond->verbose) {
3591 timestamp_printf("[%s] Removed from LAG (0x%04x," EA_FORMAT
3592 ",0x%04x)\n",
3593 bondport_get_name(p),
3594 lag->lag_info.li_system_priority,
3595 EA_LIST(&lag->lag_info.li_system),
3596 lag->lag_info.li_key);
3597 }
3598 p->po_lag = NULL;
3599 lag->lag_port_count--;
3600 if (lag->lag_port_count > 0) {
3601 return (bond->ifb_active_lag == lag);
3602 }
3603 if (g_bond->verbose) {
3604 timestamp_printf("Key 0x%04x: LAG Released (%04x," EA_FORMAT
3605 ",0x%04x)\n",
3606 bond->ifb_key,
3607 lag->lag_info.li_system_priority,
3608 EA_LIST(&lag->lag_info.li_system),
3609 lag->lag_info.li_key);
3610 }
3611 TAILQ_REMOVE(&bond->ifb_lag_list, lag, lag_list);
3612 if (bond->ifb_active_lag == lag) {
3613 bond->ifb_active_lag = NULL;
3614 active_lag = 1;
3615 }
3616 FREE(lag, M_BOND);
3617 return (active_lag);
3618 }
3619
3620 static void
3621 bondport_add_to_LAG(bondport_ref p, LAG_ref lag)
3622 {
3623 TAILQ_INSERT_TAIL(&lag->lag_port_list, p, po_lag_port_list);
3624 p->po_lag = lag;
3625 lag->lag_port_count++;
3626 if (g_bond->verbose) {
3627 timestamp_printf("[%s] Added to LAG (0x%04x," EA_FORMAT "0x%04x)\n",
3628 bondport_get_name(p),
3629 lag->lag_info.li_system_priority,
3630 EA_LIST(&lag->lag_info.li_system),
3631 lag->lag_info.li_key);
3632 }
3633 return;
3634 }
3635
3636 static void
3637 bondport_assign_to_LAG(bondport_ref p)
3638 {
3639 ifbond_ref bond = p->po_bond;
3640 LAG_ref lag;
3641
3642 if (lacp_actor_partner_state_defaulted(p->po_actor_state)) {
3643 bondport_remove_from_LAG(p);
3644 return;
3645 }
3646 lag = p->po_lag;
3647 if (lag != NULL) {
3648 if (bondport_matches_LAG(p, lag)) {
3649 /* still OK */
3650 return;
3651 }
3652 bondport_remove_from_LAG(p);
3653 }
3654 lag = ifbond_get_LAG_matching_port(bond, p);
3655 if (lag != NULL) {
3656 bondport_add_to_LAG(p, lag);
3657 return;
3658 }
3659 lag = (LAG_ref)_MALLOC(sizeof(*lag), M_BOND, M_WAITOK);
3660 TAILQ_INIT(&lag->lag_port_list);
3661 lag->lag_port_count = 0;
3662 lag->lag_selected_port_count = 0;
3663 lag->lag_info = p->po_partner_state.ps_lag_info;
3664 TAILQ_INSERT_TAIL(&bond->ifb_lag_list, lag, lag_list);
3665 if (g_bond->verbose) {
3666 timestamp_printf("Key 0x%04x: LAG Created (0x%04x," EA_FORMAT
3667 ",0x%04x)\n",
3668 bond->ifb_key,
3669 lag->lag_info.li_system_priority,
3670 EA_LIST(&lag->lag_info.li_system),
3671 lag->lag_info.li_key);
3672 }
3673 bondport_add_to_LAG(p, lag);
3674 return;
3675 }
3676
3677 static void
3678 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p)
3679 {
3680 bondport_ref moved_port;
3681
3682 moved_port
3683 = ifbond_list_find_moved_port(p, (const lacp_actor_partner_tlv_ref)
3684 &in_lacpdu_p->la_actor_tlv);
3685 if (moved_port != NULL) {
3686 bondport_receive_machine(moved_port, LAEventPortMoved, NULL);
3687 }
3688 bondport_receive_machine(p, LAEventPacket, in_lacpdu_p);
3689 bondport_mux_machine(p, LAEventPacket, in_lacpdu_p);
3690 bondport_periodic_transmit_machine(p, LAEventPacket, in_lacpdu_p);
3691 return;
3692 }
3693
3694 static void
3695 bondport_set_selected(bondport_ref p, SelectedState s)
3696 {
3697 if (s != p->po_selected) {
3698 ifbond_ref bond = p->po_bond;
3699 LAG_ref lag = p->po_lag;
3700
3701 bondport_flags_set_selected_changed(p);
3702 if (lag != NULL && bond->ifb_active_lag == lag) {
3703 if (p->po_selected == SelectedState_SELECTED) {
3704 lag->lag_selected_port_count--;
3705 }
3706 else if (s == SelectedState_SELECTED) {
3707 lag->lag_selected_port_count++;
3708 }
3709 if (g_bond->verbose) {
3710 timestamp_printf("[%s] SetSelected: %s (was %s)\n",
3711 bondport_get_name(p),
3712 SelectedStateString(s),
3713 SelectedStateString(p->po_selected));
3714 }
3715 }
3716 }
3717 p->po_selected = s;
3718 return;
3719 }
3720
3721 /**
3722 ** Receive machine
3723 **/
3724
3725 static void
3726 bondport_UpdateDefaultSelected(bondport_ref p)
3727 {
3728 bondport_set_selected(p, SelectedState_UNSELECTED);
3729 return;
3730 }
3731
3732 static void
3733 bondport_RecordDefault(bondport_ref p)
3734 {
3735 bzero(&p->po_partner_state, sizeof(p->po_partner_state));
3736 p->po_actor_state
3737 = lacp_actor_partner_state_set_defaulted(p->po_actor_state);
3738 bondport_assign_to_LAG(p);
3739 return;
3740 }
3741
3742 static void
3743 bondport_UpdateSelected(bondport_ref p, lacpdu_ref lacpdu_p)
3744 {
3745 lacp_actor_partner_tlv_ref actor;
3746 partner_state_ref ps;
3747 LAG_info_ref ps_li;
3748
3749 /* compare the PDU's Actor information to our Partner state */
3750 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
3751 ps = &p->po_partner_state;
3752 ps_li = &ps->ps_lag_info;
3753 if (lacp_actor_partner_tlv_get_port(actor) != ps->ps_port
3754 || (lacp_actor_partner_tlv_get_port_priority(actor)
3755 != ps->ps_port_priority)
3756 || bcmp(actor->lap_system, &ps_li->li_system, sizeof(ps_li->li_system))
3757 || (lacp_actor_partner_tlv_get_system_priority(actor)
3758 != ps_li->li_system_priority)
3759 || (lacp_actor_partner_tlv_get_key(actor) != ps_li->li_key)
3760 || (lacp_actor_partner_state_aggregatable(actor->lap_state)
3761 != lacp_actor_partner_state_aggregatable(ps->ps_state))) {
3762 bondport_set_selected(p, SelectedState_UNSELECTED);
3763 if (g_bond->verbose) {
3764 timestamp_printf("[%s] updateSelected UNSELECTED\n",
3765 bondport_get_name(p));
3766 }
3767 }
3768 return;
3769 }
3770
3771 static void
3772 bondport_RecordPDU(bondport_ref p, lacpdu_ref lacpdu_p)
3773 {
3774 lacp_actor_partner_tlv_ref actor;
3775 ifbond_ref bond = p->po_bond;
3776 int lacp_maintain = 0;
3777 partner_state_ref ps;
3778 lacp_actor_partner_tlv_ref partner;
3779 LAG_info_ref ps_li;
3780
3781 /* copy the PDU's Actor information into our Partner state */
3782 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
3783 ps = &p->po_partner_state;
3784 ps_li = &ps->ps_lag_info;
3785 ps->ps_port = lacp_actor_partner_tlv_get_port(actor);
3786 ps->ps_port_priority = lacp_actor_partner_tlv_get_port_priority(actor);
3787 ps_li->li_system = *((lacp_system_ref)actor->lap_system);
3788 ps_li->li_system_priority
3789 = lacp_actor_partner_tlv_get_system_priority(actor);
3790 ps_li->li_key = lacp_actor_partner_tlv_get_key(actor);
3791 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(actor->lap_state);
3792 p->po_actor_state
3793 = lacp_actor_partner_state_set_not_defaulted(p->po_actor_state);
3794
3795 /* compare the PDU's Partner information to our own information */
3796 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
3797
3798 if (lacp_actor_partner_state_active_lacp(ps->ps_state)
3799 || (lacp_actor_partner_state_active_lacp(p->po_actor_state)
3800 && lacp_actor_partner_state_active_lacp(partner->lap_state))) {
3801 if (g_bond->verbose) {
3802 timestamp_printf("[%s] recordPDU: LACP will maintain\n",
3803 bondport_get_name(p));
3804 }
3805 lacp_maintain = 1;
3806 }
3807 if ((lacp_actor_partner_tlv_get_port(partner)
3808 == bondport_get_index(p))
3809 && lacp_actor_partner_tlv_get_port_priority(partner) == p->po_priority
3810 && bcmp(partner->lap_system, &g_bond->system,
3811 sizeof(g_bond->system)) == 0
3812 && (lacp_actor_partner_tlv_get_system_priority(partner)
3813 == g_bond->system_priority)
3814 && lacp_actor_partner_tlv_get_key(partner) == bond->ifb_key
3815 && (lacp_actor_partner_state_aggregatable(partner->lap_state)
3816 == lacp_actor_partner_state_aggregatable(p->po_actor_state))
3817 && lacp_actor_partner_state_in_sync(actor->lap_state)
3818 && lacp_maintain) {
3819 ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
3820 if (g_bond->verbose) {
3821 timestamp_printf("[%s] recordPDU: LACP partner in sync\n",
3822 bondport_get_name(p));
3823 }
3824 }
3825 else if (lacp_actor_partner_state_aggregatable(actor->lap_state) == 0
3826 && lacp_actor_partner_state_in_sync(actor->lap_state)
3827 && lacp_maintain) {
3828 ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
3829 if (g_bond->verbose) {
3830 timestamp_printf("[%s] recordPDU: LACP partner in sync (ind)\n",
3831 bondport_get_name(p));
3832 }
3833 }
3834 bondport_assign_to_LAG(p);
3835 return;
3836 }
3837
3838 static __inline__ lacp_actor_partner_state
3839 updateNTTBits(lacp_actor_partner_state s)
3840 {
3841 return (s & (LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY
3842 | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT
3843 | LACP_ACTOR_PARTNER_STATE_AGGREGATION
3844 | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION));
3845 }
3846
3847 static void
3848 bondport_UpdateNTT(bondport_ref p, lacpdu_ref lacpdu_p)
3849 {
3850 ifbond_ref bond = p->po_bond;
3851 lacp_actor_partner_tlv_ref partner;
3852
3853 /* compare the PDU's Actor information to our Partner state */
3854 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
3855 if ((lacp_actor_partner_tlv_get_port(partner) != bondport_get_index(p))
3856 || lacp_actor_partner_tlv_get_port_priority(partner) != p->po_priority
3857 || bcmp(partner->lap_system, &g_bond->system, sizeof(g_bond->system))
3858 || (lacp_actor_partner_tlv_get_system_priority(partner)
3859 != g_bond->system_priority)
3860 || lacp_actor_partner_tlv_get_key(partner) != bond->ifb_key
3861 || (updateNTTBits(partner->lap_state)
3862 != updateNTTBits(p->po_actor_state))) {
3863 bondport_flags_set_ntt(p);
3864 if (g_bond->verbose) {
3865 timestamp_printf("[%s] updateNTT: Need To Transmit\n",
3866 bondport_get_name(p));
3867 }
3868 }
3869 return;
3870 }
3871
3872 static void
3873 bondport_AttachMuxToAggregator(bondport_ref p)
3874 {
3875 if (bondport_flags_mux_attached(p) == 0) {
3876 if (g_bond->verbose) {
3877 timestamp_printf("[%s] Attached Mux To Aggregator\n",
3878 bondport_get_name(p));
3879 }
3880 bondport_flags_set_mux_attached(p);
3881 }
3882 return;
3883 }
3884
3885 static void
3886 bondport_DetachMuxFromAggregator(bondport_ref p)
3887 {
3888 if (bondport_flags_mux_attached(p)) {
3889 if (g_bond->verbose) {
3890 timestamp_printf("[%s] Detached Mux From Aggregator\n",
3891 bondport_get_name(p));
3892 }
3893 bondport_flags_clear_mux_attached(p);
3894 }
3895 return;
3896 }
3897
3898 static void
3899 bondport_enable_distributing(bondport_ref p)
3900 {
3901 if (bondport_flags_distributing(p) == 0) {
3902 ifbond_ref bond = p->po_bond;
3903
3904 bond->ifb_distributing_array[bond->ifb_distributing_count++] = p;
3905 if (g_bond->verbose) {
3906 timestamp_printf("[%s] Distribution Enabled\n",
3907 bondport_get_name(p));
3908 }
3909 bondport_flags_set_distributing(p);
3910 }
3911 return;
3912 }
3913
3914 static void
3915 bondport_disable_distributing(bondport_ref p)
3916 {
3917 if (bondport_flags_distributing(p)) {
3918 bondport_ref * array;
3919 ifbond_ref bond;
3920 int count;
3921 int i;
3922
3923 bond = p->po_bond;
3924 array = bond->ifb_distributing_array;
3925 count = bond->ifb_distributing_count;
3926 for (i = 0; i < count; i++) {
3927 if (array[i] == p) {
3928 int j;
3929
3930 for (j = i; j < (count - 1); j++) {
3931 array[j] = array[j + 1];
3932 }
3933 break;
3934 }
3935 }
3936 bond->ifb_distributing_count--;
3937 if (g_bond->verbose) {
3938 timestamp_printf("[%s] Distribution Disabled\n",
3939 bondport_get_name(p));
3940 }
3941 bondport_flags_clear_distributing(p);
3942 }
3943 return;
3944 }
3945
3946 /**
3947 ** Receive machine functions
3948 **/
3949 static void
3950 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
3951 void * event_data);
3952 static void
3953 bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
3954 void * event_data);
3955 static void
3956 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
3957 void * event_data);
3958 static void
3959 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
3960 void * event_data);
3961 static void
3962 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
3963 void * event_data);
3964 static void
3965 bondport_receive_machine_current(bondport_ref p, LAEvent event,
3966 void * event_data);
3967
3968 static void
3969 bondport_receive_machine_event(bondport_ref p, LAEvent event,
3970 void * event_data)
3971 {
3972 switch (p->po_receive_state) {
3973 case ReceiveState_none:
3974 bondport_receive_machine_initialize(p, LAEventStart, NULL);
3975 break;
3976 case ReceiveState_INITIALIZE:
3977 bondport_receive_machine_initialize(p, event, event_data);
3978 break;
3979 case ReceiveState_PORT_DISABLED:
3980 bondport_receive_machine_port_disabled(p, event, event_data);
3981 break;
3982 case ReceiveState_EXPIRED:
3983 bondport_receive_machine_expired(p, event, event_data);
3984 break;
3985 case ReceiveState_LACP_DISABLED:
3986 bondport_receive_machine_lacp_disabled(p, event, event_data);
3987 break;
3988 case ReceiveState_DEFAULTED:
3989 bondport_receive_machine_defaulted(p, event, event_data);
3990 break;
3991 case ReceiveState_CURRENT:
3992 bondport_receive_machine_current(p, event, event_data);
3993 break;
3994 default:
3995 break;
3996 }
3997 return;
3998 }
3999
4000 static void
4001 bondport_receive_machine(bondport_ref p, LAEvent event,
4002 void * event_data)
4003 {
4004 switch (event) {
4005 case LAEventPacket:
4006 if (p->po_receive_state != ReceiveState_LACP_DISABLED) {
4007 bondport_receive_machine_current(p, event, event_data);
4008 }
4009 break;
4010 case LAEventMediaChange:
4011 if (media_active(&p->po_media_info)) {
4012 switch (p->po_receive_state) {
4013 case ReceiveState_PORT_DISABLED:
4014 case ReceiveState_LACP_DISABLED:
4015 bondport_receive_machine_port_disabled(p, LAEventMediaChange, NULL);
4016 break;
4017 default:
4018 break;
4019 }
4020 }
4021 else {
4022 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4023 }
4024 break;
4025 default:
4026 bondport_receive_machine_event(p, event, event_data);
4027 break;
4028 }
4029 return;
4030 }
4031
4032 static void
4033 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4034 __unused void * event_data)
4035 {
4036 switch (event) {
4037 case LAEventStart:
4038 devtimer_cancel(p->po_current_while_timer);
4039 if (g_bond->verbose) {
4040 timestamp_printf("[%s] Receive INITIALIZE\n",
4041 bondport_get_name(p));
4042 }
4043 p->po_receive_state = ReceiveState_INITIALIZE;
4044 bondport_set_selected(p, SelectedState_UNSELECTED);
4045 bondport_RecordDefault(p);
4046 p->po_actor_state
4047 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4048 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4049 break;
4050 default:
4051 break;
4052 }
4053 return;
4054 }
4055
4056 static void
4057 bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
4058 __unused void * event_data)
4059 {
4060 partner_state_ref ps;
4061
4062 switch (event) {
4063 case LAEventStart:
4064 devtimer_cancel(p->po_current_while_timer);
4065 if (g_bond->verbose) {
4066 timestamp_printf("[%s] Receive PORT_DISABLED\n",
4067 bondport_get_name(p));
4068 }
4069 p->po_receive_state = ReceiveState_PORT_DISABLED;
4070 ps = &p->po_partner_state;
4071 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(ps->ps_state);
4072 /* FALL THROUGH */
4073 case LAEventMediaChange:
4074 if (media_active(&p->po_media_info)) {
4075 if (media_full_duplex(&p->po_media_info)) {
4076 bondport_receive_machine_expired(p, LAEventStart, NULL);
4077 }
4078 else {
4079 bondport_receive_machine_lacp_disabled(p, LAEventStart, NULL);
4080 }
4081 }
4082 else if (p->po_selected == SelectedState_SELECTED) {
4083 struct timeval tv;
4084
4085 if (g_bond->verbose) {
4086 timestamp_printf("[%s] Receive PORT_DISABLED: "
4087 "link timer started\n",
4088 bondport_get_name(p));
4089 }
4090 tv.tv_sec = 1;
4091 tv.tv_usec = 0;
4092 devtimer_set_relative(p->po_current_while_timer, tv,
4093 (devtimer_timeout_func)
4094 bondport_receive_machine_port_disabled,
4095 (void *)LAEventTimeout, NULL);
4096 }
4097 else if (p->po_selected == SelectedState_STANDBY) {
4098 bondport_set_selected(p, SelectedState_UNSELECTED);
4099 }
4100 break;
4101 case LAEventTimeout:
4102 if (p->po_selected == SelectedState_SELECTED) {
4103 if (g_bond->verbose) {
4104 timestamp_printf("[%s] Receive PORT_DISABLED: "
4105 "link timer completed, marking UNSELECTED\n",
4106 bondport_get_name(p));
4107 }
4108 bondport_set_selected(p, SelectedState_UNSELECTED);
4109 }
4110 break;
4111 case LAEventPortMoved:
4112 bondport_receive_machine_initialize(p, LAEventStart, NULL);
4113 break;
4114 default:
4115 break;
4116 }
4117 return;
4118 }
4119
4120 static void
4121 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4122 __unused void * event_data)
4123 {
4124 lacp_actor_partner_state s;
4125 struct timeval tv;
4126
4127 switch (event) {
4128 case LAEventStart:
4129 devtimer_cancel(p->po_current_while_timer);
4130 if (g_bond->verbose) {
4131 timestamp_printf("[%s] Receive EXPIRED\n",
4132 bondport_get_name(p));
4133 }
4134 p->po_receive_state = ReceiveState_EXPIRED;
4135 s = p->po_partner_state.ps_state;
4136 s = lacp_actor_partner_state_set_out_of_sync(s);
4137 s = lacp_actor_partner_state_set_short_timeout(s);
4138 p->po_partner_state.ps_state = s;
4139 p->po_actor_state
4140 = lacp_actor_partner_state_set_expired(p->po_actor_state);
4141 /* start current_while timer */
4142 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4143 tv.tv_usec = 0;
4144 devtimer_set_relative(p->po_current_while_timer, tv,
4145 (devtimer_timeout_func)
4146 bondport_receive_machine_expired,
4147 (void *)LAEventTimeout, NULL);
4148
4149 break;
4150 case LAEventTimeout:
4151 bondport_receive_machine_defaulted(p, LAEventStart, NULL);
4152 break;
4153 default:
4154 break;
4155 }
4156 return;
4157 }
4158
4159 static void
4160 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4161 __unused void * event_data)
4162 {
4163 partner_state_ref ps;
4164 switch (event) {
4165 case LAEventStart:
4166 devtimer_cancel(p->po_current_while_timer);
4167 if (g_bond->verbose) {
4168 timestamp_printf("[%s] Receive LACP_DISABLED\n",
4169 bondport_get_name(p));
4170 }
4171 p->po_receive_state = ReceiveState_LACP_DISABLED;
4172 bondport_set_selected(p, SelectedState_UNSELECTED);
4173 bondport_RecordDefault(p);
4174 ps = &p->po_partner_state;
4175 ps->ps_state = lacp_actor_partner_state_set_individual(ps->ps_state);
4176 p->po_actor_state
4177 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4178 break;
4179 default:
4180 break;
4181 }
4182 return;
4183 }
4184
4185 static void
4186 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4187 __unused void * event_data)
4188 {
4189 switch (event) {
4190 case LAEventStart:
4191 devtimer_cancel(p->po_current_while_timer);
4192 if (g_bond->verbose) {
4193 timestamp_printf("[%s] Receive DEFAULTED\n",
4194 bondport_get_name(p));
4195 }
4196 p->po_receive_state = ReceiveState_DEFAULTED;
4197 bondport_UpdateDefaultSelected(p);
4198 bondport_RecordDefault(p);
4199 p->po_actor_state
4200 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4201 break;
4202 default:
4203 break;
4204 }
4205 return;
4206 }
4207
4208 static void
4209 bondport_receive_machine_current(bondport_ref p, LAEvent event,
4210 void * event_data)
4211 {
4212 partner_state_ref ps;
4213 struct timeval tv;
4214
4215 switch (event) {
4216 case LAEventPacket:
4217 devtimer_cancel(p->po_current_while_timer);
4218 if (g_bond->verbose) {
4219 timestamp_printf("[%s] Receive CURRENT\n",
4220 bondport_get_name(p));
4221 }
4222 p->po_receive_state = ReceiveState_CURRENT;
4223 bondport_UpdateSelected(p, event_data);
4224 bondport_UpdateNTT(p, event_data);
4225 bondport_RecordPDU(p, event_data);
4226 p->po_actor_state
4227 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4228 bondport_assign_to_LAG(p);
4229 /* start current_while timer */
4230 ps = &p->po_partner_state;
4231 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4232 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4233 }
4234 else {
4235 tv.tv_sec = LACP_LONG_TIMEOUT_TIME;
4236 }
4237 tv.tv_usec = 0;
4238 devtimer_set_relative(p->po_current_while_timer, tv,
4239 (devtimer_timeout_func)
4240 bondport_receive_machine_current,
4241 (void *)LAEventTimeout, NULL);
4242 break;
4243 case LAEventTimeout:
4244 bondport_receive_machine_expired(p, LAEventStart, NULL);
4245 break;
4246 default:
4247 break;
4248 }
4249 return;
4250 }
4251
4252 /**
4253 ** Periodic Transmission machine
4254 **/
4255
4256 static void
4257 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
4258 __unused void * event_data)
4259 {
4260 int interval;
4261 partner_state_ref ps;
4262 struct timeval tv;
4263
4264 switch (event) {
4265 case LAEventStart:
4266 if (g_bond->verbose) {
4267 timestamp_printf("[%s] periodic_transmit Start\n",
4268 bondport_get_name(p));
4269 }
4270 /* FALL THROUGH */
4271 case LAEventMediaChange:
4272 devtimer_cancel(p->po_periodic_timer);
4273 p->po_periodic_interval = 0;
4274 if (media_active(&p->po_media_info) == 0
4275 || media_full_duplex(&p->po_media_info) == 0) {
4276 break;
4277 }
4278 case LAEventPacket:
4279 /* Neither Partner nor Actor are LACP Active, no periodic tx */
4280 ps = &p->po_partner_state;
4281 if (lacp_actor_partner_state_active_lacp(p->po_actor_state) == 0
4282 && (lacp_actor_partner_state_active_lacp(ps->ps_state)
4283 == 0)) {
4284 devtimer_cancel(p->po_periodic_timer);
4285 p->po_periodic_interval = 0;
4286 break;
4287 }
4288 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4289 interval = LACP_FAST_PERIODIC_TIME;
4290 }
4291 else {
4292 interval = LACP_SLOW_PERIODIC_TIME;
4293 }
4294 if (p->po_periodic_interval != interval) {
4295 if (interval == LACP_FAST_PERIODIC_TIME
4296 && p->po_periodic_interval == LACP_SLOW_PERIODIC_TIME) {
4297 if (g_bond->verbose) {
4298 timestamp_printf("[%s] periodic_transmit:"
4299 " Need To Transmit\n",
4300 bondport_get_name(p));
4301 }
4302 bondport_flags_set_ntt(p);
4303 }
4304 p->po_periodic_interval = interval;
4305 tv.tv_usec = 0;
4306 tv.tv_sec = interval;
4307 devtimer_set_relative(p->po_periodic_timer, tv,
4308 (devtimer_timeout_func)
4309 bondport_periodic_transmit_machine,
4310 (void *)LAEventTimeout, NULL);
4311 if (g_bond->verbose) {
4312 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4313 bondport_get_name(p),
4314 p->po_periodic_interval);
4315 }
4316 }
4317 break;
4318 case LAEventTimeout:
4319 bondport_flags_set_ntt(p);
4320 tv.tv_sec = p->po_periodic_interval;
4321 tv.tv_usec = 0;
4322 devtimer_set_relative(p->po_periodic_timer, tv, (devtimer_timeout_func)
4323 bondport_periodic_transmit_machine,
4324 (void *)LAEventTimeout, NULL);
4325 if (g_bond->verbose > 1) {
4326 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4327 bondport_get_name(p), p->po_periodic_interval);
4328 }
4329 break;
4330 default:
4331 break;
4332 }
4333 return;
4334 }
4335
4336 /**
4337 ** Transmit machine
4338 **/
4339 static int
4340 bondport_can_transmit(bondport_ref p, int32_t current_secs,
4341 __darwin_time_t * next_secs)
4342 {
4343 if (p->po_last_transmit_secs != current_secs) {
4344 p->po_last_transmit_secs = current_secs;
4345 p->po_n_transmit = 0;
4346 }
4347 if (p->po_n_transmit < LACP_PACKET_RATE) {
4348 p->po_n_transmit++;
4349 return (1);
4350 }
4351 if (next_secs != NULL) {
4352 *next_secs = current_secs + 1;
4353 }
4354 return (0);
4355 }
4356
4357 static void
4358 bondport_transmit_machine(bondport_ref p, LAEvent event,
4359 void * event_data)
4360 {
4361 lacp_actor_partner_tlv_ref aptlv;
4362 lacp_collector_tlv_ref ctlv;
4363 struct timeval next_tick_time = {0, 0};
4364 lacpdu_ref out_lacpdu_p;
4365 packet_buffer_ref pkt;
4366 partner_state_ref ps;
4367 LAG_info_ref ps_li;
4368
4369 switch (event) {
4370 case LAEventTimeout:
4371 case LAEventStart:
4372 if (p->po_periodic_interval == 0 || bondport_flags_ntt(p) == 0) {
4373 break;
4374 }
4375 if (event_data == TRANSMIT_MACHINE_TX_IMMEDIATE) {
4376 /* we're going away, transmit the packet no matter what */
4377 }
4378 else if (bondport_can_transmit(p, devtimer_current_secs(),
4379 &next_tick_time.tv_sec) == 0) {
4380 if (devtimer_enabled(p->po_transmit_timer)) {
4381 if (g_bond->verbose > 0) {
4382 timestamp_printf("[%s] Transmit Timer Already Set\n",
4383 bondport_get_name(p));
4384 }
4385 }
4386 else {
4387 devtimer_set_absolute(p->po_transmit_timer, next_tick_time,
4388 (devtimer_timeout_func)
4389 bondport_transmit_machine,
4390 (void *)LAEventTimeout, NULL);
4391 if (g_bond->verbose > 0) {
4392 timestamp_printf("[%s] Transmit Timer Deadline %d secs\n",
4393 bondport_get_name(p),
4394 (int)next_tick_time.tv_sec);
4395 }
4396 }
4397 break;
4398 }
4399 if (g_bond->verbose > 0) {
4400 if (event == LAEventTimeout) {
4401 timestamp_printf("[%s] Transmit Timer Complete\n",
4402 bondport_get_name(p));
4403 }
4404 }
4405 pkt = packet_buffer_allocate(sizeof(*out_lacpdu_p));
4406 if (pkt == NULL) {
4407 printf("[%s] Transmit: failed to allocate packet buffer\n",
4408 bondport_get_name(p));
4409 break;
4410 }
4411 out_lacpdu_p = (lacpdu_ref)packet_buffer_byteptr(pkt);
4412 bzero(out_lacpdu_p, sizeof(*out_lacpdu_p));
4413 out_lacpdu_p->la_subtype = IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP;
4414 out_lacpdu_p->la_version = LACPDU_VERSION_1;
4415
4416 /* Actor */
4417 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_actor_tlv;
4418 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_ACTOR;
4419 aptlv->lap_length = LACPDU_ACTOR_TLV_LENGTH;
4420 *((lacp_system_ref)aptlv->lap_system) = g_bond->system;
4421 lacp_actor_partner_tlv_set_system_priority(aptlv,
4422 g_bond->system_priority);
4423 lacp_actor_partner_tlv_set_port_priority(aptlv, p->po_priority);
4424 lacp_actor_partner_tlv_set_port(aptlv, bondport_get_index(p));
4425 lacp_actor_partner_tlv_set_key(aptlv, p->po_bond->ifb_key);
4426 aptlv->lap_state = p->po_actor_state;
4427
4428 /* Partner */
4429 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_partner_tlv;
4430 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_PARTNER;
4431 aptlv->lap_length = LACPDU_PARTNER_TLV_LENGTH;
4432 ps = &p->po_partner_state;
4433 ps_li = &ps->ps_lag_info;
4434 lacp_actor_partner_tlv_set_port(aptlv, ps->ps_port);
4435 lacp_actor_partner_tlv_set_port_priority(aptlv, ps->ps_port_priority);
4436 *((lacp_system_ref)aptlv->lap_system) = ps_li->li_system;
4437 lacp_actor_partner_tlv_set_system_priority(aptlv,
4438 ps_li->li_system_priority);
4439 lacp_actor_partner_tlv_set_key(aptlv, ps_li->li_key);
4440 aptlv->lap_state = ps->ps_state;
4441
4442 /* Collector */
4443 ctlv = (lacp_collector_tlv_ref)out_lacpdu_p->la_collector_tlv;
4444 ctlv->lac_tlv_type = LACPDU_TLV_TYPE_COLLECTOR;
4445 ctlv->lac_length = LACPDU_COLLECTOR_TLV_LENGTH;
4446
4447 bondport_slow_proto_transmit(p, pkt);
4448 bondport_flags_clear_ntt(p);
4449 if (g_bond->verbose > 0) {
4450 timestamp_printf("[%s] Transmit Packet %d\n",
4451 bondport_get_name(p), p->po_n_transmit);
4452 }
4453 break;
4454 default:
4455 break;
4456 }
4457 return;
4458 }
4459
4460 /**
4461 ** Mux machine functions
4462 **/
4463
4464 static void
4465 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4466 void * event_data);
4467 static void
4468 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4469 void * event_data);
4470 static void
4471 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4472 void * event_data);
4473
4474 static void
4475 bondport_mux_machine_collecting_distributing(bondport_ref p, LAEvent event,
4476 void * event_data);
4477
4478 static void
4479 bondport_mux_machine(bondport_ref p, LAEvent event, void * event_data)
4480 {
4481 switch (p->po_mux_state) {
4482 case MuxState_none:
4483 bondport_mux_machine_detached(p, LAEventStart, NULL);
4484 break;
4485 case MuxState_DETACHED:
4486 bondport_mux_machine_detached(p, event, event_data);
4487 break;
4488 case MuxState_WAITING:
4489 bondport_mux_machine_waiting(p, event, event_data);
4490 break;
4491 case MuxState_ATTACHED:
4492 bondport_mux_machine_attached(p, event, event_data);
4493 break;
4494 case MuxState_COLLECTING_DISTRIBUTING:
4495 bondport_mux_machine_collecting_distributing(p, event, event_data);
4496 break;
4497 default:
4498 break;
4499 }
4500 return;
4501 }
4502
4503 static void
4504 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4505 __unused void * event_data)
4506 {
4507 lacp_actor_partner_state s;
4508
4509 switch (event) {
4510 case LAEventStart:
4511 devtimer_cancel(p->po_wait_while_timer);
4512 if (g_bond->verbose) {
4513 timestamp_printf("[%s] Mux DETACHED\n",
4514 bondport_get_name(p));
4515 }
4516 p->po_mux_state = MuxState_DETACHED;
4517 bondport_flags_clear_ready(p);
4518 bondport_DetachMuxFromAggregator(p);
4519 bondport_disable_distributing(p);
4520 s = p->po_actor_state;
4521 s = lacp_actor_partner_state_set_out_of_sync(s);
4522 s = lacp_actor_partner_state_set_not_collecting(s);
4523 s = lacp_actor_partner_state_set_not_distributing(s);
4524 p->po_actor_state = s;
4525 bondport_flags_set_ntt(p);
4526 break;
4527 case LAEventSelectedChange:
4528 case LAEventPacket:
4529 case LAEventMediaChange:
4530 if (p->po_selected == SelectedState_SELECTED
4531 || p->po_selected == SelectedState_STANDBY) {
4532 bondport_mux_machine_waiting(p, LAEventStart, NULL);
4533 }
4534 break;
4535 default:
4536 break;
4537 }
4538 return;
4539 }
4540
4541 static void
4542 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4543 __unused void * event_data)
4544 {
4545 struct timeval tv;
4546
4547 switch (event) {
4548 case LAEventStart:
4549 devtimer_cancel(p->po_wait_while_timer);
4550 if (g_bond->verbose) {
4551 timestamp_printf("[%s] Mux WAITING\n",
4552 bondport_get_name(p));
4553 }
4554 p->po_mux_state = MuxState_WAITING;
4555 /* FALL THROUGH */
4556 default:
4557 case LAEventSelectedChange:
4558 if (p->po_selected == SelectedState_UNSELECTED) {
4559 bondport_mux_machine_detached(p, LAEventStart, NULL);
4560 break;
4561 }
4562 if (p->po_selected == SelectedState_STANDBY) {
4563 devtimer_cancel(p->po_wait_while_timer);
4564 /* wait until state changes to SELECTED */
4565 if (g_bond->verbose) {
4566 timestamp_printf("[%s] Mux WAITING: Standby\n",
4567 bondport_get_name(p));
4568 }
4569 break;
4570 }
4571 if (bondport_flags_ready(p)) {
4572 if (g_bond->verbose) {
4573 timestamp_printf("[%s] Mux WAITING: Port is already ready\n",
4574 bondport_get_name(p));
4575 }
4576 break;
4577 }
4578 if (devtimer_enabled(p->po_wait_while_timer)) {
4579 if (g_bond->verbose) {
4580 timestamp_printf("[%s] Mux WAITING: Timer already set\n",
4581 bondport_get_name(p));
4582 }
4583 break;
4584 }
4585 if (ifbond_all_ports_attached(p->po_bond, p)) {
4586 devtimer_cancel(p->po_wait_while_timer);
4587 if (g_bond->verbose) {
4588 timestamp_printf("[%s] Mux WAITING: No waiting\n",
4589 bondport_get_name(p));
4590 }
4591 bondport_flags_set_ready(p);
4592 goto no_waiting;
4593 }
4594 if (g_bond->verbose) {
4595 timestamp_printf("[%s] Mux WAITING: 2 seconds\n",
4596 bondport_get_name(p));
4597 }
4598 tv.tv_sec = LACP_AGGREGATE_WAIT_TIME;
4599 tv.tv_usec = 0;
4600 devtimer_set_relative(p->po_wait_while_timer, tv,
4601 (devtimer_timeout_func)
4602 bondport_mux_machine_waiting,
4603 (void *)LAEventTimeout, NULL);
4604 break;
4605 case LAEventTimeout:
4606 if (g_bond->verbose) {
4607 timestamp_printf("[%s] Mux WAITING: Ready\n",
4608 bondport_get_name(p));
4609 }
4610 bondport_flags_set_ready(p);
4611 break;
4612 case LAEventReady:
4613 no_waiting:
4614 if (bondport_flags_ready(p)){
4615 if (g_bond->verbose) {
4616 timestamp_printf("[%s] Mux WAITING: All Ports Ready\n",
4617 bondport_get_name(p));
4618 }
4619 bondport_mux_machine_attached(p, LAEventStart, NULL);
4620 break;
4621 }
4622 break;
4623 }
4624 return;
4625 }
4626
4627 static void
4628 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4629 __unused void * event_data)
4630 {
4631 lacp_actor_partner_state s;
4632
4633 switch (event) {
4634 case LAEventStart:
4635 devtimer_cancel(p->po_wait_while_timer);
4636 if (g_bond->verbose) {
4637 timestamp_printf("[%s] Mux ATTACHED\n",
4638 bondport_get_name(p));
4639 }
4640 p->po_mux_state = MuxState_ATTACHED;
4641 bondport_AttachMuxToAggregator(p);
4642 s = p->po_actor_state;
4643 s = lacp_actor_partner_state_set_in_sync(s);
4644 s = lacp_actor_partner_state_set_not_collecting(s);
4645 s = lacp_actor_partner_state_set_not_distributing(s);
4646 bondport_disable_distributing(p);
4647 p->po_actor_state = s;
4648 bondport_flags_set_ntt(p);
4649 /* FALL THROUGH */
4650 default:
4651 switch (p->po_selected) {
4652 case SelectedState_SELECTED:
4653 s = p->po_partner_state.ps_state;
4654 if (lacp_actor_partner_state_in_sync(s)) {
4655 bondport_mux_machine_collecting_distributing(p, LAEventStart,
4656 NULL);
4657 }
4658 break;
4659 default:
4660 bondport_mux_machine_detached(p, LAEventStart, NULL);
4661 break;
4662 }
4663 break;
4664 }
4665 return;
4666 }
4667
4668 static void
4669 bondport_mux_machine_collecting_distributing(bondport_ref p,
4670 LAEvent event,
4671 __unused void * event_data)
4672 {
4673 lacp_actor_partner_state s;
4674
4675 switch (event) {
4676 case LAEventStart:
4677 devtimer_cancel(p->po_wait_while_timer);
4678 if (g_bond->verbose) {
4679 timestamp_printf("[%s] Mux COLLECTING_DISTRIBUTING\n",
4680 bondport_get_name(p));
4681 }
4682 p->po_mux_state = MuxState_COLLECTING_DISTRIBUTING;
4683 bondport_enable_distributing(p);
4684 s = p->po_actor_state;
4685 s = lacp_actor_partner_state_set_collecting(s);
4686 s = lacp_actor_partner_state_set_distributing(s);
4687 p->po_actor_state = s;
4688 bondport_flags_set_ntt(p);
4689 /* FALL THROUGH */
4690 default:
4691 s = p->po_partner_state.ps_state;
4692 if (lacp_actor_partner_state_in_sync(s) == 0) {
4693 bondport_mux_machine_attached(p, LAEventStart, NULL);
4694 break;
4695 }
4696 switch (p->po_selected) {
4697 case SelectedState_UNSELECTED:
4698 case SelectedState_STANDBY:
4699 bondport_mux_machine_attached(p, LAEventStart, NULL);
4700 break;
4701 default:
4702 break;
4703 }
4704 break;
4705 }
4706 return;
4707 }