/*
 * Source: git.saurik.com Git - apple/xnu.git (xnu-792.21.3.tar.gz),
 * blob bsd/net/if_bond.c
 */
1 /*
2 * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * if_bond.c
31 * - bond/failover interface
32 * - implements IEEE 802.3ad Link Aggregation
33 */
34
35 /*
36 * Modification History:
37 *
38 * April 29, 2004 Dieter Siegmund (dieter@apple.com)
39 * - created
40 */
41
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/queue.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/kern_event.h>
52
53 #include <net/bpf.h>
54 #include <net/ethernet.h>
55 #include <net/if.h>
56 #include <net/kpi_interface.h>
57 #include <net/if_arp.h>
58 #include <net/if_dl.h>
59 #include <net/if_ether.h>
60 #include <net/if_types.h>
61 #include <net/if_bond_var.h>
62 #include <net/ieee8023ad.h>
63 #include <net/lacp.h>
64 #include <net/dlil.h>
65 #include <sys/time.h>
66 #include <net/devtimer.h>
67 #include <net/if_vlan_var.h>
68
69 #include <kern/locks.h>
70 #include <libkern/OSAtomic.h>
71
72 #include <netinet/in.h>
73 #include <netinet/if_ether.h>
74 #include <netinet/in_systm.h>
75 #include <netinet/ip.h>
76 #include <netinet/ip6.h>
77
78 #include <net/if_media.h>
79 #include <net/multicast_list.h>
80
81 extern int dlil_input_packet(struct ifnet *, struct mbuf *, char *);
82
83 static struct ether_addr slow_proto_multicast = {
84 IEEE8023AD_SLOW_PROTO_MULTICAST
85 };
86
87 #define BOND_MAXUNIT 128
88 #define BONDNAME "bond"
89 #define M_BOND M_DEVBUF
90
91 #define EA_FORMAT "%x:%x:%x:%x:%x:%x"
92 #define EA_CH(e, i) ((u_char)((u_char *)(e))[(i)])
93 #define EA_LIST(ea) EA_CH(ea,0),EA_CH(ea,1),EA_CH(ea,2),EA_CH(ea,3),EA_CH(ea,4),EA_CH(ea,5)
94
95 #define timestamp_printf printf
96
97 /**
98 ** bond locks
99 **/
100 static __inline__ lck_grp_t *
101 my_lck_grp_alloc_init(const char * grp_name)
102 {
103 lck_grp_t * grp;
104 lck_grp_attr_t * grp_attrs;
105
106 grp_attrs = lck_grp_attr_alloc_init();
107 lck_grp_attr_setdefault(grp_attrs);
108 lck_grp_attr_setdefault(grp_attrs);
109 grp = lck_grp_alloc_init(grp_name, grp_attrs);
110 lck_grp_attr_free(grp_attrs);
111 return (grp);
112 }
113
114 static __inline__ lck_mtx_t *
115 my_lck_mtx_alloc_init(lck_grp_t * lck_grp)
116 {
117 lck_attr_t * lck_attrs;
118 lck_mtx_t * lck_mtx;
119
120 lck_attrs = lck_attr_alloc_init();
121 lck_attr_setdefault(lck_attrs);
122 lck_mtx = lck_mtx_alloc_init(lck_grp, lck_attrs);
123 lck_attr_free(lck_attrs);
124 return (lck_mtx);
125 }
126
127 static lck_mtx_t * bond_lck_mtx;
128
129 static __inline__ void
130 bond_lock_init(void)
131 {
132 lck_grp_t * bond_lck_grp;
133
134 bond_lck_grp = my_lck_grp_alloc_init("if_bond");
135 bond_lck_mtx = my_lck_mtx_alloc_init(bond_lck_grp);
136 }
137
138 static __inline__ void
139 bond_assert_lock_held(void)
140 {
141 lck_mtx_assert(bond_lck_mtx, LCK_MTX_ASSERT_OWNED);
142 return;
143 }
144
145 static __inline__ void
146 bond_assert_lock_not_held(void)
147 {
148 lck_mtx_assert(bond_lck_mtx, LCK_MTX_ASSERT_NOTOWNED);
149 return;
150 }
151
152 static __inline__ void
153 bond_lock(void)
154 {
155 lck_mtx_lock(bond_lck_mtx);
156 return;
157 }
158
159 static __inline__ void
160 bond_unlock(void)
161 {
162 lck_mtx_unlock(bond_lck_mtx);
163 return;
164 }
165
166 /**
167 ** bond structures, types
168 **/
169
170 struct LAG_info_s {
171 lacp_system li_system;
172 lacp_system_priority li_system_priority;
173 lacp_key li_key;
174 };
175 typedef struct LAG_info_s LAG_info, * LAG_info_ref;
176
/* forward declarations of the element types */
struct bondport_s;
struct ifbond_s;
struct LAG_s;

/* tail-queue head types for lists of ports, bonds, and LAGs */
TAILQ_HEAD(port_list, bondport_s);
TAILQ_HEAD(ifbond_list, ifbond_s);
TAILQ_HEAD(lag_list, LAG_s);

typedef struct ifbond_s         ifbond;
typedef struct ifbond_s *       ifbond_ref;
typedef struct bondport_s       bondport;
typedef struct bondport_s *     bondport_ref;
186
187 struct LAG_s {
188 TAILQ_ENTRY(LAG_s) lag_list;
189 struct port_list lag_port_list;
190 short lag_port_count;
191 short lag_selected_port_count;
192 int lag_active_media;
193 LAG_info lag_info;
194 };
195 typedef struct LAG_s LAG, * LAG_ref;
196
197 typedef struct partner_state_s {
198 LAG_info ps_lag_info;
199 lacp_port ps_port;
200 lacp_port_priority ps_port_priority;
201 lacp_actor_partner_state ps_state;
202 } partner_state, * partner_state_ref;
203
204 struct ifbond_s {
205 TAILQ_ENTRY(ifbond_s) ifb_bond_list;
206 int ifb_flags;
207 UInt32 ifb_retain_count;
208 char ifb_name[IFNAMSIZ];
209 struct ifnet * ifb_ifp;
210 bpf_packet_func ifb_bpf_input;
211 bpf_packet_func ifb_bpf_output;
212 int ifb_altmtu;
213 struct port_list ifb_port_list;
214 short ifb_port_count;
215 struct lag_list ifb_lag_list;
216 lacp_key ifb_key;
217 short ifb_max_active; /* 0 == unlimited */
218 LAG_ref ifb_active_lag;
219 struct ifmultiaddr * ifb_ifma_slow_proto;
220 bondport_ref * ifb_distributing_array;
221 int ifb_distributing_count;
222 };
223
/*
 * media_info
 * - the two ifmediareq fields kept by interface_media_info()
 */
struct media_info {
    int         mi_active;      /* copy of ifm_active */
    int         mi_status;      /* copy of ifm_status */
};
228
229 enum {
230 ReceiveState_none = 0,
231 ReceiveState_INITIALIZE = 1,
232 ReceiveState_PORT_DISABLED = 2,
233 ReceiveState_EXPIRED = 3,
234 ReceiveState_LACP_DISABLED = 4,
235 ReceiveState_DEFAULTED = 5,
236 ReceiveState_CURRENT = 6,
237 };
238
239 typedef u_char ReceiveState;
240
241 enum {
242 SelectedState_UNSELECTED = IF_BOND_STATUS_SELECTED_STATE_UNSELECTED,
243 SelectedState_SELECTED = IF_BOND_STATUS_SELECTED_STATE_SELECTED,
244 SelectedState_STANDBY = IF_BOND_STATUS_SELECTED_STATE_STANDBY
245 };
246 typedef u_char SelectedState;
247
248 static __inline__ const char *
249 SelectedStateString(SelectedState s)
250 {
251 static const char * names[] = { "UNSELECTED", "SELECTED", "STANDBY" };
252
253 if (s <= SelectedState_STANDBY) {
254 return (names[s]);
255 }
256 return ("<unknown>");
257 }
258
259 enum {
260 MuxState_none = 0,
261 MuxState_DETACHED = 1,
262 MuxState_WAITING = 2,
263 MuxState_ATTACHED = 3,
264 MuxState_COLLECTING_DISTRIBUTING = 4,
265 };
266
267 typedef u_char MuxState;
268
269 struct bondport_s {
270 TAILQ_ENTRY(bondport_s) po_port_list;
271 ifbond_ref po_bond;
272 struct multicast_list po_multicast;
273 struct ifnet * po_ifp;
274 struct ether_addr po_saved_addr;
275 int po_enabled;
276 char po_name[IFNAMSIZ];
277 struct ifdevmtu po_devmtu;
278
279 /* LACP */
280 TAILQ_ENTRY(bondport_s) po_lag_port_list;
281 devtimer_ref po_current_while_timer;
282 devtimer_ref po_periodic_timer;
283 devtimer_ref po_wait_while_timer;
284 devtimer_ref po_transmit_timer;
285 partner_state po_partner_state;
286 lacp_port_priority po_priority;
287 lacp_actor_partner_state po_actor_state;
288 u_char po_flags;
289 u_char po_periodic_interval;
290 u_char po_n_transmit;
291 ReceiveState po_receive_state;
292 MuxState po_mux_state;
293 SelectedState po_selected;
294 int32_t po_last_transmit_secs;
295 struct media_info po_media_info;
296 LAG_ref po_lag;
297 };
298
299 #define IFBF_PROMISC 0x1 /* promiscuous mode */
300 #define IFBF_IF_DETACHING 0x2 /* interface is detaching */
301 #define IFBF_LLADDR 0x4 /* specific link address requested */
302 #define IFBF_CHANGE_IN_PROGRESS 0x8 /* interface add/remove in progress */
303
304 static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p,
305 user_addr_t datap);
306
307 static __inline__ int
308 ifbond_flags_promisc(ifbond_ref ifb)
309 {
310 return ((ifb->ifb_flags & IFBF_PROMISC) != 0);
311 }
312
313 static __inline__ void
314 ifbond_flags_set_promisc(ifbond_ref ifb)
315 {
316 ifb->ifb_flags |= IFBF_PROMISC;
317 return;
318 }
319
320 static __inline__ void
321 ifbond_flags_clear_promisc(ifbond_ref ifb)
322 {
323 ifb->ifb_flags &= ~IFBF_PROMISC;
324 return;
325 }
326
327 static __inline__ int
328 ifbond_flags_if_detaching(ifbond_ref ifb)
329 {
330 return ((ifb->ifb_flags & IFBF_IF_DETACHING) != 0);
331 }
332
333 static __inline__ void
334 ifbond_flags_set_if_detaching(ifbond_ref ifb)
335 {
336 ifb->ifb_flags |= IFBF_IF_DETACHING;
337 return;
338 }
339
340 static __inline__ int
341 ifbond_flags_lladdr(ifbond_ref ifb)
342 {
343 return ((ifb->ifb_flags & IFBF_LLADDR) != 0);
344 }
345
346 static __inline__ void
347 ifbond_flags_set_lladdr(ifbond_ref ifb)
348 {
349 ifb->ifb_flags |= IFBF_LLADDR;
350 return;
351 }
352
353 static __inline__ void
354 ifbond_flags_clear_lladdr(ifbond_ref ifb)
355 {
356 ifb->ifb_flags &= ~IFBF_LLADDR;
357 return;
358 }
359
360 static __inline__ int
361 ifbond_flags_change_in_progress(ifbond_ref ifb)
362 {
363 return ((ifb->ifb_flags & IFBF_CHANGE_IN_PROGRESS) != 0);
364 }
365
366 static __inline__ void
367 ifbond_flags_set_change_in_progress(ifbond_ref ifb)
368 {
369 ifb->ifb_flags |= IFBF_CHANGE_IN_PROGRESS;
370 return;
371 }
372
373 static __inline__ void
374 ifbond_flags_clear_change_in_progress(ifbond_ref ifb)
375 {
376 ifb->ifb_flags &= ~IFBF_CHANGE_IN_PROGRESS;
377 return;
378 }
379
380 /*
381 * bondport_ref->po_flags bits
382 */
383 #define BONDPORT_FLAGS_NTT 0x01
384 #define BONDPORT_FLAGS_READY 0x02
385 #define BONDPORT_FLAGS_SELECTED_CHANGED 0x04
386 #define BONDPORT_FLAGS_MUX_ATTACHED 0x08
387 #define BONDPORT_FLAGS_DISTRIBUTING 0x10
388 #define BONDPORT_FLAGS_UNUSED2 0x20
389 #define BONDPORT_FLAGS_UNUSED3 0x40
390 #define BONDPORT_FLAGS_UNUSED4 0x80
391
392 static __inline__ void
393 bondport_flags_set_ntt(bondport_ref p)
394 {
395 p->po_flags |= BONDPORT_FLAGS_NTT;
396 return;
397 }
398
399 static __inline__ void
400 bondport_flags_clear_ntt(bondport_ref p)
401 {
402 p->po_flags &= ~BONDPORT_FLAGS_NTT;
403 return;
404 }
405
406 static __inline__ int
407 bondport_flags_ntt(bondport_ref p)
408 {
409 return ((p->po_flags & BONDPORT_FLAGS_NTT) != 0);
410 }
411
412 static __inline__ void
413 bondport_flags_set_ready(bondport_ref p)
414 {
415 p->po_flags |= BONDPORT_FLAGS_READY;
416 return;
417 }
418
419 static __inline__ void
420 bondport_flags_clear_ready(bondport_ref p)
421 {
422 p->po_flags &= ~BONDPORT_FLAGS_READY;
423 return;
424 }
425
426 static __inline__ int
427 bondport_flags_ready(bondport_ref p)
428 {
429 return ((p->po_flags & BONDPORT_FLAGS_READY) != 0);
430 }
431
432 static __inline__ void
433 bondport_flags_set_selected_changed(bondport_ref p)
434 {
435 p->po_flags |= BONDPORT_FLAGS_SELECTED_CHANGED;
436 return;
437 }
438
439 static __inline__ void
440 bondport_flags_clear_selected_changed(bondport_ref p)
441 {
442 p->po_flags &= ~BONDPORT_FLAGS_SELECTED_CHANGED;
443 return;
444 }
445
446 static __inline__ int
447 bondport_flags_selected_changed(bondport_ref p)
448 {
449 return ((p->po_flags & BONDPORT_FLAGS_SELECTED_CHANGED) != 0);
450 }
451
452 static __inline__ void
453 bondport_flags_set_mux_attached(bondport_ref p)
454 {
455 p->po_flags |= BONDPORT_FLAGS_MUX_ATTACHED;
456 return;
457 }
458
459 static __inline__ void
460 bondport_flags_clear_mux_attached(bondport_ref p)
461 {
462 p->po_flags &= ~BONDPORT_FLAGS_MUX_ATTACHED;
463 return;
464 }
465
466 static __inline__ int
467 bondport_flags_mux_attached(bondport_ref p)
468 {
469 return ((p->po_flags & BONDPORT_FLAGS_MUX_ATTACHED) != 0);
470 }
471
472 static __inline__ void
473 bondport_flags_set_distributing(bondport_ref p)
474 {
475 p->po_flags |= BONDPORT_FLAGS_DISTRIBUTING;
476 return;
477 }
478
479 static __inline__ void
480 bondport_flags_clear_distributing(bondport_ref p)
481 {
482 p->po_flags &= ~BONDPORT_FLAGS_DISTRIBUTING;
483 return;
484 }
485
486 static __inline__ int
487 bondport_flags_distributing(bondport_ref p)
488 {
489 return ((p->po_flags & BONDPORT_FLAGS_DISTRIBUTING) != 0);
490 }
491
492 typedef struct bond_globals_s {
493 struct ifbond_list ifbond_list;
494 lacp_system system;
495 lacp_system_priority system_priority;
496 int verbose;
497 } * bond_globals_ref;
498
499 static bond_globals_ref g_bond;
500
501 /**
502 ** packet_buffer routines
503 ** - thin wrapper for mbuf
504 **/
505
506 typedef struct mbuf * packet_buffer_ref;
507
508 static packet_buffer_ref
509 packet_buffer_allocate(int length)
510 {
511 packet_buffer_ref m;
512 int size;
513
514 /* leave room for ethernet header */
515 size = length + sizeof(struct ether_header);
516 if (size > (int)MHLEN) {
517 /* XXX doesn't handle large payloads */
518 printf("bond: packet_buffer_allocate size %d > max %d\n", size, MHLEN);
519 return (NULL);
520 }
521 m = m_gethdr(M_WAITOK, MT_DATA);
522 if (m == NULL) {
523 return (NULL);
524 }
525 m->m_len = size;
526 m->m_pkthdr.len = size;
527 return (m);
528 }
529
530 static void *
531 packet_buffer_byteptr(packet_buffer_ref buf)
532 {
533 return (buf->m_data + sizeof(struct ether_header));
534 }
535
/* Events delivered to the per-port state machines */
typedef enum {
    LAEventStart = 0,
    LAEventTimeout,
    LAEventPacket,
    LAEventMediaChange,
    LAEventSelectedChange,
    LAEventPortMoved,
    LAEventReady
} LAEvent;
545
546 /**
547 ** Receive machine
548 **/
549 static void
550 bondport_receive_machine(bondport_ref p, LAEvent event,
551 void * event_data);
552 /**
553 ** Periodic Transmission machine
554 **/
555 static void
556 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
557 void * event_data);
558
559 /**
560 ** Transmit machine
561 **/
562 static void
563 bondport_transmit_machine(bondport_ref p, LAEvent event,
564 void * event_data);
565
566 /**
567 ** Mux machine
568 **/
569 static void
570 bondport_mux_machine(bondport_ref p, LAEvent event,
571 void * event_data);
572
573 /**
574 ** bond, LAG
575 **/
576 static void
577 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media);
578
579 static void
580 ifbond_deactivate_LAG(ifbond_ref bond, LAG_ref lag);
581
582 static int
583 ifbond_all_ports_ready(ifbond_ref bond);
584
585 static LAG_ref
586 ifbond_find_best_LAG(ifbond_ref bond, int * active_media);
587
588 static int
589 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media);
590
591 static int
592 ifbond_selection(ifbond_ref bond);
593
594
595 /**
596 ** bondport
597 **/
598
599 static void
600 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p);
601
602 static void
603 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf);
604
605 static bondport_ref
606 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
607 int active, int short_timeout, int * error);
608 static void
609 bondport_start(bondport_ref p);
610
611 static void
612 bondport_free(bondport_ref p);
613
614 static int
615 bondport_aggregatable(bondport_ref p);
616
617 static int
618 bondport_remove_from_LAG(bondport_ref p);
619
620 static void
621 bondport_set_selected(bondport_ref p, SelectedState s);
622
623 static int
624 bondport_matches_LAG(bondport_ref p, LAG_ref lag);
625
626 static void
627 bondport_link_status_changed(bondport_ref p);
628
629 static void
630 bondport_enable_distributing(bondport_ref p);
631
632 static void
633 bondport_disable_distributing(bondport_ref p);
634
635 static __inline__ int
636 bondport_collecting(bondport_ref p)
637 {
638 return (lacp_actor_partner_state_collecting(p->po_actor_state));
639 }
640
641 /**
642 ** bond interface/dlil specific routines
643 **/
644 static int bond_clone_create(struct if_clone *, int);
645 static void bond_clone_destroy(struct ifnet *);
646 static int bond_input(struct mbuf *m, char *frame_header, struct ifnet *ifp,
647 u_long protocol_family, int sync_ok);
648 static int bond_output(struct ifnet *ifp, struct mbuf *m);
649 static int bond_ioctl(struct ifnet *ifp, u_int32_t cmd, void * addr);
650 static int bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode,
651 bpf_packet_func func);
652 static int bond_attach_protocol(struct ifnet *ifp);
653 static int bond_detach_protocol(struct ifnet *ifp);
654 static int bond_setmulti(struct ifnet *ifp);
655 static int bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp);
656 static int bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp);
657 static void bond_if_free(struct ifnet * ifp);
658
659 static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME,
660 bond_clone_create,
661 bond_clone_destroy,
662 0,
663 BOND_MAXUNIT);
664 static void interface_link_event(struct ifnet * ifp, u_long event_code);
665
666 static int
667 siocsifmtu(struct ifnet * ifp, int mtu)
668 {
669 struct ifreq ifr;
670
671 bzero(&ifr, sizeof(ifr));
672 ifr.ifr_mtu = mtu;
673 return (dlil_ioctl(0, ifp, SIOCSIFMTU, (caddr_t)&ifr));
674 }
675
676 static int
677 siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
678 {
679 struct ifreq ifr;
680 int error;
681
682 bzero(&ifr, sizeof(ifr));
683 error = dlil_ioctl(0, ifp, SIOCGIFDEVMTU, (caddr_t)&ifr);
684 if (error == 0) {
685 *ifdm_p = ifr.ifr_devmtu;
686 }
687 return (error);
688 }
689
690 static __inline__ void
691 ether_addr_copy(void * dest, const void * source)
692 {
693 bcopy(source, dest, ETHER_ADDR_LEN);
694 return;
695 }
696
697 static __inline__ void
698 ifbond_retain(ifbond_ref ifb)
699 {
700 OSIncrementAtomic(&ifb->ifb_retain_count);
701 }
702
703 static __inline__ void
704 ifbond_release(ifbond_ref ifb)
705 {
706 UInt32 old_retain_count;
707
708 old_retain_count = OSDecrementAtomic(&ifb->ifb_retain_count);
709 switch (old_retain_count) {
710 case 0:
711 panic("ifbond_release: retain count is 0\n");
712 break;
713 case 1:
714 if (g_bond->verbose) {
715 printf("ifbond_release(%s)\n", ifb->ifb_name);
716 }
717 if (ifb->ifb_ifma_slow_proto != NULL) {
718 if (g_bond->verbose) {
719 printf("ifbond_release(%s) removing multicast\n",
720 ifb->ifb_name);
721 }
722 (void)if_delmultiaddr(ifb->ifb_ifma_slow_proto, 0);
723 ifma_release(ifb->ifb_ifma_slow_proto);
724 }
725 if (ifb->ifb_distributing_array != NULL) {
726 FREE(ifb->ifb_distributing_array, M_BOND);
727 }
728 FREE(ifb, M_BOND);
729 break;
730 default:
731 break;
732 }
733 return;
734 }
735
736 /*
737 * Function: ifbond_wait
738 * Purpose:
739 * Allows a single thread to gain exclusive access to the ifbond
740 * data structure. Some operations take a long time to complete,
741 * and some have side-effects that we can't predict. Holding the
742 * bond_lock() across such operations is not possible.
743 *
744 * For example:
745 * 1) The SIOCSIFLLADDR ioctl takes a long time (several seconds) to
746 * complete. Simply holding the bond_lock() would freeze all other
747 * data structure accesses during that time.
748 * 2) When we attach our protocol to the interface, a dlil event is
749 * generated and invokes our bond_event() function. bond_event()
750 * needs to take the bond_lock(), but we're already holding it, so
751 * we're deadlocked against ourselves.
752 * Notes:
753 * Before calling, you must be holding the bond_lock and have taken
754 * a reference on the ifbond_ref.
755 */
756 static void
757 ifbond_wait(ifbond_ref ifb, const char * msg)
758 {
759 int waited = 0;
760
761 /* other add/remove in progress */
762 while (ifbond_flags_change_in_progress(ifb)) {
763 if (g_bond->verbose) {
764 printf("%s: %s msleep\n", ifb->ifb_name, msg);
765 }
766 waited = 1;
767 (void)msleep(ifb, bond_lck_mtx, PZERO, msg, 0);
768 }
769 /* prevent other bond list remove/add from taking place */
770 ifbond_flags_set_change_in_progress(ifb);
771 if (g_bond->verbose && waited) {
772 printf("%s: %s woke up\n", ifb->ifb_name, msg);
773 }
774 return;
775 }
776
777 /*
778 * Function: ifbond_signal
779 * Purpose:
780 * Allows the thread that previously invoked ifbond_wait() to
781 * give up exclusive access to the ifbond data structure, and wake up
782 * any other threads waiting to access
783 * Notes:
784 * Before calling, you must be holding the bond_lock and have taken
785 * a reference on the ifbond_ref.
786 */
787 static void
788 ifbond_signal(ifbond_ref ifb, const char * msg)
789 {
790 ifbond_flags_clear_change_in_progress(ifb);
791 wakeup((caddr_t)ifb);
792 if (g_bond->verbose) {
793 printf("%s: %s wakeup\n", ifb->ifb_name, msg);
794 }
795 return;
796 }
797
798 /**
799 ** Media information
800 **/
801
802 static int
803 link_speed(int active)
804 {
805 switch (IFM_SUBTYPE(active)) {
806 case IFM_10_T:
807 case IFM_10_2:
808 case IFM_10_5:
809 case IFM_10_STP:
810 case IFM_10_FL:
811 return (10);
812 case IFM_100_TX:
813 case IFM_100_FX:
814 case IFM_100_T4:
815 case IFM_100_VG:
816 case IFM_100_T2:
817 return (100);
818 case IFM_1000_SX:
819 case IFM_1000_LX:
820 case IFM_1000_CX:
821 case IFM_1000_TX:
822 return (1000);
823 case IFM_HPNA_1:
824 return (0);
825 default:
826 /* assume that new defined types are going to be at least 10GigE */
827 case IFM_10G_SR:
828 case IFM_10G_LR:
829 return (10000);
830 }
831 }
832
833 static __inline__ int
834 media_active(const struct media_info * mi)
835 {
836 if ((mi->mi_status & IFM_AVALID) == 0) {
837 return (1);
838 }
839 return ((mi->mi_status & IFM_ACTIVE) != 0);
840 }
841
842 static __inline__ int
843 media_full_duplex(const struct media_info * mi)
844 {
845 return ((mi->mi_active & IFM_FDX) != 0);
846 }
847
848 static __inline__ int
849 media_speed(const struct media_info * mi)
850 {
851 return (link_speed(mi->mi_active));
852 }
853
854 static struct media_info
855 interface_media_info(struct ifnet * ifp)
856 {
857 struct ifmediareq ifmr;
858 struct media_info mi;
859
860 bzero(&mi, sizeof(mi));
861 bzero(&ifmr, sizeof(ifmr));
862 if (dlil_ioctl(0, ifp, SIOCGIFMEDIA, (caddr_t)&ifmr) == 0) {
863 if (ifmr.ifm_count != 0) {
864 mi.mi_status = ifmr.ifm_status;
865 mi.mi_active = ifmr.ifm_active;
866 }
867 }
868 return (mi);
869 }
870
871 /**
872 ** interface utility functions
873 **/
874 static __inline__ struct ifaddr *
875 ifindex_get_ifaddr(int i)
876 {
877 if (i > if_index || i == 0) {
878 return (NULL);
879 }
880 return (ifnet_addrs[i - 1]);
881 }
882
883 static __inline__ struct ifaddr *
884 ifp_get_ifaddr(struct ifnet * ifp)
885 {
886 return (ifindex_get_ifaddr(ifp->if_index));
887 }
888
889 static __inline__ struct sockaddr_dl *
890 ifp_get_sdl(struct ifnet * ifp)
891 {
892 struct ifaddr * ifa;
893
894 ifa = ifp_get_ifaddr(ifp);
895 return ((struct sockaddr_dl *)(ifa->ifa_addr));
896 }
897
898 static int
899 if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p)
900 {
901 struct ifreq ifr;
902
903 /*
904 * XXX setting the sa_len to ETHER_ADDR_LEN is wrong, but the driver
905 * currently expects it that way
906 */
907 ifr.ifr_addr.sa_family = AF_UNSPEC;
908 ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
909 ether_addr_copy(ifr.ifr_addr.sa_data, ea_p);
910 #if 0
911 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d", ifp->if_name,
912 ifp->if_unit);
913 #endif 0
914 return (dlil_ioctl(0, ifp, SIOCSIFLLADDR, (caddr_t)&ifr));
915 }
916
917 /**
918 ** bond_globals
919 **/
920 static bond_globals_ref
921 bond_globals_create(lacp_system_priority sys_pri,
922 lacp_system_ref sys)
923 {
924 bond_globals_ref b;
925
926 b = _MALLOC(sizeof(*b), M_BOND, M_WAITOK);
927 if (b == NULL) {
928 return (NULL);
929 }
930 bzero(b, sizeof(*b));
931 TAILQ_INIT(&b->ifbond_list);
932 b->system = *sys;
933 b->system_priority = sys_pri;
934 #if 0
935 b->verbose = 1;
936 #endif 0
937 return (b);
938 }
939
940 static int
941 bond_globals_init(void)
942 {
943 bond_globals_ref b;
944 int i;
945 struct ifnet * ifp;
946
947 bond_assert_lock_not_held();
948
949 if (g_bond != NULL) {
950 return (0);
951 }
952
953 /*
954 * use en0's ethernet address as the system identifier, and if it's not
955 * there, use en1 .. en3
956 */
957 ifp = NULL;
958 for (i = 0; i < 4; i++) {
959 char ifname[IFNAMSIZ+1];
960 snprintf(ifname, sizeof(ifname), "en%d", i);
961 /* XXX ifunit() needs to return a reference on the ifp */
962 ifp = ifunit(ifname);
963 if (ifp != NULL) {
964 break;
965 }
966 }
967 b = NULL;
968 if (ifp != NULL) {
969 b = bond_globals_create(0x8000,
970 (lacp_system_ref)LLADDR(ifp_get_sdl(ifp)));
971 }
972 bond_lock();
973 if (g_bond != NULL) {
974 bond_unlock();
975 _FREE(b, M_BOND);
976 return (0);
977 }
978 g_bond = b;
979 bond_unlock();
980 if (ifp == NULL) {
981 return (ENXIO);
982 }
983 if (b == NULL) {
984 return (ENOMEM);
985 }
986 return (0);
987 }
988
989 static void
990 bond_bpf_vlan(struct ifnet * ifp, struct mbuf * m,
991 const struct ether_header * eh_p,
992 u_int16_t vlan_tag, bpf_packet_func func)
993 {
994 struct ether_vlan_header * vlh_p;
995 struct mbuf * vl_m;
996
997 vl_m = m_get(M_DONTWAIT, MT_DATA);
998 if (vl_m == NULL) {
999 return;
1000 }
1001 /* populate a new mbuf containing the vlan ethernet header */
1002 vl_m->m_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1003 vlh_p = mtod(vl_m, struct ether_vlan_header *);
1004 bcopy(eh_p, vlh_p, offsetof(struct ether_header, ether_type));
1005 vlh_p->evl_encap_proto = htons(ETHERTYPE_VLAN);
1006 vlh_p->evl_tag = htons(vlan_tag);
1007 vlh_p->evl_proto = eh_p->ether_type;
1008 vl_m->m_next = m;
1009 (*func)(ifp, vl_m);
1010 vl_m->m_next = NULL;
1011 m_free(vl_m);
1012 return;
1013 }
1014
1015 static __inline__ void
1016 bond_bpf_output(struct ifnet * ifp, struct mbuf * m,
1017 bpf_packet_func func)
1018 {
1019 if (func != NULL) {
1020 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1021 const struct ether_header * eh_p;
1022 eh_p = mtod(m, const struct ether_header *);
1023 m->m_data += ETHER_HDR_LEN;
1024 m->m_len -= ETHER_HDR_LEN;
1025 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
1026 m->m_data -= ETHER_HDR_LEN;
1027 m->m_len += ETHER_HDR_LEN;
1028 } else {
1029 (*func)(ifp, m);
1030 }
1031 }
1032 return;
1033 }
1034
1035 static __inline__ void
1036 bond_bpf_input(ifnet_t ifp, mbuf_t m, const struct ether_header * eh_p,
1037 bpf_packet_func func)
1038 {
1039 if (func != NULL) {
1040 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1041 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
1042 } else {
1043 /* restore the header */
1044 m->m_data -= ETHER_HDR_LEN;
1045 m->m_len += ETHER_HDR_LEN;
1046 (*func)(ifp, m);
1047 m->m_data += ETHER_HDR_LEN;
1048 m->m_len -= ETHER_HDR_LEN;
1049 }
1050 }
1051 return;
1052 }
1053
1054 /*
1055 * Function: bond_setmulti
1056 * Purpose:
1057 * Enable multicast reception on "our" interface by enabling multicasts on
1058 * each of the member ports.
1059 */
1060 static int
1061 bond_setmulti(struct ifnet * ifp)
1062 {
1063 ifbond_ref ifb;
1064 int error;
1065 int result = 0;
1066 bondport_ref p;
1067
1068 bond_lock();
1069 ifb = ifp->if_private;
1070 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1071 || TAILQ_EMPTY(&ifb->ifb_port_list)) {
1072 bond_unlock();
1073 return (0);
1074 }
1075 ifbond_retain(ifb);
1076 ifbond_wait(ifb, "bond_setmulti");
1077
1078 if (ifbond_flags_if_detaching(ifb)) {
1079 /* someone destroyed the bond while we were waiting */
1080 result = EBUSY;
1081 goto signal_done;
1082 }
1083 bond_unlock();
1084
1085 /* ifbond_wait() let's us safely walk the list without holding the lock */
1086 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1087 struct ifnet * port_ifp = p->po_ifp;
1088
1089 error = multicast_list_program(&p->po_multicast,
1090 ifp, port_ifp);
1091 if (error != 0) {
1092 printf("bond_setmulti(%s): "
1093 "multicast_list_program(%s%d) failed, %d\n",
1094 ifb->ifb_name, port_ifp->if_name,
1095 port_ifp->if_unit, error);
1096 result = error;
1097 }
1098 }
1099 bond_lock();
1100 signal_done:
1101 ifbond_release(ifb);
1102 ifbond_signal(ifb, "bond_setmulti");
1103 bond_unlock();
1104 return (result);
1105 }
1106
1107 static void
1108 bond_clone_attach(void)
1109 {
1110 if_clone_attach(&bond_cloner);
1111 bond_lock_init();
1112 return;
1113 }
1114
1115 static int
1116 ifbond_add_slow_proto_multicast(ifbond_ref ifb)
1117 {
1118 int error;
1119 struct ifmultiaddr * ifma = NULL;
1120 struct sockaddr_dl sdl;
1121
1122 bond_assert_lock_not_held();
1123
1124 bzero(&sdl, sizeof(sdl));
1125 sdl.sdl_len = sizeof(sdl);
1126 sdl.sdl_family = AF_LINK;
1127 sdl.sdl_type = IFT_ETHER;
1128 sdl.sdl_nlen = 0;
1129 sdl.sdl_alen = sizeof(slow_proto_multicast);
1130 bcopy(&slow_proto_multicast, sdl.sdl_data, sizeof(slow_proto_multicast));
1131 error = if_addmulti(ifb->ifb_ifp, (struct sockaddr *)&sdl,
1132 &ifma);
1133 if (error == 0) {
1134 ifb->ifb_ifma_slow_proto = ifma;
1135 }
1136 return (error);
1137 }
1138
1139 static int
1140 bond_clone_create(struct if_clone * ifc, int unit)
1141 {
1142 int error;
1143 ifbond_ref ifb;
1144 struct ifnet * ifp;
1145
1146 error = bond_globals_init();
1147 if (error != 0) {
1148 return (error);
1149 }
1150
1151 ifb = _MALLOC(sizeof(ifbond), M_BOND, M_WAITOK);
1152 if (ifb == NULL) {
1153 return (ENOMEM);
1154 }
1155 bzero(ifb, sizeof(*ifb));
1156
1157 ifbond_retain(ifb);
1158 TAILQ_INIT(&ifb->ifb_port_list);
1159 TAILQ_INIT(&ifb->ifb_lag_list);
1160 ifb->ifb_key = unit + 1;
1161
1162 /* use the interface name as the unique id for ifp recycle */
1163 if ((u_long)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d",
1164 ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) {
1165 ifbond_release(ifb);
1166 return (EINVAL);
1167 }
1168 error = dlil_if_acquire(APPLE_IF_FAM_BOND,
1169 ifb->ifb_name,
1170 strlen(ifb->ifb_name),
1171 &ifp);
1172 if (error) {
1173 ifbond_release(ifb);
1174 return (error);
1175 }
1176 ifb->ifb_ifp = ifp;
1177 ifp->if_name = ifc->ifc_name;
1178 ifp->if_unit = unit;
1179 ifp->if_family = APPLE_IF_FAM_BOND;
1180 ifp->if_private = NULL;
1181 ifp->if_ioctl = bond_ioctl;
1182 ifp->if_set_bpf_tap = bond_set_bpf_tap;
1183 ifp->if_free = bond_if_free;
1184 ifp->if_output = bond_output;
1185 ifp->if_hwassist = 0;
1186 ifp->if_addrlen = ETHER_ADDR_LEN;
1187 ifp->if_baudrate = 0;
1188 ifp->if_type = IFT_IEEE8023ADLAG;
1189 ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
1190 ifp->if_mtu = 0;
1191
1192 /* XXX ethernet specific */
1193 ifp->if_broadcast.length = ETHER_ADDR_LEN;
1194 bcopy(etherbroadcastaddr, ifp->if_broadcast.u.buffer, ETHER_ADDR_LEN);
1195
1196 error = dlil_if_attach(ifp);
1197 if (error != 0) {
1198 dlil_if_release(ifp);
1199 ifbond_release(ifb);
1200 return (error);
1201 }
1202 error = ifbond_add_slow_proto_multicast(ifb);
1203 if (error != 0) {
1204 printf("bond_clone_create(%s): "
1205 "failed to add slow_proto multicast, %d\n",
1206 ifb->ifb_name, error);
1207 }
1208
1209 /* attach as ethernet */
1210 bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1211
1212 bond_lock();
1213 ifp->if_private = ifb;
1214 TAILQ_INSERT_HEAD(&g_bond->ifbond_list, ifb, ifb_bond_list);
1215 bond_unlock();
1216
1217 return (0);
1218 }
1219
1220 static void
1221 bond_remove_all_interfaces(ifbond_ref ifb)
1222 {
1223 bondport_ref p;
1224
1225 bond_assert_lock_held();
1226
1227 /*
1228 * do this in reverse order to avoid re-programming the mac address
1229 * as each head interface is removed
1230 */
1231 while ((p = TAILQ_LAST(&ifb->ifb_port_list, port_list)) != NULL) {
1232 bond_remove_interface(ifb, p->po_ifp);
1233 }
1234 return;
1235 }
1236
1237 static void
1238 bond_remove(ifbond_ref ifb)
1239 {
1240 bond_assert_lock_held();
1241 ifbond_flags_set_if_detaching(ifb);
1242 TAILQ_REMOVE(&g_bond->ifbond_list, ifb, ifb_bond_list);
1243 bond_remove_all_interfaces(ifb);
1244 return;
1245 }
1246
1247 static void
1248 bond_if_detach(struct ifnet * ifp)
1249 {
1250 int error;
1251
1252 error = dlil_if_detach(ifp);
1253 if (error != DLIL_WAIT_FOR_FREE) {
1254 if (error) {
1255 printf("bond_if_detach %s%d: dlil_if_detach failed, %d\n",
1256 ifp->if_name, ifp->if_unit, error);
1257 }
1258 bond_if_free(ifp);
1259 }
1260 return;
1261 }
1262
1263 static void
1264 bond_clone_destroy(struct ifnet * ifp)
1265 {
1266 ifbond_ref ifb;
1267
1268 bond_lock();
1269 ifb = ifp->if_private;
1270 if (ifb == NULL || ifp->if_type != IFT_IEEE8023ADLAG) {
1271 bond_unlock();
1272 return;
1273 }
1274 if (ifbond_flags_if_detaching(ifb)) {
1275 bond_unlock();
1276 return;
1277 }
1278 bond_remove(ifb);
1279 bond_unlock();
1280 bond_if_detach(ifp);
1281 return;
1282 }
1283
1284 static int
1285 bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode, bpf_packet_func func)
1286 {
1287 ifbond_ref ifb;
1288
1289 bond_lock();
1290 ifb = ifp->if_private;
1291 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
1292 bond_unlock();
1293 return (ENODEV);
1294 }
1295 switch (mode) {
1296 case BPF_TAP_DISABLE:
1297 ifb->ifb_bpf_input = ifb->ifb_bpf_output = NULL;
1298 break;
1299
1300 case BPF_TAP_INPUT:
1301 ifb->ifb_bpf_input = func;
1302 break;
1303
1304 case BPF_TAP_OUTPUT:
1305 ifb->ifb_bpf_output = func;
1306 break;
1307
1308 case BPF_TAP_INPUT_OUTPUT:
1309 ifb->ifb_bpf_input = ifb->ifb_bpf_output = func;
1310 break;
1311 default:
1312 break;
1313 }
1314 bond_unlock();
1315 return 0;
1316 }
1317
1318 static uint32_t
1319 ether_header_hash(struct ether_header * eh_p)
1320 {
1321 uint32_t h;
1322
1323 /* get 32-bits from destination ether and ether type */
1324 h = (*((uint16_t *)&eh_p->ether_dhost[4]) << 16)
1325 | eh_p->ether_type;
1326 h ^= *((uint32_t *)&eh_p->ether_dhost[0]);
1327 return (h);
1328 }
1329
1330 static struct mbuf *
1331 S_mbuf_skip_to_offset(struct mbuf * m, long * offset)
1332 {
1333 int len;
1334
1335 len = m->m_len;
1336 while (*offset >= len) {
1337 *offset -= len;
1338 m = m->m_next;
1339 if (m == NULL) {
1340 break;
1341 }
1342 len = m->m_len;
1343 }
1344 return (m);
1345 }
1346
1347 #if BYTE_ORDER == BIG_ENDIAN
1348 static __inline__ uint32_t
1349 make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1350 {
1351 return (((uint32_t)c0 << 24) | ((uint32_t)c1 << 16)
1352 | ((uint32_t)c2 << 8) | (uint32_t)c3);
1353 }
1354 #else /* BYTE_ORDER == LITTLE_ENDIAN */
1355 static __inline__ uint32_t
1356 make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1357 {
1358 return (((uint32_t)c3 << 24) | ((uint32_t)c2 << 16)
1359 | ((uint32_t)c1 << 8) | (uint32_t)c0);
1360 }
1361 #endif /* BYTE_ORDER == LITTLE_ENDIAN */
1362
1363 static int
1364 S_mbuf_copy_uint32(struct mbuf * m, long offset, uint32_t * val)
1365 {
1366 struct mbuf * current;
1367 u_char * current_data;
1368 struct mbuf * next;
1369 u_char * next_data;
1370 int space_current;
1371
1372 current = S_mbuf_skip_to_offset(m, &offset);
1373 if (current == NULL) {
1374 return (1);
1375 }
1376 current_data = mtod(current, u_char *) + offset;
1377 space_current = current->m_len - offset;
1378 if (space_current >= (int)sizeof(uint32_t)) {
1379 *val = *((uint32_t *)current_data);
1380 return (0);
1381 }
1382 next = current->m_next;
1383 if (next == NULL || (next->m_len + space_current) < (int)sizeof(uint32_t)) {
1384 return (1);
1385 }
1386 next_data = mtod(next, u_char *);
1387 switch (space_current) {
1388 case 1:
1389 *val = make_uint32(current_data[0], next_data[0],
1390 next_data[1], next_data[2]);
1391 break;
1392 case 2:
1393 *val = make_uint32(current_data[0], current_data[1],
1394 next_data[0], next_data[1]);
1395 break;
1396 default:
1397 *val = make_uint32(current_data[0], current_data[1],
1398 current_data[2], next_data[0]);
1399 break;
1400 }
1401 return (0);
1402 }
1403
1404 #define IP_SRC_OFFSET (offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p))
1405 #define IP_DST_OFFSET (offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p))
1406
1407 static uint32_t
1408 ip_header_hash(struct mbuf * m)
1409 {
1410 u_char * data;
1411 struct in_addr ip_dst;
1412 struct in_addr ip_src;
1413 u_char ip_p;
1414 long offset;
1415 struct mbuf * orig_m = m;
1416
1417 /* find the IP protocol field relative to the start of the packet */
1418 offset = offsetof(struct ip, ip_p) + sizeof(struct ether_header);
1419 m = S_mbuf_skip_to_offset(m, &offset);
1420 if (m == NULL || m->m_len < 1) {
1421 goto bad_ip_packet;
1422 }
1423 data = mtod(m, u_char *) + offset;
1424 ip_p = *data;
1425
1426 /* find the IP src relative to the IP protocol */
1427 if ((m->m_len - offset)
1428 >= (int)(IP_SRC_OFFSET + sizeof(struct in_addr) * 2)) {
1429 /* this should be the normal case */
1430 ip_src = *(struct in_addr *)(data + IP_SRC_OFFSET);
1431 ip_dst = *(struct in_addr *)(data + IP_DST_OFFSET);
1432 }
1433 else {
1434 if (S_mbuf_copy_uint32(m, offset + IP_SRC_OFFSET,
1435 (uint32_t *)&ip_src.s_addr)) {
1436 goto bad_ip_packet;
1437 }
1438 if (S_mbuf_copy_uint32(m, offset + IP_DST_OFFSET,
1439 (uint32_t *)&ip_dst.s_addr)) {
1440 goto bad_ip_packet;
1441 }
1442 }
1443 return (ntohl(ip_dst.s_addr) ^ ntohl(ip_src.s_addr) ^ ((uint32_t)ip_p));
1444
1445 bad_ip_packet:
1446 return (ether_header_hash(mtod(orig_m, struct ether_header *)));
1447 }
1448
1449 #define IP6_ADDRS_LEN (sizeof(struct in6_addr) * 2)
1450 static uint32_t
1451 ipv6_header_hash(struct mbuf * m)
1452 {
1453 u_char * data;
1454 int i;
1455 long offset;
1456 struct mbuf * orig_m = m;
1457 uint32_t * scan;
1458 uint32_t val;
1459
1460 /* find the IP protocol field relative to the start of the packet */
1461 offset = offsetof(struct ip6_hdr, ip6_src) + sizeof(struct ether_header);
1462 m = S_mbuf_skip_to_offset(m, &offset);
1463 if (m == NULL) {
1464 goto bad_ipv6_packet;
1465 }
1466 data = mtod(m, u_char *) + offset;
1467 val = 0;
1468 if ((m->m_len - offset) >= (int)IP6_ADDRS_LEN) {
1469 /* this should be the normal case */
1470 for (i = 0, scan = (uint32_t *)data;
1471 i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t));
1472 i++, scan++) {
1473 val ^= *scan;
1474 }
1475 }
1476 else {
1477 for (i = 0; i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); i++) {
1478 uint32_t tmp;
1479 if (S_mbuf_copy_uint32(m, offset + i * sizeof(uint32_t),
1480 (uint32_t *)&tmp)) {
1481 goto bad_ipv6_packet;
1482 }
1483 val ^= tmp;
1484 }
1485 }
1486 return (ntohl(val));
1487
1488 bad_ipv6_packet:
1489 return (ether_header_hash(mtod(orig_m, struct ether_header *)));
1490 }
1491
1492 static int
1493 bond_output(struct ifnet * ifp, struct mbuf * m)
1494 {
1495 bpf_packet_func bpf_func;
1496 uint32_t h;
1497 ifbond_ref ifb;
1498 struct ifnet * port_ifp = NULL;
1499
1500 if (m == 0) {
1501 return (0);
1502 }
1503 if ((m->m_flags & M_PKTHDR) == 0) {
1504 m_freem(m);
1505 return (0);
1506 }
1507 if (m->m_pkthdr.socket_id != 0) {
1508 h = m->m_pkthdr.socket_id;
1509 }
1510 else {
1511 struct ether_header * eh_p;
1512
1513 eh_p = mtod(m, struct ether_header *);
1514 switch (ntohs(eh_p->ether_type)) {
1515 case ETHERTYPE_IP:
1516 h = ip_header_hash(m);
1517 break;
1518 case ETHERTYPE_IPV6:
1519 h = ipv6_header_hash(m);
1520 break;
1521 default:
1522 h = ether_header_hash(eh_p);
1523 break;
1524 }
1525 }
1526 bond_lock();
1527 ifb = ifp->if_private;
1528 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1529 || ifb->ifb_distributing_count == 0) {
1530 goto done;
1531 }
1532 h %= ifb->ifb_distributing_count;
1533 port_ifp = ifb->ifb_distributing_array[h]->po_ifp;
1534 bpf_func = ifb->ifb_bpf_output;
1535 bond_unlock();
1536
1537 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1538 (void)ifnet_stat_increment_out(ifp, 1,
1539 m->m_pkthdr.len + ETHER_VLAN_ENCAP_LEN,
1540 0);
1541 } else {
1542 (void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
1543 }
1544 bond_bpf_output(ifp, m, bpf_func);
1545
1546 return (dlil_output(port_ifp, 0, m, NULL, NULL, 1));
1547
1548 done:
1549 bond_unlock();
1550 m_freem(m);
1551 return (0);
1552 }
1553
1554 static bondport_ref
1555 ifbond_lookup_port(ifbond_ref ifb, struct ifnet * port_ifp)
1556 {
1557 bondport_ref p;
1558 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1559 if (p->po_ifp == port_ifp) {
1560 return (p);
1561 }
1562 }
1563 return (NULL);
1564 }
1565
1566 static bondport_ref
1567 bond_lookup_port(struct ifnet * port_ifp)
1568 {
1569 ifbond_ref ifb;
1570 bondport_ref port;
1571
1572 TAILQ_FOREACH(ifb, &g_bond->ifbond_list, ifb_bond_list) {
1573 port = ifbond_lookup_port(ifb, port_ifp);
1574 if (port != NULL) {
1575 return (port);
1576 }
1577 }
1578 return (NULL);
1579 }
1580
1581 static void
1582 bond_receive_lacpdu(struct mbuf * m, struct ifnet * port_ifp)
1583 {
1584 struct ifnet * bond_ifp = NULL;
1585 int event_code = 0;
1586 bondport_ref p;
1587
1588 bond_lock();
1589 if ((port_ifp->if_eflags & IFEF_BOND) == 0) {
1590 goto done;
1591 }
1592 p = bond_lookup_port(port_ifp);
1593 if (p == NULL) {
1594 goto done;
1595 }
1596 if (p->po_enabled == 0) {
1597 goto done;
1598 }
1599 bondport_receive_lacpdu(p, (lacpdu_ref)m->m_data);
1600 if (ifbond_selection(p->po_bond)) {
1601 event_code = (p->po_bond->ifb_active_lag == NULL)
1602 ? KEV_DL_LINK_OFF
1603 : KEV_DL_LINK_ON;
1604 /* XXX need to take a reference on bond_ifp */
1605 bond_ifp = p->po_bond->ifb_ifp;
1606 }
1607
1608 done:
1609 bond_unlock();
1610 if (bond_ifp != NULL) {
1611 interface_link_event(bond_ifp, event_code);
1612 }
1613 m_freem(m);
1614 return;
1615 }
1616
1617 static void
1618 bond_receive_la_marker_pdu(struct mbuf * m, struct ifnet * port_ifp)
1619 {
1620 la_marker_pdu_ref marker_p;
1621 bondport_ref p;
1622
1623 marker_p = (la_marker_pdu_ref)(m->m_data + ETHER_HDR_LEN);
1624 if (marker_p->lm_marker_tlv_type != LA_MARKER_TLV_TYPE_MARKER) {
1625 goto failed;
1626 }
1627 bond_lock();
1628 if ((port_ifp->if_eflags & IFEF_BOND) == 0) {
1629 bond_unlock();
1630 goto failed;
1631 }
1632 p = bond_lookup_port(port_ifp);
1633 if (p == NULL || p->po_enabled == 0) {
1634 bond_unlock();
1635 goto failed;
1636 }
1637 /* echo back the same packet as a marker response */
1638 marker_p->lm_marker_tlv_type = LA_MARKER_TLV_TYPE_MARKER_RESPONSE;
1639 bondport_slow_proto_transmit(p, (packet_buffer_ref)m);
1640 bond_unlock();
1641 return;
1642
1643 failed:
1644 m_freem(m);
1645 return;
1646 }
1647
1648 static int
1649 bond_input(struct mbuf * m, char * frame_header, struct ifnet * port_ifp,
1650 __unused u_long protocol_family, __unused int sync_ok)
1651 {
1652 bpf_packet_func bpf_func;
1653 const struct ether_header * eh_p;
1654 ifbond_ref ifb;
1655 struct ifnet * ifp;
1656 bondport_ref p;
1657
1658 eh_p = (const struct ether_header *)frame_header;
1659 if ((m->m_flags & M_MCAST) != 0
1660 && bcmp(eh_p->ether_dhost, &slow_proto_multicast,
1661 sizeof(eh_p->ether_dhost)) == 0
1662 && ntohs(eh_p->ether_type) == IEEE8023AD_SLOW_PROTO_ETHERTYPE) {
1663 u_char subtype = *mtod(m, u_char *);
1664
1665 if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP) {
1666 if (m->m_pkthdr.len < (int)offsetof(lacpdu, la_reserved)) {
1667 m_freem(m);
1668 return (0);
1669 }
1670 /* send to lacp */
1671 if (m->m_len < (int)offsetof(lacpdu, la_reserved)) {
1672 m = m_pullup(m, offsetof(lacpdu, la_reserved));
1673 if (m == NULL) {
1674 return (0);
1675 }
1676 }
1677 bond_receive_lacpdu(m, port_ifp);
1678 return (0);
1679 }
1680 else if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL) {
1681 int min_size;
1682
1683 /* restore the ethernet header pointer in the mbuf */
1684 m->m_pkthdr.len += ETHER_HDR_LEN;
1685 m->m_data -= ETHER_HDR_LEN;
1686 m->m_len += ETHER_HDR_LEN;
1687 min_size = ETHER_HDR_LEN + offsetof(la_marker_pdu, lm_reserved);
1688 if (m->m_pkthdr.len < min_size) {
1689 m_freem(m);
1690 return (0);
1691 }
1692 /* send to lacp */
1693 if (m->m_len < min_size) {
1694 m = m_pullup(m, min_size);
1695 if (m == NULL) {
1696 return (0);
1697 }
1698 }
1699 /* send to marker responder */
1700 bond_receive_la_marker_pdu(m, port_ifp);
1701 return (0);
1702 }
1703 else if (subtype == 0
1704 || subtype > IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END) {
1705 /* invalid subtype, discard the frame */
1706 m_freem(m);
1707 return (0);
1708 }
1709 }
1710 bond_lock();
1711 if ((port_ifp->if_eflags & IFEF_BOND) == 0) {
1712 goto done;
1713 }
1714 p = bond_lookup_port(port_ifp);
1715 if (p == NULL || bondport_collecting(p) == 0) {
1716 goto done;
1717 }
1718
1719 /* make the packet appear as if it arrived on the bonded interface */
1720 ifb = p->po_bond;
1721 ifp = ifb->ifb_ifp;
1722 bpf_func = ifb->ifb_bpf_input;
1723 bond_unlock();
1724
1725 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1726 (void)ifnet_stat_increment_in(ifp, 1,
1727 (m->m_pkthdr.len + ETHER_HDR_LEN
1728 + ETHER_VLAN_ENCAP_LEN), 0);
1729 }
1730 else {
1731 (void)ifnet_stat_increment_in(ifp, 1,
1732 (m->m_pkthdr.len + ETHER_HDR_LEN), 0);
1733 }
1734 m->m_pkthdr.rcvif = ifp;
1735 bond_bpf_input(ifp, m, eh_p, bpf_func);
1736 dlil_input_packet(ifp, m, frame_header);
1737 return 0;
1738
1739 done:
1740 bond_unlock();
1741 m_freem(m);
1742 return (0);
1743 }
1744
1745 static __inline__ const char *
1746 bondport_get_name(bondport_ref p)
1747 {
1748 return (p->po_name);
1749 }
1750
1751 static __inline__ int
1752 bondport_get_index(bondport_ref p)
1753 {
1754 return (p->po_ifp->if_index);
1755 }
1756
1757 static void
1758 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf)
1759 {
1760 struct ether_header * eh_p;
1761 int error;
1762
1763 /* packet_buffer_allocate leaves room for ethernet header */
1764 eh_p = mtod(buf, struct ether_header *);
1765 bcopy(&slow_proto_multicast, &eh_p->ether_dhost, sizeof(eh_p->ether_dhost));
1766 bcopy(&p->po_saved_addr, eh_p->ether_shost, sizeof(eh_p->ether_shost));
1767 eh_p->ether_type = htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE);
1768 error = dlil_output(p->po_ifp, 0, buf, NULL, NULL, 1);
1769 if (error != 0) {
1770 printf("bondport_slow_proto_transmit(%s) failed %d\n",
1771 bondport_get_name(p), error);
1772 }
1773 return;
1774 }
1775
1776 static void
1777 bondport_timer_process_func(devtimer_ref timer,
1778 devtimer_process_func_event event)
1779 {
1780 bondport_ref p;
1781
1782 switch (event) {
1783 case devtimer_process_func_event_lock:
1784 bond_lock();
1785 devtimer_retain(timer);
1786 break;
1787 case devtimer_process_func_event_unlock:
1788 if (devtimer_valid(timer)) {
1789 /* as long as the devtimer is valid, we can look at arg0 */
1790 int event_code = 0;
1791 struct ifnet * bond_ifp = NULL;
1792
1793 p = (bondport_ref)devtimer_arg0(timer);
1794 if (ifbond_selection(p->po_bond)) {
1795 event_code = (p->po_bond->ifb_active_lag == NULL)
1796 ? KEV_DL_LINK_OFF
1797 : KEV_DL_LINK_ON;
1798 /* XXX need to take a reference on bond_ifp */
1799 bond_ifp = p->po_bond->ifb_ifp;
1800 }
1801 devtimer_release(timer);
1802 bond_unlock();
1803 if (bond_ifp != NULL) {
1804 interface_link_event(bond_ifp, event_code);
1805 }
1806 }
1807 else {
1808 /* timer is going away */
1809 devtimer_release(timer);
1810 bond_unlock();
1811 }
1812 break;
1813 default:
1814 break;
1815 }
1816 }
1817
1818 static bondport_ref
1819 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
1820 int active, int short_timeout, int * ret_error)
1821 {
1822 int error = 0;
1823 bondport_ref p = NULL;
1824 lacp_actor_partner_state s;
1825
1826 *ret_error = 0;
1827 p = _MALLOC(sizeof(*p), M_BOND, M_WAITOK);
1828 if (p == NULL) {
1829 *ret_error = ENOMEM;
1830 return (NULL);
1831 }
1832 bzero(p, sizeof(*p));
1833 multicast_list_init(&p->po_multicast);
1834 if ((u_long)snprintf(p->po_name, sizeof(p->po_name), "%s%d",
1835 port_ifp->if_name, port_ifp->if_unit)
1836 >= sizeof(p->po_name)) {
1837 printf("if_bond: name too large\n");
1838 *ret_error = EINVAL;
1839 goto failed;
1840 }
1841 error = siocgifdevmtu(port_ifp, &p->po_devmtu);
1842 if (error != 0) {
1843 printf("if_bond: SIOCGIFDEVMTU %s failed, %d\n",
1844 bondport_get_name(p), error);
1845 goto failed;
1846 }
1847 /* remember the current interface MTU so it can be restored */
1848 p->po_devmtu.ifdm_current = port_ifp->if_mtu;
1849 p->po_ifp = port_ifp;
1850 p->po_media_info = interface_media_info(port_ifp);
1851 p->po_current_while_timer = devtimer_create(bondport_timer_process_func, p);
1852 if (p->po_current_while_timer == NULL) {
1853 *ret_error = ENOMEM;
1854 goto failed;
1855 }
1856 p->po_periodic_timer = devtimer_create(bondport_timer_process_func, p);
1857 if (p->po_periodic_timer == NULL) {
1858 *ret_error = ENOMEM;
1859 goto failed;
1860 }
1861 p->po_wait_while_timer = devtimer_create(bondport_timer_process_func, p);
1862 if (p->po_wait_while_timer == NULL) {
1863 *ret_error = ENOMEM;
1864 goto failed;
1865 }
1866 p->po_transmit_timer = devtimer_create(bondport_timer_process_func, p);
1867 if (p->po_transmit_timer == NULL) {
1868 *ret_error = ENOMEM;
1869 goto failed;
1870 }
1871 p->po_receive_state = ReceiveState_none;
1872 p->po_mux_state = MuxState_none;
1873 p->po_priority = priority;
1874 s = 0;
1875 s = lacp_actor_partner_state_set_aggregatable(s);
1876 if (short_timeout) {
1877 s = lacp_actor_partner_state_set_short_timeout(s);
1878 }
1879 if (active) {
1880 s = lacp_actor_partner_state_set_active_lacp(s);
1881 }
1882 p->po_actor_state = s;
1883 return (p);
1884
1885 failed:
1886 bondport_free(p);
1887 return (NULL);
1888 }
1889
1890 static void
1891 bondport_start(bondport_ref p)
1892 {
1893 bondport_receive_machine(p, LAEventStart, NULL);
1894 bondport_mux_machine(p, LAEventStart, NULL);
1895 bondport_periodic_transmit_machine(p, LAEventStart, NULL);
1896 bondport_transmit_machine(p, LAEventStart, NULL);
1897 return;
1898 }
1899
1900 /*
1901 * Function: bondport_invalidate_timers
1902 * Purpose:
1903 * Invalidate all of the timers for the bondport.
1904 */
1905 static void
1906 bondport_invalidate_timers(bondport_ref p)
1907 {
1908 devtimer_invalidate(p->po_current_while_timer);
1909 devtimer_invalidate(p->po_periodic_timer);
1910 devtimer_invalidate(p->po_wait_while_timer);
1911 devtimer_invalidate(p->po_transmit_timer);
1912 }
1913
1914 static void
1915 bondport_free(bondport_ref p)
1916 {
1917 multicast_list_remove(&p->po_multicast);
1918 devtimer_release(p->po_current_while_timer);
1919 devtimer_release(p->po_periodic_timer);
1920 devtimer_release(p->po_wait_while_timer);
1921 devtimer_release(p->po_transmit_timer);
1922 FREE(p, M_BOND);
1923 return;
1924 }
1925
1926 #define BOND_ADD_PROGRESS_IN_LIST 0x1
1927 #define BOND_ADD_PROGRESS_PROTO_ATTACHED 0x2
1928 #define BOND_ADD_PROGRESS_LLADDR_SET 0x4
1929 #define BOND_ADD_PROGRESS_MTU_SET 0x8
1930
1931 static __inline__ int
1932 bond_device_mtu(struct ifnet * ifp, ifbond_ref ifb)
1933 {
1934 return (((int)ifp->if_mtu > ifb->ifb_altmtu)
1935 ? (int)ifp->if_mtu : ifb->ifb_altmtu);
1936 }
1937
1938 static int
1939 bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp)
1940 {
1941 int devmtu;
1942 int error = 0;
1943 int event_code = 0;
1944 ifbond_ref ifb;
1945 struct sockaddr_dl * ifb_sdl;
1946 bondport_ref * new_array = NULL;
1947 bondport_ref * old_array = NULL;
1948 bondport_ref p;
1949 struct sockaddr_dl * port_sdl;
1950 int progress = 0;
1951
1952 /* pre-allocate space for new port */
1953 p = bondport_create(port_ifp, 0x8000, 1, 0, &error);
1954 if (p == NULL) {
1955 return (error);
1956 }
1957 bond_lock();
1958 ifb = (ifbond_ref)ifp->if_private;
1959 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
1960 bond_unlock();
1961 bondport_free(p);
1962 return ((ifb == NULL ? EOPNOTSUPP : EBUSY));
1963 }
1964
1965 /* make sure this interface can handle our current MTU */
1966 devmtu = bond_device_mtu(ifp, ifb);
1967 if (devmtu != 0
1968 && (devmtu > p->po_devmtu.ifdm_max || devmtu < p->po_devmtu.ifdm_min)) {
1969 bond_unlock();
1970 printf("if_bond: interface %s doesn't support mtu %d",
1971 bondport_get_name(p), devmtu);
1972 bondport_free(p);
1973 return (EINVAL);
1974 }
1975
1976 /* make sure ifb doesn't get de-allocated while we wait */
1977 ifbond_retain(ifb);
1978
1979 /* wait for other add or remove to complete */
1980 ifbond_wait(ifb, "bond_add_interface");
1981
1982 if (ifbond_flags_if_detaching(ifb)) {
1983 /* someone destroyed the bond while we were waiting */
1984 error = EBUSY;
1985 goto signal_done;
1986 }
1987 if (bond_lookup_port(port_ifp) != NULL) {
1988 /* port is already part of a bond */
1989 error = EBUSY;
1990 goto signal_done;
1991 }
1992 ifnet_lock_exclusive(port_ifp);
1993 if ((port_ifp->if_eflags & (IFEF_VLAN | IFEF_BOND)) != 0) {
1994 /* interface already has VLAN's, or is part of bond */
1995 ifnet_lock_done(port_ifp);
1996 error = EBUSY;
1997 goto signal_done;
1998 }
1999
2000 /* mark the interface busy */
2001 port_ifp->if_eflags |= IFEF_BOND;
2002 ifnet_lock_done(port_ifp);
2003
2004 port_sdl = ifp_get_sdl(port_ifp);
2005 ifb_sdl = ifp_get_sdl(ifp);
2006
2007 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2008 ifp->if_hwassist = port_ifp->if_hwassist;
2009 ifp->if_flags |= IFF_RUNNING;
2010 if (ifbond_flags_lladdr(ifb) == FALSE) {
2011 /* first port added to bond determines bond's ethernet address */
2012 ether_addr_copy(LLADDR(ifb_sdl), LLADDR(port_sdl));
2013 ifb_sdl->sdl_type = IFT_ETHER;
2014 ifb_sdl->sdl_alen = ETHER_ADDR_LEN;
2015 }
2016 } else {
2017 if (ifp->if_hwassist != port_ifp->if_hwassist) {
2018 printf("bond_add_interface(%s, %s) "
2019 "hwassist values don't match 0x%x != 0x%x\n",
2020 ifb->ifb_name, bondport_get_name(p),
2021 ifp->if_hwassist, port_ifp->if_hwassist);
2022 /*
2023 * XXX
2024 * if the bond has VLAN's, we can't simply change the hwassist
2025 * field behind its back: this needs work
2026 */
2027 ifp->if_hwassist = 0;
2028 }
2029 }
2030 p->po_bond = ifb;
2031
2032 /* remember the port's ethernet address so it can be restored */
2033 ether_addr_copy(&p->po_saved_addr, LLADDR(port_sdl));
2034
2035 /* add it to the list of ports */
2036 TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list);
2037 ifb->ifb_port_count++;
2038
2039 /* set the default MTU */
2040 if (ifp->if_mtu == 0) {
2041 ifp->if_mtu = ETHERMTU;
2042 }
2043 bond_unlock();
2044 progress |= BOND_ADD_PROGRESS_IN_LIST;
2045
2046 /* allocate a larger distributing array */
2047 new_array = (bondport_ref *)
2048 _MALLOC(sizeof(*new_array) * ifb->ifb_port_count, M_BOND, M_WAITOK);
2049 if (new_array == NULL) {
2050 error = ENOMEM;
2051 goto failed;
2052 }
2053
2054 /* attach our BOND "protocol" to the interface */
2055 error = bond_attach_protocol(port_ifp);
2056 if (error) {
2057 goto failed;
2058 }
2059 progress |= BOND_ADD_PROGRESS_PROTO_ATTACHED;
2060
2061 /* set the interface MTU */
2062 devmtu = bond_device_mtu(ifp, ifb);
2063 error = siocsifmtu(port_ifp, devmtu);
2064 if (error != 0) {
2065 printf("bond_add_interface(%s, %s):"
2066 " SIOCSIFMTU %d failed %d\n",
2067 ifb->ifb_name, bondport_get_name(p), devmtu, error);
2068 goto failed;
2069 }
2070 progress |= BOND_ADD_PROGRESS_MTU_SET;
2071
2072 /* program the port with our multicast addresses */
2073 error = multicast_list_program(&p->po_multicast, ifp, port_ifp);
2074 if (error) {
2075 printf("bond_add_interface(%s, %s):"
2076 " multicast_list_program failed %d\n",
2077 ifb->ifb_name, bondport_get_name(p), error);
2078 goto failed;
2079 }
2080
2081 /* mark the interface up */
2082 ifnet_set_flags(port_ifp, IFF_UP, IFF_UP);
2083
2084 error = dlil_ioctl(0, port_ifp, SIOCSIFFLAGS, (caddr_t)NULL);
2085 if (error != 0) {
2086 printf("bond_add_interface(%s, %s): SIOCSIFFLAGS failed %d\n",
2087 ifb->ifb_name, bondport_get_name(p), error);
2088 goto failed;
2089 }
2090
2091 /* re-program the port's ethernet address */
2092 error = if_siflladdr(port_ifp,
2093 (const struct ether_addr *)LLADDR(ifb_sdl));
2094 if (error != 0) {
2095 /* port doesn't support setting the link address */
2096 printf("bond_add_interface(%s, %s): if_siflladdr failed %d\n",
2097 ifb->ifb_name, bondport_get_name(p), error);
2098 goto failed;
2099 }
2100 progress |= BOND_ADD_PROGRESS_LLADDR_SET;
2101
2102 bond_lock();
2103
2104 /* no failures past this point */
2105 p->po_enabled = 1;
2106
2107 /* copy the contents of the existing distributing array */
2108 if (ifb->ifb_distributing_count) {
2109 bcopy(ifb->ifb_distributing_array, new_array,
2110 sizeof(*new_array) * ifb->ifb_distributing_count);
2111 }
2112 old_array = ifb->ifb_distributing_array;
2113 ifb->ifb_distributing_array = new_array;
2114
2115 /* clear the busy state, and wakeup anyone waiting */
2116 ifbond_signal(ifb, "bond_add_interface");
2117 bondport_start(p);
2118
2119 /* check if we need to generate a link status event */
2120 if (ifbond_selection(ifb)) {
2121 event_code = (ifb->ifb_active_lag == NULL)
2122 ? KEV_DL_LINK_OFF
2123 : KEV_DL_LINK_ON;
2124 }
2125 bond_unlock();
2126 if (event_code != 0) {
2127 interface_link_event(ifp, event_code);
2128 }
2129 if (old_array != NULL) {
2130 FREE(old_array, M_BOND);
2131 }
2132 return 0;
2133
2134 failed:
2135 bond_assert_lock_not_held();
2136
2137 if (new_array != NULL) {
2138 FREE(new_array, M_BOND);
2139 }
2140 if ((progress & BOND_ADD_PROGRESS_LLADDR_SET) != 0) {
2141 int error1;
2142
2143 error1 = if_siflladdr(port_ifp, &p->po_saved_addr);
2144 if (error1 != 0) {
2145 printf("bond_add_interface(%s, %s): if_siflladdr failed %d\n",
2146 ifb->ifb_name, bondport_get_name(p), error1);
2147 }
2148 }
2149 if ((progress & BOND_ADD_PROGRESS_PROTO_ATTACHED) != 0) {
2150 (void)bond_detach_protocol(port_ifp);
2151 }
2152 if ((progress & BOND_ADD_PROGRESS_MTU_SET) != 0) {
2153 int error1;
2154
2155 error1 = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2156 if (error1 != 0) {
2157 printf("bond_add_interface(%s, %s): SIOCSIFMTU %d failed %d\n",
2158 ifb->ifb_name, bondport_get_name(p), p->po_devmtu.ifdm_current,
2159 error1);
2160 }
2161 }
2162 bond_lock();
2163 if ((progress & BOND_ADD_PROGRESS_IN_LIST) != 0) {
2164 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2165 ifb->ifb_port_count--;
2166 }
2167 ifnet_lock_exclusive(port_ifp);
2168 port_ifp->if_eflags &= ~IFEF_BOND;
2169 ifnet_lock_done(port_ifp);
2170 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2171 ifb->ifb_altmtu = 0;
2172 ifp->if_mtu = 0;
2173 ifp->if_hwassist = 0;
2174 if (ifbond_flags_lladdr(ifb) == FALSE) {
2175 bzero(LLADDR(ifb_sdl), ETHER_ADDR_LEN);
2176 ifb_sdl->sdl_type = IFT_IEEE8023ADLAG;
2177 ifb_sdl->sdl_alen = 0;
2178 }
2179 }
2180
2181 signal_done:
2182 ifbond_release(ifb);
2183 ifbond_signal(ifb, "bond_add_interface");
2184 bond_unlock();
2185 bondport_free(p);
2186 return (error);
2187 }
2188
2189 static int
2190 bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp)
2191 {
2192 int active_lag = 0;
2193 int error = 0;
2194 int event_code = 0;
2195 bondport_ref head_port;
2196 struct sockaddr_dl * ifb_sdl;
2197 struct ifnet * ifp;
2198 int new_link_address = 0;
2199 bondport_ref p;
2200 lacp_actor_partner_state s;
2201
2202 bond_assert_lock_held();
2203
2204 ifbond_retain(ifb);
2205 ifbond_wait(ifb, "bond_remove_interface");
2206
2207 p = ifbond_lookup_port(ifb, port_ifp);
2208 if (p == NULL) {
2209 error = ENXIO;
2210 /* it got removed by another thread */
2211 goto signal_done;
2212 }
2213
2214 /* de-select it and remove it from the lists */
2215 bondport_disable_distributing(p);
2216 bondport_set_selected(p, SelectedState_UNSELECTED);
2217 active_lag = bondport_remove_from_LAG(p);
2218 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2219 ifb->ifb_port_count--;
2220
2221 /* invalidate timers here while holding the bond_lock */
2222 bondport_invalidate_timers(p);
2223
2224 /* announce that we're Individual now */
2225 s = p->po_actor_state;
2226 s = lacp_actor_partner_state_set_individual(s);
2227 s = lacp_actor_partner_state_set_not_collecting(s);
2228 s = lacp_actor_partner_state_set_not_distributing(s);
2229 s = lacp_actor_partner_state_set_out_of_sync(s);
2230 p->po_actor_state = s;
2231 bondport_flags_set_ntt(p);
2232
2233 ifp = ifb->ifb_ifp;
2234 ifb_sdl = ifp_get_sdl(ifp);
2235 head_port = TAILQ_FIRST(&ifb->ifb_port_list);
2236 if (head_port == NULL) {
2237 ifp->if_flags &= ~IFF_RUNNING;
2238 if (ifbond_flags_lladdr(ifb) == FALSE) {
2239 ifb_sdl->sdl_type = IFT_IEEE8023ADLAG;
2240 ifb_sdl->sdl_alen = 0;
2241 bzero(LLADDR(ifb_sdl), ETHER_ADDR_LEN);
2242 }
2243 ifp->if_hwassist = 0;
2244 ifp->if_mtu = 0;
2245 ifb->ifb_altmtu = 0;
2246 } else if (ifbond_flags_lladdr(ifb) == FALSE
2247 && bcmp(&p->po_saved_addr, LLADDR(ifb_sdl),
2248 ETHER_ADDR_LEN) == 0) {
2249 /* this port gave the bond its ethernet address, switch to new one */
2250 ether_addr_copy(LLADDR(ifb_sdl), &head_port->po_saved_addr);
2251 ifb_sdl->sdl_type = IFT_ETHER;
2252 ifb_sdl->sdl_alen = ETHER_ADDR_LEN;
2253 new_link_address = 1;
2254 }
2255 /* check if we need to generate a link status event */
2256 if (ifbond_selection(ifb) || active_lag) {
2257 event_code = (ifb->ifb_active_lag == NULL)
2258 ? KEV_DL_LINK_OFF
2259 : KEV_DL_LINK_ON;
2260 }
2261 bond_unlock();
2262
2263 bondport_transmit_machine(p, LAEventStart, (void *)1);
2264
2265 if (new_link_address) {
2266 struct ifnet * scan_ifp;
2267 bondport_ref scan_port;
2268
2269 /* ifbond_wait() allows port list traversal without holding the lock */
2270
2271 /* re-program each port with the new link address */
2272 TAILQ_FOREACH(scan_port, &ifb->ifb_port_list, po_port_list) {
2273 scan_ifp = scan_port->po_ifp;
2274
2275 error = if_siflladdr(scan_ifp,
2276 (const struct ether_addr *) LLADDR(ifb_sdl));
2277 if (error != 0) {
2278 printf("bond_remove_interface(%s, %s): "
2279 "if_siflladdr (%s) failed %d\n",
2280 ifb->ifb_name, bondport_get_name(p),
2281 bondport_get_name(scan_port), error);
2282 }
2283 }
2284 }
2285
2286 /* restore the port's ethernet address */
2287 error = if_siflladdr(port_ifp, &p->po_saved_addr);
2288 if (error != 0) {
2289 printf("bond_remove_interface(%s, %s): if_siflladdr failed %d\n",
2290 ifb->ifb_name, bondport_get_name(p), error);
2291 }
2292
2293 /* restore the port's MTU */
2294 error = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2295 if (error != 0) {
2296 printf("bond_remove_interface(%s, %s): SIOCSIFMTU %d failed %d\n",
2297 ifb->ifb_name, bondport_get_name(p),
2298 p->po_devmtu.ifdm_current, error);
2299 }
2300
2301 /* remove the bond "protocol" */
2302 bond_detach_protocol(port_ifp);
2303
2304 /* generate link event */
2305 if (event_code != 0) {
2306 interface_link_event(ifp, event_code);
2307 }
2308
2309 bond_lock();
2310 ifbond_release(ifb);
2311 bondport_free(p);
2312 ifnet_lock_exclusive(port_ifp);
2313 port_ifp->if_eflags &= ~IFEF_BOND;
2314 ifnet_lock_done(port_ifp);
2315
2316 signal_done:
2317 ifbond_signal(ifb, "bond_remove_interface");
2318 ifbond_release(ifb); /* a second release for the second reference */
2319 return (error);
2320 }
2321
2322 static int
2323 bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap)
2324 {
2325 int count;
2326 user_addr_t dst;
2327 int error = 0;
2328 struct if_bond_status_req * ibsr;
2329 struct if_bond_status ibs;
2330 bondport_ref port;
2331
2332 ibsr = &(ibr_p->ibr_ibru.ibru_status);
2333 if (ibsr->ibsr_version != IF_BOND_STATUS_REQ_VERSION) {
2334 return (EINVAL);
2335 }
2336 ibsr->ibsr_key = ifb->ifb_key;
2337 ibsr->ibsr_total = ifb->ifb_port_count;
2338 dst = proc_is64bit(current_proc())
2339 ? ibsr->ibsr_ibsru.ibsru_buffer64
2340 : CAST_USER_ADDR_T(ibsr->ibsr_ibsru.ibsru_buffer32);
2341 if (dst == USER_ADDR_NULL) {
2342 /* just want to know how many there are */
2343 goto done;
2344 }
2345 if (ibsr->ibsr_count < 0) {
2346 return (EINVAL);
2347 }
2348 count = (ifb->ifb_port_count < ibsr->ibsr_count)
2349 ? ifb->ifb_port_count : ibsr->ibsr_count;
2350 TAILQ_FOREACH(port, &ifb->ifb_port_list, po_port_list) {
2351 struct if_bond_partner_state * ibps_p;
2352 partner_state_ref ps;
2353
2354 if (count == 0) {
2355 break;
2356 }
2357 bzero(&ibs, sizeof(ibs));
2358 strncpy(ibs.ibs_if_name, port->po_name, sizeof(ibs.ibs_if_name));
2359 ibs.ibs_port_priority = port->po_priority;
2360 ibs.ibs_state = port->po_actor_state;
2361 ibs.ibs_selected_state = port->po_selected;
2362 ps = &port->po_partner_state;
2363 ibps_p = &ibs.ibs_partner_state;
2364 ibps_p->ibps_system = ps->ps_lag_info.li_system;
2365 ibps_p->ibps_system_priority = ps->ps_lag_info.li_system_priority;
2366 ibps_p->ibps_key = ps->ps_lag_info.li_key;
2367 ibps_p->ibps_port = ps->ps_port;
2368 ibps_p->ibps_port_priority = ps->ps_port_priority;
2369 ibps_p->ibps_state = ps->ps_state;
2370 error = copyout(&ibs, dst, sizeof(ibs));
2371 if (error != 0) {
2372 break;
2373 }
2374 dst += sizeof(ibs);
2375 count--;
2376 }
2377
2378 done:
2379 if (error == 0) {
2380 error = copyout(ibr_p, datap, sizeof(*ibr_p));
2381 }
2382 else {
2383 (void)copyout(ibr_p, datap, sizeof(*ibr_p));
2384 }
2385 return (error);
2386 }
2387
2388 static int
2389 bond_set_promisc(__unused struct ifnet *ifp)
2390 {
2391 int error = 0;
2392 #if 0
2393 ifbond_ref ifb = ifp->if_private;
2394
2395
2396 if ((ifp->if_flags & IFF_PROMISC) != 0) {
2397 if ((ifb->ifb_flags & IFBF_PROMISC) == 0) {
2398 error = ifnet_set_promiscuous(ifb->ifb_p, 1);
2399 if (error == 0)
2400 ifb->ifb_flags |= IFBF_PROMISC;
2401 }
2402 } else {
2403 if ((ifb->ifb_flags & IFBF_PROMISC) != 0) {
2404 error = ifnet_set_promiscuous(ifb->ifb_p, 0);
2405 if (error == 0)
2406 ifb->ifb_flags &= ~IFBF_PROMISC;
2407 }
2408 }
2409 #endif 0
2410 return (error);
2411 }
2412
2413 static void
2414 bond_get_mtu_values(ifbond_ref ifb, int * ret_min, int * ret_max)
2415 {
2416 int mtu_min = 0;
2417 int mtu_max = 0;
2418 bondport_ref p;
2419
2420 if (TAILQ_FIRST(&ifb->ifb_port_list) != NULL) {
2421 mtu_min = IF_MINMTU;
2422 }
2423 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2424 struct ifdevmtu * devmtu_p = &p->po_devmtu;
2425
2426 if (devmtu_p->ifdm_min > mtu_min) {
2427 mtu_min = devmtu_p->ifdm_min;
2428 }
2429 if (mtu_max == 0 || devmtu_p->ifdm_max < mtu_max) {
2430 mtu_max = devmtu_p->ifdm_max;
2431 }
2432 }
2433 *ret_min = mtu_min;
2434 *ret_max = mtu_max;
2435 return;
2436 }
2437
2438 static int
2439 bond_set_mtu_on_ports(ifbond_ref ifb, int mtu)
2440 {
2441 int error = 0;
2442 bondport_ref p;
2443
2444 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2445 error = siocsifmtu(p->po_ifp, mtu);
2446 if (error != 0) {
2447 printf("if_bond(%s): SIOCSIFMTU %s failed, %d\n",
2448 ifb->ifb_name, bondport_get_name(p), error);
2449 break;
2450 }
2451 }
2452 return (error);
2453 }
2454
2455 static int
2456 bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu)
2457 {
2458 int error = 0;
2459 ifbond_ref ifb;
2460 int mtu_min;
2461 int mtu_max;
2462 int new_max;
2463 int old_max;
2464
2465 bond_lock();
2466 ifb = (ifbond_ref)ifp->if_private;
2467 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2468 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2469 goto done;
2470 }
2471 ifbond_retain(ifb);
2472 ifbond_wait(ifb, "bond_set_mtu");
2473
2474 /* check again */
2475 if (ifp->if_private == NULL || ifbond_flags_if_detaching(ifb)) {
2476 error = EBUSY;
2477 goto signal_done;
2478 }
2479 bond_get_mtu_values(ifb, &mtu_min, &mtu_max);
2480 if (mtu > mtu_max) {
2481 error = EINVAL;
2482 goto signal_done;
2483 }
2484 if (mtu < mtu_min && (isdevmtu == 0 || mtu != 0)) {
2485 /* allow SIOCSIFALTMTU to set the mtu to 0 */
2486 error = EINVAL;
2487 goto signal_done;
2488 }
2489 if (isdevmtu) {
2490 new_max = (mtu > (int)ifp->if_mtu) ? mtu : (int)ifp->if_mtu;
2491 }
2492 else {
2493 new_max = (mtu > ifb->ifb_altmtu) ? mtu : ifb->ifb_altmtu;
2494 }
2495 old_max = ((int)ifp->if_mtu > ifb->ifb_altmtu)
2496 ? (int)ifp->if_mtu : ifb->ifb_altmtu;
2497 if (new_max != old_max) {
2498 /* we can safely walk the list of port without the lock held */
2499 bond_unlock();
2500 error = bond_set_mtu_on_ports(ifb, new_max);
2501 if (error != 0) {
2502 /* try our best to back out of it */
2503 (void)bond_set_mtu_on_ports(ifb, old_max);
2504 }
2505 bond_lock();
2506 }
2507 if (error == 0) {
2508 if (isdevmtu) {
2509 ifb->ifb_altmtu = mtu;
2510 }
2511 else {
2512 ifp->if_mtu = mtu;
2513 }
2514 }
2515
2516 signal_done:
2517 ifbond_signal(ifb, "bond_set_mtu");
2518 ifbond_release(ifb);
2519
2520 done:
2521 bond_unlock();
2522 return (error);
2523 }
2524
2525 static int
2526 bond_ioctl(struct ifnet *ifp, u_int32_t cmd, void * data)
2527 {
2528 int error = 0;
2529 struct if_bond_req ibr;
2530 struct ifaddr * ifa;
2531 ifbond_ref ifb;
2532 struct ifreq * ifr;
2533 struct ifmediareq64 *ifmr;
2534 struct ifnet * port_ifp = NULL;
2535 user_addr_t user_addr;
2536
2537 if (ifp->if_type != IFT_IEEE8023ADLAG) {
2538 return (EOPNOTSUPP);
2539 }
2540 ifr = (struct ifreq *)data;
2541 ifa = (struct ifaddr *)data;
2542
2543 switch (cmd) {
2544 case SIOCSIFADDR:
2545 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
2546 break;
2547
2548 case SIOCGIFMEDIA64:
2549 case SIOCGIFMEDIA:
2550 bond_lock();
2551 ifb = (ifbond_ref)ifp->if_private;
2552 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2553 bond_unlock();
2554 return (ifb == NULL ? EOPNOTSUPP : EBUSY);
2555 }
2556 ifmr = (struct ifmediareq64 *)data;
2557 ifmr->ifm_current = IFM_ETHER;
2558 ifmr->ifm_mask = 0;
2559 ifmr->ifm_status = IFM_AVALID;
2560 ifmr->ifm_active = IFM_ETHER;
2561 ifmr->ifm_count = 1;
2562 if (ifb->ifb_active_lag != NULL) {
2563 ifmr->ifm_active = ifb->ifb_active_lag->lag_active_media;
2564 ifmr->ifm_status |= IFM_ACTIVE;
2565 }
2566 bond_unlock();
2567 user_addr = (cmd == SIOCGIFMEDIA64)
2568 ? ifmr->ifm_ifmu.ifmu_ulist64
2569 : CAST_USER_ADDR_T(ifmr->ifm_ifmu.ifmu_ulist32);
2570 if (user_addr != USER_ADDR_NULL) {
2571 error = copyout(&ifmr->ifm_current,
2572 user_addr,
2573 sizeof(int));
2574 }
2575 break;
2576
2577 case SIOCSIFMEDIA:
2578 /* XXX send the SIFMEDIA to all children? Or force autoselect? */
2579 error = EINVAL;
2580 break;
2581
2582 case SIOCGIFDEVMTU:
2583 bond_lock();
2584 ifb = (ifbond_ref)ifp->if_private;
2585 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2586 bond_unlock();
2587 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2588 break;
2589 }
2590 ifr->ifr_devmtu.ifdm_current = bond_device_mtu(ifp, ifb);
2591 bond_get_mtu_values(ifb, &ifr->ifr_devmtu.ifdm_min,
2592 &ifr->ifr_devmtu.ifdm_max);
2593 bond_unlock();
2594 break;
2595
2596 case SIOCGIFALTMTU:
2597 bond_lock();
2598 ifb = (ifbond_ref)ifp->if_private;
2599 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2600 bond_unlock();
2601 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2602 break;
2603 }
2604 ifr->ifr_mtu = ifb->ifb_altmtu;
2605 bond_unlock();
2606 break;
2607
2608 case SIOCSIFALTMTU:
2609 error = bond_set_mtu(ifp, ifr->ifr_mtu, 1);
2610 break;
2611
2612 case SIOCSIFMTU:
2613 error = bond_set_mtu(ifp, ifr->ifr_mtu, 0);
2614 break;
2615
2616 case SIOCSIFBOND:
2617 user_addr = proc_is64bit(current_proc())
2618 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
2619 error = copyin(user_addr, &ibr, sizeof(ibr));
2620 if (error) {
2621 break;
2622 }
2623 switch (ibr.ibr_op) {
2624 case IF_BOND_OP_ADD_INTERFACE:
2625 case IF_BOND_OP_REMOVE_INTERFACE:
2626 /* XXX ifunit() needs to return a reference on the ifp */
2627 port_ifp = ifunit(ibr.ibr_ibru.ibru_if_name);
2628 if (port_ifp == NULL) {
2629 error = ENXIO;
2630 break;
2631 }
2632 if (port_ifp->if_type != IFT_ETHER) {
2633 error = EPROTONOSUPPORT;
2634 break;
2635 }
2636 break;
2637 case IF_BOND_OP_SET_VERBOSE:
2638 break;
2639 default:
2640 error = EOPNOTSUPP;
2641 break;
2642 }
2643 if (error != 0) {
2644 break;
2645 }
2646 switch (ibr.ibr_op) {
2647 case IF_BOND_OP_ADD_INTERFACE:
2648 error = bond_add_interface(ifp, port_ifp);
2649 break;
2650 case IF_BOND_OP_REMOVE_INTERFACE:
2651 bond_lock();
2652 ifb = (ifbond_ref)ifp->if_private;
2653 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2654 bond_unlock();
2655 return (ifb == NULL ? EOPNOTSUPP : EBUSY);
2656 }
2657 error = bond_remove_interface(ifb, port_ifp);
2658 bond_unlock();
2659 break;
2660 case IF_BOND_OP_SET_VERBOSE:
2661 bond_lock();
2662 if (g_bond == NULL) {
2663 bond_unlock();
2664 error = ENXIO;
2665 break;
2666 }
2667 g_bond->verbose = ibr.ibr_ibru.ibru_int_val;
2668 bond_unlock();
2669 break;
2670 }
2671 break;
2672
2673 case SIOCGIFBOND:
2674 user_addr = proc_is64bit(current_proc())
2675 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
2676 error = copyin(user_addr, &ibr, sizeof(ibr));
2677 if (error) {
2678 break;
2679 }
2680 switch (ibr.ibr_op) {
2681 case IF_BOND_OP_GET_STATUS:
2682 break;
2683 default:
2684 error = EOPNOTSUPP;
2685 break;
2686 }
2687 if (error != 0) {
2688 break;
2689 }
2690 bond_lock();
2691 ifb = (ifbond_ref)ifp->if_private;
2692 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2693 bond_unlock();
2694 return (ifb == NULL ? EOPNOTSUPP : EBUSY);
2695 }
2696 switch (ibr.ibr_op) {
2697 case IF_BOND_OP_GET_STATUS:
2698 error = bond_get_status(ifb, &ibr, user_addr);
2699 break;
2700 }
2701 bond_unlock();
2702 break;
2703
2704 case SIOCSIFLLADDR:
2705 error = EOPNOTSUPP;
2706 break;
2707
2708 case SIOCSIFFLAGS:
2709 /* enable/disable promiscuous mode */
2710 bond_lock();
2711 error = bond_set_promisc(ifp);
2712 bond_unlock();
2713 break;
2714
2715 case SIOCADDMULTI:
2716 case SIOCDELMULTI:
2717 error = bond_setmulti(ifp);
2718 break;
2719 default:
2720 error = EOPNOTSUPP;
2721 }
2722 return error;
2723 }
2724
2725 static void
2726 bond_if_free(struct ifnet * ifp)
2727 {
2728 ifbond_ref ifb;
2729
2730 if (ifp == NULL) {
2731 return;
2732 }
2733 bond_lock();
2734 ifb = (ifbond_ref)ifp->if_private;
2735 if (ifb == NULL) {
2736 bond_unlock();
2737 return;
2738 }
2739 ifp->if_private = NULL;
2740 ifbond_release(ifb);
2741 bond_unlock();
2742 dlil_if_release(ifp);
2743 return;
2744 }
2745
2746 static void
2747 bond_event(struct ifnet * port_ifp, struct kev_msg * event)
2748 {
2749 struct ifnet * bond_ifp = NULL;
2750 int event_code = 0;
2751 bondport_ref p;
2752 struct media_info media_info;
2753
2754 if (event->vendor_code != KEV_VENDOR_APPLE
2755 || event->kev_class != KEV_NETWORK_CLASS
2756 || event->kev_subclass != KEV_DL_SUBCLASS) {
2757 return;
2758 }
2759 switch (event->event_code) {
2760 case KEV_DL_IF_DETACHING:
2761 break;
2762 case KEV_DL_LINK_OFF:
2763 case KEV_DL_LINK_ON:
2764 media_info = interface_media_info(port_ifp);
2765 break;
2766 default:
2767 return;
2768 }
2769 bond_lock();
2770 p = bond_lookup_port(port_ifp);
2771 if (p == NULL) {
2772 bond_unlock();
2773 return;
2774 }
2775 switch (event->event_code) {
2776 case KEV_DL_IF_DETACHING:
2777 bond_remove_interface(p->po_bond, p->po_ifp);
2778 break;
2779 case KEV_DL_LINK_OFF:
2780 case KEV_DL_LINK_ON:
2781 p->po_media_info = media_info;
2782 if (p->po_enabled) {
2783 bondport_link_status_changed(p);
2784 }
2785 break;
2786 }
2787 /* generate a link-event */
2788 if (ifbond_selection(p->po_bond)) {
2789 event_code = (p->po_bond->ifb_active_lag == NULL)
2790 ? KEV_DL_LINK_OFF
2791 : KEV_DL_LINK_ON;
2792 /* XXX need to take a reference on bond_ifp */
2793 bond_ifp = p->po_bond->ifb_ifp;
2794 }
2795 bond_unlock();
2796 if (bond_ifp != NULL) {
2797 interface_link_event(bond_ifp, event_code);
2798 }
2799 return;
2800 }
2801
2802 static void
2803 interface_link_event(struct ifnet * ifp, u_long event_code)
2804 {
2805 struct {
2806 struct kern_event_msg header;
2807 u_long unit;
2808 char if_name[IFNAMSIZ];
2809 } event;
2810
2811 event.header.total_size = sizeof(event);
2812 event.header.vendor_code = KEV_VENDOR_APPLE;
2813 event.header.kev_class = KEV_NETWORK_CLASS;
2814 event.header.kev_subclass = KEV_DL_SUBCLASS;
2815 event.header.event_code = event_code;
2816 event.header.event_data[0] = ifp->if_family;
2817 event.unit = (u_long) ifp->if_unit;
2818 strncpy(event.if_name, ifp->if_name, IFNAMSIZ);
2819 dlil_event(ifp, &event.header);
2820 return;
2821 }
2822
2823 /*
2824 * Function: bond_attach_protocol
2825 * Purpose:
2826 * Attach a DLIL protocol to the interface.
2827 *
2828 * The ethernet demux special cases to always return PF_BOND if the
2829 * interface is bonded. That means we receive all traffic from that
2830 * interface without passing any of the traffic to any other attached
2831 * protocol.
2832 */
2833 static int
2834 bond_attach_protocol(struct ifnet *ifp)
2835 {
2836 int error;
2837 struct dlil_proto_reg_str reg;
2838
2839 bzero(&reg, sizeof(reg));
2840 TAILQ_INIT(&reg.demux_desc_head);
2841 reg.interface_family = ifp->if_family;
2842 reg.unit_number = ifp->if_unit;
2843 reg.input = bond_input;
2844 reg.event = bond_event;
2845 reg.protocol_family = PF_BOND;
2846
2847 error = dlil_attach_protocol(&reg);
2848 if (error) {
2849 printf("bond over %s%d: dlil_attach_protocol failed, %d\n",
2850 ifp->if_name, ifp->if_unit, error);
2851 }
2852 return (error);
2853 }
2854
2855 /*
2856 * Function: bond_detach_protocol
2857 * Purpose:
2858 * Detach our DLIL protocol from an interface
2859 */
2860 static int
2861 bond_detach_protocol(struct ifnet *ifp)
2862 {
2863 int error;
2864
2865 error = dlil_detach_protocol(ifp, PF_BOND);
2866 if (error) {
2867 printf("bond over %s%d: dlil_detach_protocol failed, %d\n",
2868 ifp->if_name, ifp->if_unit, error);
2869 }
2870 return (error);
2871 }
2872
2873 /*
2874 * DLIL interface family functions
2875 */
2876 extern int ether_add_if(struct ifnet *ifp);
2877 extern int ether_del_if(struct ifnet *ifp);
2878 extern int ether_init_if(struct ifnet *ifp);
2879 extern int ether_add_proto_old(struct ifnet *ifp, u_long protocol_family,
2880 struct ddesc_head_str *desc_head);
2881
2882 extern int ether_attach_inet(struct ifnet *ifp, u_long protocol_family);
2883 extern int ether_detach_inet(struct ifnet *ifp, u_long protocol_family);
2884 extern int ether_attach_inet6(struct ifnet *ifp, u_long protocol_family);
2885 extern int ether_detach_inet6(struct ifnet *ifp, u_long protocol_family);
2886
2887 __private_extern__ int
2888 bond_family_init(void)
2889 {
2890 int error=0;
2891 struct dlil_ifmod_reg_str ifmod_reg;
2892
2893 bzero(&ifmod_reg, sizeof(ifmod_reg));
2894 ifmod_reg.add_if = ether_add_if;
2895 ifmod_reg.del_if = ether_del_if;
2896 ifmod_reg.init_if = NULL;
2897 ifmod_reg.add_proto = ether_add_proto_old;
2898 ifmod_reg.del_proto = ether_del_proto;
2899 ifmod_reg.ifmod_ioctl = ether_ioctl;
2900 ifmod_reg.shutdown = NULL;
2901
2902 if (dlil_reg_if_modules(APPLE_IF_FAM_BOND, &ifmod_reg)) {
2903 printf("WARNING: bond_family_init -- "
2904 "Can't register if family modules\n");
2905 error = EIO;
2906 goto done;
2907 }
2908
2909 error = dlil_reg_proto_module(PF_INET, APPLE_IF_FAM_BOND,
2910 ether_attach_inet,
2911 ether_detach_inet);
2912 if (error != 0) {
2913 printf("bond: dlil_reg_proto_module failed for AF_INET6 error=%d\n",
2914 error);
2915 goto done;
2916 }
2917
2918 error = dlil_reg_proto_module(PF_INET6, APPLE_IF_FAM_BOND,
2919 ether_attach_inet6,
2920 ether_detach_inet6);
2921 if (error != 0) {
2922 printf("bond: dlil_reg_proto_module failed for AF_INET6 error=%d\n",
2923 error);
2924 goto done;
2925 }
2926 bond_clone_attach();
2927
2928 done:
2929 return (error);
2930 }
2931 /**
2932 **
2933 ** LACP routines:
2934 **
2935 **/
2936
2937 /**
2938 ** LACP ifbond_list routines
2939 **/
2940 static bondport_ref
2941 ifbond_list_find_moved_port(bondport_ref rx_port,
2942 const lacp_actor_partner_tlv_ref atlv)
2943 {
2944 ifbond_ref bond;
2945 bondport_ref p;
2946 partner_state_ref ps;
2947 LAG_info_ref ps_li;
2948
2949 TAILQ_FOREACH(bond, &g_bond->ifbond_list, ifb_bond_list) {
2950 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
2951
2952 if (rx_port == p) {
2953 /* no point in comparing against ourselves */
2954 continue;
2955 }
2956 if (p->po_receive_state != ReceiveState_PORT_DISABLED) {
2957 /* it's not clear that we should be checking this */
2958 continue;
2959 }
2960 ps = &p->po_partner_state;
2961 if (lacp_actor_partner_state_defaulted(ps->ps_state)) {
2962 continue;
2963 }
2964 ps_li = &ps->ps_lag_info;
2965 if (ps->ps_port == lacp_actor_partner_tlv_get_port(atlv)
2966 && bcmp(&ps_li->li_system, atlv->lap_system,
2967 sizeof(ps_li->li_system)) == 0) {
2968 if (g_bond->verbose) {
2969 timestamp_printf("System " EA_FORMAT
2970 " Port 0x%x moved from %s to %s\n",
2971 EA_LIST(&ps_li->li_system), ps->ps_port,
2972 bondport_get_name(p),
2973 bondport_get_name(rx_port));
2974 }
2975 return (p);
2976 }
2977 }
2978 }
2979 return (NULL);
2980 }
2981
2982 /**
2983 ** LACP ifbond, LAG routines
2984 **/
2985
2986 static int
2987 ifbond_selection(ifbond_ref bond)
2988 {
2989 int all_ports_ready = 0;
2990 int active_media = 0;
2991 LAG_ref lag = NULL;
2992 int lag_changed = 0;
2993 bondport_ref p;
2994 int port_speed = 0;
2995
2996 lag = ifbond_find_best_LAG(bond, &active_media);
2997 if (lag != bond->ifb_active_lag) {
2998 if (bond->ifb_active_lag != NULL) {
2999 ifbond_deactivate_LAG(bond, bond->ifb_active_lag);
3000 bond->ifb_active_lag = NULL;
3001 }
3002 bond->ifb_active_lag = lag;
3003 if (lag != NULL) {
3004 ifbond_activate_LAG(bond, lag, active_media);
3005 }
3006 lag_changed = 1;
3007 }
3008 else if (lag != NULL) {
3009 if (lag->lag_active_media != active_media) {
3010 if (g_bond->verbose) {
3011 timestamp_printf("LAG PORT SPEED CHANGED from %d to %d\n",
3012 link_speed(lag->lag_active_media),
3013 link_speed(active_media));
3014 }
3015 ifbond_deactivate_LAG(bond, lag);
3016 ifbond_activate_LAG(bond, lag, active_media);
3017 lag_changed = 1;
3018 }
3019 }
3020 if (lag != NULL) {
3021 port_speed = link_speed(active_media);
3022 all_ports_ready = ifbond_all_ports_ready(bond);
3023 }
3024 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3025 if (lag != NULL && p->po_lag == lag
3026 && media_speed(&p->po_media_info) == port_speed
3027 && (p->po_mux_state == MuxState_DETACHED
3028 || p->po_selected == SelectedState_SELECTED
3029 || p->po_selected == SelectedState_STANDBY)
3030 && bondport_aggregatable(p)) {
3031 if (bond->ifb_max_active > 0) {
3032 if (lag->lag_selected_port_count < bond->ifb_max_active) {
3033 if (p->po_selected == SelectedState_STANDBY
3034 || p->po_selected == SelectedState_UNSELECTED) {
3035 bondport_set_selected(p, SelectedState_SELECTED);
3036 }
3037 }
3038 else if (p->po_selected == SelectedState_UNSELECTED) {
3039 bondport_set_selected(p, SelectedState_STANDBY);
3040 }
3041 }
3042 else {
3043 bondport_set_selected(p, SelectedState_SELECTED);
3044 }
3045 }
3046 if (bondport_flags_selected_changed(p)) {
3047 bondport_flags_clear_selected_changed(p);
3048 bondport_mux_machine(p, LAEventSelectedChange, NULL);
3049 }
3050 if (all_ports_ready
3051 && bondport_flags_ready(p)
3052 && p->po_mux_state == MuxState_WAITING) {
3053 bondport_mux_machine(p, LAEventReady, NULL);
3054 }
3055 bondport_transmit_machine(p, LAEventStart, NULL);
3056 }
3057 return (lag_changed);
3058 }
3059
3060 static LAG_ref
3061 ifbond_find_best_LAG(ifbond_ref bond, int * active_media)
3062 {
3063 int best_active = 0;
3064 LAG_ref best_lag = NULL;
3065 int best_count = 0;
3066 int best_speed = 0;
3067 LAG_ref lag;
3068
3069 if (bond->ifb_active_lag != NULL) {
3070 best_lag = bond->ifb_active_lag;
3071 best_count = LAG_get_aggregatable_port_count(best_lag, &best_active);
3072 if (bond->ifb_max_active > 0
3073 && best_count > bond->ifb_max_active) {
3074 best_count = bond->ifb_max_active;
3075 }
3076 best_speed = link_speed(best_active);
3077 }
3078 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3079 int active;
3080 int count;
3081 int speed;
3082
3083 if (lag == bond->ifb_active_lag) {
3084 /* we've already computed it */
3085 continue;
3086 }
3087 count = LAG_get_aggregatable_port_count(lag, &active);
3088 if (count == 0) {
3089 continue;
3090 }
3091 if (bond->ifb_max_active > 0
3092 && count > bond->ifb_max_active) {
3093 /* if there's a limit, don't count extra links */
3094 count = bond->ifb_max_active;
3095 }
3096 speed = link_speed(active);
3097 if ((count * speed) > (best_count * best_speed)) {
3098 best_count = count;
3099 best_speed = speed;
3100 best_active = active;
3101 best_lag = lag;
3102 }
3103 }
3104 if (best_count == 0) {
3105 return (NULL);
3106 }
3107 *active_media = best_active;
3108 return (best_lag);
3109 }
3110
3111 static void
3112 ifbond_deactivate_LAG(__unused ifbond_ref bond, LAG_ref lag)
3113 {
3114 bondport_ref p;
3115
3116 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3117 bondport_set_selected(p, SelectedState_UNSELECTED);
3118 }
3119 return;
3120 }
3121
3122 static void
3123 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media)
3124 {
3125 int need = 0;
3126 bondport_ref p;
3127
3128 if (bond->ifb_max_active > 0) {
3129 need = bond->ifb_max_active;
3130 }
3131 lag->lag_active_media = active_media;
3132 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3133 if (bondport_aggregatable(p) == 0) {
3134 bondport_set_selected(p, SelectedState_UNSELECTED);
3135 }
3136 else if (media_speed(&p->po_media_info) != link_speed(active_media)) {
3137 bondport_set_selected(p, SelectedState_UNSELECTED);
3138 }
3139 else if (p->po_mux_state == MuxState_DETACHED) {
3140 if (bond->ifb_max_active > 0) {
3141 if (need > 0) {
3142 bondport_set_selected(p, SelectedState_SELECTED);
3143 need--;
3144 }
3145 else {
3146 bondport_set_selected(p, SelectedState_STANDBY);
3147 }
3148 }
3149 else {
3150 bondport_set_selected(p, SelectedState_SELECTED);
3151 }
3152 }
3153 else {
3154 bondport_set_selected(p, SelectedState_UNSELECTED);
3155 }
3156 }
3157 return;
3158 }
3159
#if 0
/*
 * Function: ifbond_set_max_active
 * Purpose:
 *   (Currently compiled out.)  Set the limit on simultaneously selected
 *   ports; if the active LAG has more selected ports than the new
 *   limit, unselect the excess.
 */
static void
ifbond_set_max_active(ifbond_ref bond, int max_active)
{
    LAG_ref     lag = bond->ifb_active_lag;

    bond->ifb_max_active = max_active;
    if (bond->ifb_max_active <= 0 || lag == NULL) {
        return;
    }
    if (lag->lag_selected_port_count > bond->ifb_max_active) {
        bondport_ref    p;
        int             remove_count;

        remove_count = lag->lag_selected_port_count - bond->ifb_max_active;
        TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
            if (p->po_selected == SelectedState_SELECTED) {
                bondport_set_selected(p, SelectedState_UNSELECTED);
                remove_count--;
                if (remove_count == 0) {
                    break;
                }
            }
        }
    }
    return;
}
#endif /* 0 */
3188
3189 static int
3190 ifbond_all_ports_ready(ifbond_ref bond)
3191 {
3192 int ready = 0;
3193 bondport_ref p;
3194
3195 if (bond->ifb_active_lag == NULL) {
3196 return (0);
3197 }
3198 TAILQ_FOREACH(p, &bond->ifb_active_lag->lag_port_list, po_lag_port_list) {
3199 if (p->po_mux_state == MuxState_WAITING
3200 && p->po_selected == SelectedState_SELECTED) {
3201 if (bondport_flags_ready(p) == 0) {
3202 return (0);
3203 }
3204 }
3205 /* note that there was at least one ready port */
3206 ready = 1;
3207 }
3208 return (ready);
3209 }
3210
3211 static int
3212 ifbond_all_ports_attached(ifbond_ref bond, bondport_ref this_port)
3213 {
3214 bondport_ref p;
3215
3216 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3217 if (this_port == p) {
3218 continue;
3219 }
3220 if (bondport_flags_mux_attached(p) == 0) {
3221 return (0);
3222 }
3223 }
3224 return (1);
3225 }
3226
3227 static LAG_ref
3228 ifbond_get_LAG_matching_port(ifbond_ref bond, bondport_ref p)
3229 {
3230 LAG_ref lag;
3231
3232 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3233 if (bcmp(&lag->lag_info, &p->po_partner_state.ps_lag_info,
3234 sizeof(lag->lag_info)) == 0) {
3235 return (lag);
3236 }
3237 }
3238 return (NULL);
3239 }
3240
3241 static int
3242 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media)
3243 {
3244 int active;
3245 int count;
3246 bondport_ref p;
3247 int speed;
3248
3249 active = 0;
3250 count = 0;
3251 speed = 0;
3252 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3253 if (bondport_aggregatable(p)) {
3254 int this_speed;
3255
3256 this_speed = media_speed(&p->po_media_info);
3257 if (this_speed == 0) {
3258 continue;
3259 }
3260 if (this_speed > speed) {
3261 active = p->po_media_info.mi_active;
3262 speed = this_speed;
3263 count = 1;
3264 }
3265 else if (this_speed == speed) {
3266 count++;
3267 }
3268 }
3269 }
3270 *active_media = active;
3271 return (count);
3272 }
3273
3274
3275 /**
3276 ** LACP bondport routines
3277 **/
3278 static void
3279 bondport_link_status_changed(bondport_ref p)
3280 {
3281 ifbond_ref bond = p->po_bond;
3282
3283 if (g_bond->verbose) {
3284 if (media_active(&p->po_media_info)) {
3285 timestamp_printf("[%s] Link UP %d Mbit/s %s duplex\n",
3286 bondport_get_name(p),
3287 media_speed(&p->po_media_info),
3288 media_full_duplex(&p->po_media_info)
3289 ? "full" : "half");
3290 }
3291 else {
3292 timestamp_printf("[%s] Link DOWN\n", bondport_get_name(p));
3293 }
3294 }
3295 if (media_active(&p->po_media_info)
3296 && bond->ifb_active_lag != NULL
3297 && p->po_lag == bond->ifb_active_lag
3298 && p->po_selected != SelectedState_UNSELECTED) {
3299 if (media_speed(&p->po_media_info) != p->po_lag->lag_active_media) {
3300 if (g_bond->verbose) {
3301 timestamp_printf("[%s] Port speed %d differs from LAG %d\n",
3302 bondport_get_name(p),
3303 media_speed(&p->po_media_info),
3304 link_speed(p->po_lag->lag_active_media));
3305 }
3306 bondport_set_selected(p, SelectedState_UNSELECTED);
3307 }
3308 }
3309 bondport_receive_machine(p, LAEventMediaChange, NULL);
3310 bondport_mux_machine(p, LAEventMediaChange, NULL);
3311 bondport_periodic_transmit_machine(p, LAEventMediaChange, NULL);
3312
3313 return;
3314 }
3315
3316 static int
3317 bondport_aggregatable(bondport_ref p)
3318 {
3319 partner_state_ref ps = &p->po_partner_state;
3320
3321 if (lacp_actor_partner_state_aggregatable(p->po_actor_state) == 0
3322 || lacp_actor_partner_state_aggregatable(ps->ps_state) == 0) {
3323 /* we and/or our partner are individual */
3324 return (0);
3325 }
3326 if (p->po_lag == NULL) {
3327 return (0);
3328 }
3329 switch (p->po_receive_state) {
3330 default:
3331 if (g_bond->verbose) {
3332 timestamp_printf("[%s] Port is not selectable\n",
3333 bondport_get_name(p));
3334 }
3335 return (0);
3336 case ReceiveState_CURRENT:
3337 case ReceiveState_EXPIRED:
3338 break;
3339 }
3340 return (1);
3341 }
3342
3343 static int
3344 bondport_matches_LAG(bondport_ref p, LAG_ref lag)
3345 {
3346 LAG_info_ref lag_li;
3347 partner_state_ref ps;
3348 LAG_info_ref ps_li;
3349
3350 ps = &p->po_partner_state;
3351 ps_li = &ps->ps_lag_info;
3352 lag_li = &lag->lag_info;
3353 if (ps_li->li_system_priority == lag_li->li_system_priority
3354 && ps_li->li_key == lag_li->li_key
3355 && (bcmp(&ps_li->li_system, &lag_li->li_system,
3356 sizeof(lag_li->li_system))
3357 == 0)) {
3358 return (1);
3359 }
3360 return (0);
3361 }
3362
3363 static int
3364 bondport_remove_from_LAG(bondport_ref p)
3365 {
3366 int active_lag = 0;
3367 ifbond_ref bond = p->po_bond;
3368 LAG_ref lag = p->po_lag;
3369
3370 if (lag == NULL) {
3371 return (0);
3372 }
3373 TAILQ_REMOVE(&lag->lag_port_list, p, po_lag_port_list);
3374 if (g_bond->verbose) {
3375 timestamp_printf("[%s] Removed from LAG (0x%04x," EA_FORMAT
3376 ",0x%04x)\n",
3377 bondport_get_name(p),
3378 lag->lag_info.li_system_priority,
3379 EA_LIST(&lag->lag_info.li_system),
3380 lag->lag_info.li_key);
3381 }
3382 p->po_lag = NULL;
3383 lag->lag_port_count--;
3384 if (lag->lag_port_count > 0) {
3385 return (bond->ifb_active_lag == lag);
3386 }
3387 if (g_bond->verbose) {
3388 timestamp_printf("Key 0x%04x: LAG Released (%04x," EA_FORMAT
3389 ",0x%04x)\n",
3390 bond->ifb_key,
3391 lag->lag_info.li_system_priority,
3392 EA_LIST(&lag->lag_info.li_system),
3393 lag->lag_info.li_key);
3394 }
3395 TAILQ_REMOVE(&bond->ifb_lag_list, lag, lag_list);
3396 if (bond->ifb_active_lag == lag) {
3397 bond->ifb_active_lag = NULL;
3398 active_lag = 1;
3399 }
3400 FREE(lag, M_BOND);
3401 return (active_lag);
3402 }
3403
3404 static void
3405 bondport_add_to_LAG(bondport_ref p, LAG_ref lag)
3406 {
3407 TAILQ_INSERT_TAIL(&lag->lag_port_list, p, po_lag_port_list);
3408 p->po_lag = lag;
3409 lag->lag_port_count++;
3410 if (g_bond->verbose) {
3411 timestamp_printf("[%s] Added to LAG (0x%04x," EA_FORMAT "0x%04x)\n",
3412 bondport_get_name(p),
3413 lag->lag_info.li_system_priority,
3414 EA_LIST(&lag->lag_info.li_system),
3415 lag->lag_info.li_key);
3416 }
3417 return;
3418 }
3419
3420 static void
3421 bondport_assign_to_LAG(bondport_ref p)
3422 {
3423 ifbond_ref bond = p->po_bond;
3424 LAG_ref lag;
3425
3426 if (lacp_actor_partner_state_defaulted(p->po_actor_state)) {
3427 bondport_remove_from_LAG(p);
3428 return;
3429 }
3430 lag = p->po_lag;
3431 if (lag != NULL) {
3432 if (bondport_matches_LAG(p, lag)) {
3433 /* still OK */
3434 return;
3435 }
3436 bondport_remove_from_LAG(p);
3437 }
3438 lag = ifbond_get_LAG_matching_port(bond, p);
3439 if (lag != NULL) {
3440 bondport_add_to_LAG(p, lag);
3441 return;
3442 }
3443 lag = (LAG_ref)_MALLOC(sizeof(*lag), M_BOND, M_WAITOK);
3444 TAILQ_INIT(&lag->lag_port_list);
3445 lag->lag_port_count = 0;
3446 lag->lag_selected_port_count = 0;
3447 lag->lag_info = p->po_partner_state.ps_lag_info;
3448 TAILQ_INSERT_TAIL(&bond->ifb_lag_list, lag, lag_list);
3449 if (g_bond->verbose) {
3450 timestamp_printf("Key 0x%04x: LAG Created (0x%04x," EA_FORMAT
3451 ",0x%04x)\n",
3452 bond->ifb_key,
3453 lag->lag_info.li_system_priority,
3454 EA_LIST(&lag->lag_info.li_system),
3455 lag->lag_info.li_key);
3456 }
3457 bondport_add_to_LAG(p, lag);
3458 return;
3459 }
3460
3461 static void
3462 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p)
3463 {
3464 bondport_ref moved_port;
3465
3466 moved_port
3467 = ifbond_list_find_moved_port(p, (const lacp_actor_partner_tlv_ref)
3468 &in_lacpdu_p->la_actor_tlv);
3469 if (moved_port != NULL) {
3470 bondport_receive_machine(moved_port, LAEventPortMoved, NULL);
3471 }
3472 bondport_receive_machine(p, LAEventPacket, in_lacpdu_p);
3473 bondport_mux_machine(p, LAEventPacket, in_lacpdu_p);
3474 bondport_periodic_transmit_machine(p, LAEventPacket, in_lacpdu_p);
3475 return;
3476 }
3477
3478 static void
3479 bondport_set_selected(bondport_ref p, SelectedState s)
3480 {
3481 if (s != p->po_selected) {
3482 ifbond_ref bond = p->po_bond;
3483 LAG_ref lag = p->po_lag;
3484
3485 bondport_flags_set_selected_changed(p);
3486 if (lag != NULL && bond->ifb_active_lag == lag) {
3487 if (p->po_selected == SelectedState_SELECTED) {
3488 lag->lag_selected_port_count--;
3489 }
3490 else if (s == SelectedState_SELECTED) {
3491 lag->lag_selected_port_count++;
3492 }
3493 if (g_bond->verbose) {
3494 timestamp_printf("[%s] SetSelected: %s (was %s)\n",
3495 bondport_get_name(p),
3496 SelectedStateString(s),
3497 SelectedStateString(p->po_selected));
3498 }
3499 }
3500 }
3501 p->po_selected = s;
3502 return;
3503 }
3504
3505 /**
3506 ** Receive machine
3507 **/
3508
3509 static void
3510 bondport_UpdateDefaultSelected(bondport_ref p)
3511 {
3512 bondport_set_selected(p, SelectedState_UNSELECTED);
3513 return;
3514 }
3515
3516 static void
3517 bondport_RecordDefault(bondport_ref p)
3518 {
3519 bzero(&p->po_partner_state, sizeof(p->po_partner_state));
3520 p->po_actor_state
3521 = lacp_actor_partner_state_set_defaulted(p->po_actor_state);
3522 bondport_assign_to_LAG(p);
3523 return;
3524 }
3525
3526 static void
3527 bondport_UpdateSelected(bondport_ref p, lacpdu_ref lacpdu_p)
3528 {
3529 lacp_actor_partner_tlv_ref actor;
3530 partner_state_ref ps;
3531 LAG_info_ref ps_li;
3532
3533 /* compare the PDU's Actor information to our Partner state */
3534 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
3535 ps = &p->po_partner_state;
3536 ps_li = &ps->ps_lag_info;
3537 if (lacp_actor_partner_tlv_get_port(actor) != ps->ps_port
3538 || (lacp_actor_partner_tlv_get_port_priority(actor)
3539 != ps->ps_port_priority)
3540 || bcmp(actor->lap_system, &ps_li->li_system, sizeof(ps_li->li_system))
3541 || (lacp_actor_partner_tlv_get_system_priority(actor)
3542 != ps_li->li_system_priority)
3543 || (lacp_actor_partner_tlv_get_key(actor) != ps_li->li_key)
3544 || (lacp_actor_partner_state_aggregatable(actor->lap_state)
3545 != lacp_actor_partner_state_aggregatable(ps->ps_state))) {
3546 bondport_set_selected(p, SelectedState_UNSELECTED);
3547 if (g_bond->verbose) {
3548 timestamp_printf("[%s] updateSelected UNSELECTED\n",
3549 bondport_get_name(p));
3550 }
3551 }
3552 return;
3553 }
3554
3555 static void
3556 bondport_RecordPDU(bondport_ref p, lacpdu_ref lacpdu_p)
3557 {
3558 lacp_actor_partner_tlv_ref actor;
3559 ifbond_ref bond = p->po_bond;
3560 int lacp_maintain = 0;
3561 partner_state_ref ps;
3562 lacp_actor_partner_tlv_ref partner;
3563 LAG_info_ref ps_li;
3564
3565 /* copy the PDU's Actor information into our Partner state */
3566 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
3567 ps = &p->po_partner_state;
3568 ps_li = &ps->ps_lag_info;
3569 ps->ps_port = lacp_actor_partner_tlv_get_port(actor);
3570 ps->ps_port_priority = lacp_actor_partner_tlv_get_port_priority(actor);
3571 ps_li->li_system = *((lacp_system_ref)actor->lap_system);
3572 ps_li->li_system_priority
3573 = lacp_actor_partner_tlv_get_system_priority(actor);
3574 ps_li->li_key = lacp_actor_partner_tlv_get_key(actor);
3575 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(actor->lap_state);
3576 p->po_actor_state
3577 = lacp_actor_partner_state_set_not_defaulted(p->po_actor_state);
3578
3579 /* compare the PDU's Partner information to our own information */
3580 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
3581
3582 if (lacp_actor_partner_state_active_lacp(ps->ps_state)
3583 || (lacp_actor_partner_state_active_lacp(p->po_actor_state)
3584 && lacp_actor_partner_state_active_lacp(partner->lap_state))) {
3585 if (g_bond->verbose) {
3586 timestamp_printf("[%s] recordPDU: LACP will maintain\n",
3587 bondport_get_name(p));
3588 }
3589 lacp_maintain = 1;
3590 }
3591 if ((lacp_actor_partner_tlv_get_port(partner)
3592 == bondport_get_index(p))
3593 && lacp_actor_partner_tlv_get_port_priority(partner) == p->po_priority
3594 && bcmp(partner->lap_system, &g_bond->system,
3595 sizeof(g_bond->system)) == 0
3596 && (lacp_actor_partner_tlv_get_system_priority(partner)
3597 == g_bond->system_priority)
3598 && lacp_actor_partner_tlv_get_key(partner) == bond->ifb_key
3599 && (lacp_actor_partner_state_aggregatable(partner->lap_state)
3600 == lacp_actor_partner_state_aggregatable(p->po_actor_state))
3601 && lacp_actor_partner_state_in_sync(actor->lap_state)
3602 && lacp_maintain) {
3603 ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
3604 if (g_bond->verbose) {
3605 timestamp_printf("[%s] recordPDU: LACP partner in sync\n",
3606 bondport_get_name(p));
3607 }
3608 }
3609 else if (lacp_actor_partner_state_aggregatable(actor->lap_state) == 0
3610 && lacp_actor_partner_state_in_sync(actor->lap_state)
3611 && lacp_maintain) {
3612 ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
3613 if (g_bond->verbose) {
3614 timestamp_printf("[%s] recordPDU: LACP partner in sync (ind)\n",
3615 bondport_get_name(p));
3616 }
3617 }
3618 bondport_assign_to_LAG(p);
3619 return;
3620 }
3621
3622 static __inline__ lacp_actor_partner_state
3623 updateNTTBits(lacp_actor_partner_state s)
3624 {
3625 return (s & (LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY
3626 | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT
3627 | LACP_ACTOR_PARTNER_STATE_AGGREGATION
3628 | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION));
3629 }
3630
3631 static void
3632 bondport_UpdateNTT(bondport_ref p, lacpdu_ref lacpdu_p)
3633 {
3634 ifbond_ref bond = p->po_bond;
3635 lacp_actor_partner_tlv_ref partner;
3636
3637 /* compare the PDU's Actor information to our Partner state */
3638 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
3639 if ((lacp_actor_partner_tlv_get_port(partner) != bondport_get_index(p))
3640 || lacp_actor_partner_tlv_get_port_priority(partner) != p->po_priority
3641 || bcmp(partner->lap_system, &g_bond->system, sizeof(g_bond->system))
3642 || (lacp_actor_partner_tlv_get_system_priority(partner)
3643 != g_bond->system_priority)
3644 || lacp_actor_partner_tlv_get_key(partner) != bond->ifb_key
3645 || (updateNTTBits(partner->lap_state)
3646 != updateNTTBits(p->po_actor_state))) {
3647 bondport_flags_set_ntt(p);
3648 if (g_bond->verbose) {
3649 timestamp_printf("[%s] updateNTT: Need To Transmit\n",
3650 bondport_get_name(p));
3651 }
3652 }
3653 return;
3654 }
3655
3656 static void
3657 bondport_AttachMuxToAggregator(bondport_ref p)
3658 {
3659 if (bondport_flags_mux_attached(p) == 0) {
3660 if (g_bond->verbose) {
3661 timestamp_printf("[%s] Attached Mux To Aggregator\n",
3662 bondport_get_name(p));
3663 }
3664 bondport_flags_set_mux_attached(p);
3665 }
3666 return;
3667 }
3668
3669 static void
3670 bondport_DetachMuxFromAggregator(bondport_ref p)
3671 {
3672 if (bondport_flags_mux_attached(p)) {
3673 if (g_bond->verbose) {
3674 timestamp_printf("[%s] Detached Mux From Aggregator\n",
3675 bondport_get_name(p));
3676 }
3677 bondport_flags_clear_mux_attached(p);
3678 }
3679 return;
3680 }
3681
3682 static void
3683 bondport_enable_distributing(bondport_ref p)
3684 {
3685 if (bondport_flags_distributing(p) == 0) {
3686 ifbond_ref bond = p->po_bond;
3687
3688 bond->ifb_distributing_array[bond->ifb_distributing_count++] = p;
3689 if (g_bond->verbose) {
3690 timestamp_printf("[%s] Distribution Enabled\n",
3691 bondport_get_name(p));
3692 }
3693 bondport_flags_set_distributing(p);
3694 }
3695 return;
3696 }
3697
3698 static void
3699 bondport_disable_distributing(bondport_ref p)
3700 {
3701 if (bondport_flags_distributing(p)) {
3702 bondport_ref * array;
3703 ifbond_ref bond;
3704 int count;
3705 int i;
3706
3707 bond = p->po_bond;
3708 array = bond->ifb_distributing_array;
3709 count = bond->ifb_distributing_count;
3710 for (i = 0; i < count; i++) {
3711 if (array[i] == p) {
3712 int j;
3713
3714 for (j = i; j < (count - 1); j++) {
3715 array[j] = array[j + 1];
3716 }
3717 break;
3718 }
3719 }
3720 bond->ifb_distributing_count--;
3721 if (g_bond->verbose) {
3722 timestamp_printf("[%s] Distribution Disabled\n",
3723 bondport_get_name(p));
3724 }
3725 bondport_flags_clear_distributing(p);
3726 }
3727 return;
3728 }
3729
3730 /**
3731 ** Receive machine functions
3732 **/
3733 static void
3734 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
3735 void * event_data);
3736 static void
3737 bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
3738 void * event_data);
3739 static void
3740 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
3741 void * event_data);
3742 static void
3743 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
3744 void * event_data);
3745 static void
3746 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
3747 void * event_data);
3748 static void
3749 bondport_receive_machine_current(bondport_ref p, LAEvent event,
3750 void * event_data);
3751
3752 static void
3753 bondport_receive_machine_event(bondport_ref p, LAEvent event,
3754 void * event_data)
3755 {
3756 switch (p->po_receive_state) {
3757 case ReceiveState_none:
3758 bondport_receive_machine_initialize(p, LAEventStart, NULL);
3759 break;
3760 case ReceiveState_INITIALIZE:
3761 bondport_receive_machine_initialize(p, event, event_data);
3762 break;
3763 case ReceiveState_PORT_DISABLED:
3764 bondport_receive_machine_port_disabled(p, event, event_data);
3765 break;
3766 case ReceiveState_EXPIRED:
3767 bondport_receive_machine_expired(p, event, event_data);
3768 break;
3769 case ReceiveState_LACP_DISABLED:
3770 bondport_receive_machine_lacp_disabled(p, event, event_data);
3771 break;
3772 case ReceiveState_DEFAULTED:
3773 bondport_receive_machine_defaulted(p, event, event_data);
3774 break;
3775 case ReceiveState_CURRENT:
3776 bondport_receive_machine_current(p, event, event_data);
3777 break;
3778 default:
3779 break;
3780 }
3781 return;
3782 }
3783
3784 static void
3785 bondport_receive_machine(bondport_ref p, LAEvent event,
3786 void * event_data)
3787 {
3788 switch (event) {
3789 case LAEventPacket:
3790 if (p->po_receive_state != ReceiveState_LACP_DISABLED) {
3791 bondport_receive_machine_current(p, event, event_data);
3792 }
3793 break;
3794 case LAEventMediaChange:
3795 if (media_active(&p->po_media_info)) {
3796 switch (p->po_receive_state) {
3797 case ReceiveState_PORT_DISABLED:
3798 case ReceiveState_LACP_DISABLED:
3799 bondport_receive_machine_port_disabled(p, LAEventMediaChange, NULL);
3800 break;
3801 default:
3802 break;
3803 }
3804 }
3805 else {
3806 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
3807 }
3808 break;
3809 default:
3810 bondport_receive_machine_event(p, event, event_data);
3811 break;
3812 }
3813 return;
3814 }
3815
3816 static void
3817 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
3818 __unused void * event_data)
3819 {
3820 switch (event) {
3821 case LAEventStart:
3822 devtimer_cancel(p->po_current_while_timer);
3823 if (g_bond->verbose) {
3824 timestamp_printf("[%s] Receive INITIALIZE\n",
3825 bondport_get_name(p));
3826 }
3827 p->po_receive_state = ReceiveState_INITIALIZE;
3828 bondport_set_selected(p, SelectedState_UNSELECTED);
3829 bondport_RecordDefault(p);
3830 p->po_actor_state
3831 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
3832 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
3833 break;
3834 default:
3835 break;
3836 }
3837 return;
3838 }
3839
3840 static void
3841 bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
3842 __unused void * event_data)
3843 {
3844 partner_state_ref ps;
3845
3846 switch (event) {
3847 case LAEventStart:
3848 devtimer_cancel(p->po_current_while_timer);
3849 if (g_bond->verbose) {
3850 timestamp_printf("[%s] Receive PORT_DISABLED\n",
3851 bondport_get_name(p));
3852 }
3853 p->po_receive_state = ReceiveState_PORT_DISABLED;
3854 ps = &p->po_partner_state;
3855 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(ps->ps_state);
3856 /* FALL THROUGH */
3857 case LAEventMediaChange:
3858 if (media_active(&p->po_media_info)) {
3859 if (media_full_duplex(&p->po_media_info)) {
3860 bondport_receive_machine_expired(p, LAEventStart, NULL);
3861 }
3862 else {
3863 bondport_receive_machine_lacp_disabled(p, LAEventStart, NULL);
3864 }
3865 }
3866 else if (p->po_selected == SelectedState_SELECTED) {
3867 struct timeval tv;
3868
3869 if (g_bond->verbose) {
3870 timestamp_printf("[%s] Receive PORT_DISABLED: "
3871 "link timer started\n",
3872 bondport_get_name(p));
3873 }
3874 tv.tv_sec = 1;
3875 tv.tv_usec = 0;
3876 devtimer_set_relative(p->po_current_while_timer, tv,
3877 (devtimer_timeout_func)
3878 bondport_receive_machine_port_disabled,
3879 (void *)LAEventTimeout, NULL);
3880 }
3881 else if (p->po_selected == SelectedState_STANDBY) {
3882 bondport_set_selected(p, SelectedState_UNSELECTED);
3883 }
3884 break;
3885 case LAEventTimeout:
3886 if (p->po_selected == SelectedState_SELECTED) {
3887 if (g_bond->verbose) {
3888 timestamp_printf("[%s] Receive PORT_DISABLED: "
3889 "link timer completed, marking UNSELECTED\n",
3890 bondport_get_name(p));
3891 }
3892 bondport_set_selected(p, SelectedState_UNSELECTED);
3893 }
3894 break;
3895 case LAEventPortMoved:
3896 bondport_receive_machine_initialize(p, LAEventStart, NULL);
3897 break;
3898 default:
3899 break;
3900 }
3901 return;
3902 }
3903
3904 static void
3905 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
3906 __unused void * event_data)
3907 {
3908 lacp_actor_partner_state s;
3909 struct timeval tv;
3910
3911 switch (event) {
3912 case LAEventStart:
3913 devtimer_cancel(p->po_current_while_timer);
3914 if (g_bond->verbose) {
3915 timestamp_printf("[%s] Receive EXPIRED\n",
3916 bondport_get_name(p));
3917 }
3918 p->po_receive_state = ReceiveState_EXPIRED;
3919 s = p->po_partner_state.ps_state;
3920 s = lacp_actor_partner_state_set_out_of_sync(s);
3921 s = lacp_actor_partner_state_set_short_timeout(s);
3922 p->po_partner_state.ps_state = s;
3923 p->po_actor_state
3924 = lacp_actor_partner_state_set_expired(p->po_actor_state);
3925 /* start current_while timer */
3926 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
3927 tv.tv_usec = 0;
3928 devtimer_set_relative(p->po_current_while_timer, tv,
3929 (devtimer_timeout_func)
3930 bondport_receive_machine_expired,
3931 (void *)LAEventTimeout, NULL);
3932
3933 break;
3934 case LAEventTimeout:
3935 bondport_receive_machine_defaulted(p, LAEventStart, NULL);
3936 break;
3937 default:
3938 break;
3939 }
3940 return;
3941 }
3942
3943 static void
3944 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
3945 __unused void * event_data)
3946 {
3947 partner_state_ref ps;
3948 switch (event) {
3949 case LAEventStart:
3950 devtimer_cancel(p->po_current_while_timer);
3951 if (g_bond->verbose) {
3952 timestamp_printf("[%s] Receive LACP_DISABLED\n",
3953 bondport_get_name(p));
3954 }
3955 p->po_receive_state = ReceiveState_LACP_DISABLED;
3956 bondport_set_selected(p, SelectedState_UNSELECTED);
3957 bondport_RecordDefault(p);
3958 ps = &p->po_partner_state;
3959 ps->ps_state = lacp_actor_partner_state_set_individual(ps->ps_state);
3960 p->po_actor_state
3961 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
3962 break;
3963 default:
3964 break;
3965 }
3966 return;
3967 }
3968
3969 static void
3970 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
3971 __unused void * event_data)
3972 {
3973 switch (event) {
3974 case LAEventStart:
3975 devtimer_cancel(p->po_current_while_timer);
3976 if (g_bond->verbose) {
3977 timestamp_printf("[%s] Receive DEFAULTED\n",
3978 bondport_get_name(p));
3979 }
3980 p->po_receive_state = ReceiveState_DEFAULTED;
3981 bondport_UpdateDefaultSelected(p);
3982 bondport_RecordDefault(p);
3983 p->po_actor_state
3984 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
3985 break;
3986 default:
3987 break;
3988 }
3989 return;
3990 }
3991
3992 static void
3993 bondport_receive_machine_current(bondport_ref p, LAEvent event,
3994 void * event_data)
3995 {
3996 partner_state_ref ps;
3997 struct timeval tv;
3998
3999 switch (event) {
4000 case LAEventPacket:
4001 devtimer_cancel(p->po_current_while_timer);
4002 if (g_bond->verbose) {
4003 timestamp_printf("[%s] Receive CURRENT\n",
4004 bondport_get_name(p));
4005 }
4006 p->po_receive_state = ReceiveState_CURRENT;
4007 bondport_UpdateSelected(p, event_data);
4008 bondport_UpdateNTT(p, event_data);
4009 bondport_RecordPDU(p, event_data);
4010 p->po_actor_state
4011 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4012 bondport_assign_to_LAG(p);
4013 /* start current_while timer */
4014 ps = &p->po_partner_state;
4015 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4016 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4017 }
4018 else {
4019 tv.tv_sec = LACP_LONG_TIMEOUT_TIME;
4020 }
4021 tv.tv_usec = 0;
4022 devtimer_set_relative(p->po_current_while_timer, tv,
4023 (devtimer_timeout_func)
4024 bondport_receive_machine_current,
4025 (void *)LAEventTimeout, NULL);
4026 break;
4027 case LAEventTimeout:
4028 bondport_receive_machine_expired(p, LAEventStart, NULL);
4029 break;
4030 default:
4031 break;
4032 }
4033 return;
4034 }
4035
4036 /**
4037 ** Periodic Transmission machine
4038 **/
4039
4040 static void
4041 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
4042 __unused void * event_data)
4043 {
4044 int interval;
4045 partner_state_ref ps;
4046 struct timeval tv;
4047
4048 switch (event) {
4049 case LAEventStart:
4050 if (g_bond->verbose) {
4051 timestamp_printf("[%s] periodic_transmit Start\n",
4052 bondport_get_name(p));
4053 }
4054 /* FALL THROUGH */
4055 case LAEventMediaChange:
4056 devtimer_cancel(p->po_periodic_timer);
4057 p->po_periodic_interval = 0;
4058 if (media_active(&p->po_media_info) == 0
4059 || media_full_duplex(&p->po_media_info) == 0) {
4060 break;
4061 }
4062 case LAEventPacket:
4063 /* Neither Partner nor Actor are LACP Active, no periodic tx */
4064 ps = &p->po_partner_state;
4065 if (lacp_actor_partner_state_active_lacp(p->po_actor_state) == 0
4066 && (lacp_actor_partner_state_active_lacp(ps->ps_state)
4067 == 0)) {
4068 devtimer_cancel(p->po_periodic_timer);
4069 p->po_periodic_interval = 0;
4070 break;
4071 }
4072 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4073 interval = LACP_FAST_PERIODIC_TIME;
4074 }
4075 else {
4076 interval = LACP_SLOW_PERIODIC_TIME;
4077 }
4078 if (p->po_periodic_interval != interval) {
4079 if (interval == LACP_FAST_PERIODIC_TIME
4080 && p->po_periodic_interval == LACP_SLOW_PERIODIC_TIME) {
4081 if (g_bond->verbose) {
4082 timestamp_printf("[%s] periodic_transmit:"
4083 " Need To Transmit\n",
4084 bondport_get_name(p));
4085 }
4086 bondport_flags_set_ntt(p);
4087 }
4088 p->po_periodic_interval = interval;
4089 tv.tv_usec = 0;
4090 tv.tv_sec = interval;
4091 devtimer_set_relative(p->po_periodic_timer, tv,
4092 (devtimer_timeout_func)
4093 bondport_periodic_transmit_machine,
4094 (void *)LAEventTimeout, NULL);
4095 if (g_bond->verbose) {
4096 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4097 bondport_get_name(p),
4098 p->po_periodic_interval);
4099 }
4100 }
4101 break;
4102 case LAEventTimeout:
4103 bondport_flags_set_ntt(p);
4104 tv.tv_sec = p->po_periodic_interval;
4105 tv.tv_usec = 0;
4106 devtimer_set_relative(p->po_periodic_timer, tv, (devtimer_timeout_func)
4107 bondport_periodic_transmit_machine,
4108 (void *)LAEventTimeout, NULL);
4109 if (g_bond->verbose > 1) {
4110 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4111 bondport_get_name(p), p->po_periodic_interval);
4112 }
4113 break;
4114 default:
4115 break;
4116 }
4117 return;
4118 }
4119
4120 /**
4121 ** Transmit machine
4122 **/
4123 static int
4124 bondport_can_transmit(bondport_ref p, int32_t current_secs,
4125 long * next_secs)
4126 {
4127 if (p->po_last_transmit_secs != current_secs) {
4128 p->po_last_transmit_secs = current_secs;
4129 p->po_n_transmit = 0;
4130 }
4131 if (p->po_n_transmit < LACP_PACKET_RATE) {
4132 p->po_n_transmit++;
4133 return (1);
4134 }
4135 if (next_secs != NULL) {
4136 *next_secs = current_secs + 1;
4137 }
4138 return (0);
4139 }
4140
4141 static void
4142 bondport_transmit_machine(bondport_ref p, LAEvent event,
4143 void * event_data)
4144 {
4145 lacp_actor_partner_tlv_ref aptlv;
4146 lacp_collector_tlv_ref ctlv;
4147 struct timeval next_tick_time = {0, 0};
4148 lacpdu_ref out_lacpdu_p;
4149 packet_buffer_ref pkt;
4150 partner_state_ref ps;
4151 LAG_info_ref ps_li;
4152
4153 switch (event) {
4154 case LAEventTimeout:
4155 case LAEventStart:
4156 if (p->po_periodic_interval == 0 || bondport_flags_ntt(p) == 0) {
4157 break;
4158 }
4159 if (event_data != NULL) {
4160 /* we're going away, transmit the packet no matter what */
4161 }
4162 else if (bondport_can_transmit(p, devtimer_current_secs(),
4163 &next_tick_time.tv_sec) == 0) {
4164 if (devtimer_enabled(p->po_transmit_timer)) {
4165 if (g_bond->verbose > 0) {
4166 timestamp_printf("[%s] Transmit Timer Already Set\n",
4167 bondport_get_name(p));
4168 }
4169 }
4170 else {
4171 devtimer_set_absolute(p->po_transmit_timer, next_tick_time,
4172 (devtimer_timeout_func)
4173 bondport_transmit_machine,
4174 (void *)LAEventTimeout, NULL);
4175 if (g_bond->verbose > 0) {
4176 timestamp_printf("[%s] Transmit Timer Deadline %d secs\n",
4177 bondport_get_name(p),
4178 next_tick_time.tv_sec);
4179 }
4180 }
4181 break;
4182 }
4183 if (g_bond->verbose > 0) {
4184 if (event == LAEventTimeout) {
4185 timestamp_printf("[%s] Transmit Timer Complete\n",
4186 bondport_get_name(p));
4187 }
4188 }
4189 pkt = packet_buffer_allocate(sizeof(*out_lacpdu_p));
4190 if (pkt == NULL) {
4191 printf("[%s] Transmit: failed to allocate packet buffer\n",
4192 bondport_get_name(p));
4193 break;
4194 }
4195 out_lacpdu_p = (lacpdu_ref)packet_buffer_byteptr(pkt);
4196 bzero(out_lacpdu_p, sizeof(*out_lacpdu_p));
4197 out_lacpdu_p->la_subtype = IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP;
4198 out_lacpdu_p->la_version = LACPDU_VERSION_1;
4199
4200 /* Actor */
4201 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_actor_tlv;
4202 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_ACTOR;
4203 aptlv->lap_length = LACPDU_ACTOR_TLV_LENGTH;
4204 *((lacp_system_ref)aptlv->lap_system) = g_bond->system;
4205 lacp_actor_partner_tlv_set_system_priority(aptlv,
4206 g_bond->system_priority);
4207 lacp_actor_partner_tlv_set_port_priority(aptlv, p->po_priority);
4208 lacp_actor_partner_tlv_set_port(aptlv, bondport_get_index(p));
4209 lacp_actor_partner_tlv_set_key(aptlv, p->po_bond->ifb_key);
4210 aptlv->lap_state = p->po_actor_state;
4211
4212 /* Partner */
4213 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_partner_tlv;
4214 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_PARTNER;
4215 aptlv->lap_length = LACPDU_PARTNER_TLV_LENGTH;
4216 ps = &p->po_partner_state;
4217 ps_li = &ps->ps_lag_info;
4218 lacp_actor_partner_tlv_set_port(aptlv, ps->ps_port);
4219 lacp_actor_partner_tlv_set_port_priority(aptlv, ps->ps_port_priority);
4220 *((lacp_system_ref)aptlv->lap_system) = ps_li->li_system;
4221 lacp_actor_partner_tlv_set_system_priority(aptlv,
4222 ps_li->li_system_priority);
4223 lacp_actor_partner_tlv_set_key(aptlv, ps_li->li_key);
4224 aptlv->lap_state = ps->ps_state;
4225
4226 /* Collector */
4227 ctlv = (lacp_collector_tlv_ref)out_lacpdu_p->la_collector_tlv;
4228 ctlv->lac_tlv_type = LACPDU_TLV_TYPE_COLLECTOR;
4229 ctlv->lac_length = LACPDU_COLLECTOR_TLV_LENGTH;
4230
4231 bondport_slow_proto_transmit(p, pkt);
4232 bondport_flags_clear_ntt(p);
4233 if (g_bond->verbose > 0) {
4234 timestamp_printf("[%s] Transmit Packet %d\n",
4235 bondport_get_name(p), p->po_n_transmit);
4236 }
4237 break;
4238 default:
4239 break;
4240 }
4241 return;
4242 }
4243
4244 /**
4245 ** Mux machine functions
4246 **/
4247
4248 static void
4249 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4250 void * event_data);
4251 static void
4252 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4253 void * event_data);
4254 static void
4255 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4256 void * event_data);
4257
4258 static void
4259 bondport_mux_machine_collecting_distributing(bondport_ref p, LAEvent event,
4260 void * event_data);
4261
4262 static void
4263 bondport_mux_machine(bondport_ref p, LAEvent event, void * event_data)
4264 {
4265 switch (p->po_mux_state) {
4266 case MuxState_none:
4267 bondport_mux_machine_detached(p, LAEventStart, NULL);
4268 break;
4269 case MuxState_DETACHED:
4270 bondport_mux_machine_detached(p, event, event_data);
4271 break;
4272 case MuxState_WAITING:
4273 bondport_mux_machine_waiting(p, event, event_data);
4274 break;
4275 case MuxState_ATTACHED:
4276 bondport_mux_machine_attached(p, event, event_data);
4277 break;
4278 case MuxState_COLLECTING_DISTRIBUTING:
4279 bondport_mux_machine_collecting_distributing(p, event, event_data);
4280 break;
4281 default:
4282 break;
4283 }
4284 return;
4285 }
4286
4287 static void
4288 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4289 __unused void * event_data)
4290 {
4291 lacp_actor_partner_state s;
4292
4293 switch (event) {
4294 case LAEventStart:
4295 devtimer_cancel(p->po_wait_while_timer);
4296 if (g_bond->verbose) {
4297 timestamp_printf("[%s] Mux DETACHED\n",
4298 bondport_get_name(p));
4299 }
4300 p->po_mux_state = MuxState_DETACHED;
4301 bondport_flags_clear_ready(p);
4302 bondport_DetachMuxFromAggregator(p);
4303 bondport_disable_distributing(p);
4304 s = p->po_actor_state;
4305 s = lacp_actor_partner_state_set_out_of_sync(s);
4306 s = lacp_actor_partner_state_set_not_collecting(s);
4307 s = lacp_actor_partner_state_set_not_distributing(s);
4308 p->po_actor_state = s;
4309 bondport_flags_set_ntt(p);
4310 break;
4311 case LAEventSelectedChange:
4312 case LAEventPacket:
4313 case LAEventMediaChange:
4314 if (p->po_selected == SelectedState_SELECTED
4315 || p->po_selected == SelectedState_STANDBY) {
4316 bondport_mux_machine_waiting(p, LAEventStart, NULL);
4317 }
4318 break;
4319 default:
4320 break;
4321 }
4322 return;
4323 }
4324
4325 static void
4326 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4327 __unused void * event_data)
4328 {
4329 struct timeval tv;
4330
4331 switch (event) {
4332 case LAEventStart:
4333 devtimer_cancel(p->po_wait_while_timer);
4334 if (g_bond->verbose) {
4335 timestamp_printf("[%s] Mux WAITING\n",
4336 bondport_get_name(p));
4337 }
4338 p->po_mux_state = MuxState_WAITING;
4339 /* FALL THROUGH */
4340 default:
4341 case LAEventSelectedChange:
4342 if (p->po_selected == SelectedState_UNSELECTED) {
4343 bondport_mux_machine_detached(p, LAEventStart, NULL);
4344 break;
4345 }
4346 if (p->po_selected == SelectedState_STANDBY) {
4347 devtimer_cancel(p->po_wait_while_timer);
4348 /* wait until state changes to SELECTED */
4349 if (g_bond->verbose) {
4350 timestamp_printf("[%s] Mux WAITING: Standby\n",
4351 bondport_get_name(p));
4352 }
4353 break;
4354 }
4355 if (bondport_flags_ready(p)) {
4356 if (g_bond->verbose) {
4357 timestamp_printf("[%s] Mux WAITING: Port is already ready\n",
4358 bondport_get_name(p));
4359 }
4360 break;
4361 }
4362 if (devtimer_enabled(p->po_wait_while_timer)) {
4363 if (g_bond->verbose) {
4364 timestamp_printf("[%s] Mux WAITING: Timer already set\n",
4365 bondport_get_name(p));
4366 }
4367 break;
4368 }
4369 if (ifbond_all_ports_attached(p->po_bond, p)) {
4370 devtimer_cancel(p->po_wait_while_timer);
4371 if (g_bond->verbose) {
4372 timestamp_printf("[%s] Mux WAITING: No waiting\n",
4373 bondport_get_name(p));
4374 }
4375 bondport_flags_set_ready(p);
4376 goto no_waiting;
4377 }
4378 if (g_bond->verbose) {
4379 timestamp_printf("[%s] Mux WAITING: 2 seconds\n",
4380 bondport_get_name(p));
4381 }
4382 tv.tv_sec = LACP_AGGREGATE_WAIT_TIME;
4383 tv.tv_usec = 0;
4384 devtimer_set_relative(p->po_wait_while_timer, tv,
4385 (devtimer_timeout_func)
4386 bondport_mux_machine_waiting,
4387 (void *)LAEventTimeout, NULL);
4388 break;
4389 case LAEventTimeout:
4390 if (g_bond->verbose) {
4391 timestamp_printf("[%s] Mux WAITING: Ready\n",
4392 bondport_get_name(p));
4393 }
4394 bondport_flags_set_ready(p);
4395 break;
4396 case LAEventReady:
4397 no_waiting:
4398 if (bondport_flags_ready(p)){
4399 if (g_bond->verbose) {
4400 timestamp_printf("[%s] Mux WAITING: All Ports Ready\n",
4401 bondport_get_name(p));
4402 }
4403 bondport_mux_machine_attached(p, LAEventStart, NULL);
4404 break;
4405 }
4406 break;
4407 }
4408 return;
4409 }
4410
4411 static void
4412 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4413 __unused void * event_data)
4414 {
4415 lacp_actor_partner_state s;
4416
4417 switch (event) {
4418 case LAEventStart:
4419 devtimer_cancel(p->po_wait_while_timer);
4420 if (g_bond->verbose) {
4421 timestamp_printf("[%s] Mux ATTACHED\n",
4422 bondport_get_name(p));
4423 }
4424 p->po_mux_state = MuxState_ATTACHED;
4425 bondport_AttachMuxToAggregator(p);
4426 s = p->po_actor_state;
4427 s = lacp_actor_partner_state_set_in_sync(s);
4428 s = lacp_actor_partner_state_set_not_collecting(s);
4429 s = lacp_actor_partner_state_set_not_distributing(s);
4430 bondport_disable_distributing(p);
4431 p->po_actor_state = s;
4432 bondport_flags_set_ntt(p);
4433 /* FALL THROUGH */
4434 default:
4435 switch (p->po_selected) {
4436 case SelectedState_SELECTED:
4437 s = p->po_partner_state.ps_state;
4438 if (lacp_actor_partner_state_in_sync(s)) {
4439 bondport_mux_machine_collecting_distributing(p, LAEventStart,
4440 NULL);
4441 }
4442 break;
4443 default:
4444 bondport_mux_machine_detached(p, LAEventStart, NULL);
4445 break;
4446 }
4447 break;
4448 }
4449 return;
4450 }
4451
4452 static void
4453 bondport_mux_machine_collecting_distributing(bondport_ref p,
4454 LAEvent event,
4455 __unused void * event_data)
4456 {
4457 lacp_actor_partner_state s;
4458
4459 switch (event) {
4460 case LAEventStart:
4461 devtimer_cancel(p->po_wait_while_timer);
4462 if (g_bond->verbose) {
4463 timestamp_printf("[%s] Mux COLLECTING_DISTRIBUTING\n",
4464 bondport_get_name(p));
4465 }
4466 p->po_mux_state = MuxState_COLLECTING_DISTRIBUTING;
4467 bondport_enable_distributing(p);
4468 s = p->po_actor_state;
4469 s = lacp_actor_partner_state_set_collecting(s);
4470 s = lacp_actor_partner_state_set_distributing(s);
4471 p->po_actor_state = s;
4472 bondport_flags_set_ntt(p);
4473 /* FALL THROUGH */
4474 default:
4475 s = p->po_partner_state.ps_state;
4476 if (lacp_actor_partner_state_in_sync(s) == 0) {
4477 bondport_mux_machine_attached(p, LAEventStart, NULL);
4478 break;
4479 }
4480 switch (p->po_selected) {
4481 case SelectedState_UNSELECTED:
4482 case SelectedState_STANDBY:
4483 bondport_mux_machine_attached(p, LAEventStart, NULL);
4484 break;
4485 default:
4486 break;
4487 }
4488 break;
4489 }
4490 return;
4491 }