]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/net/if_bond.c
xnu-1699.24.23.tar.gz
[apple/xnu.git] / bsd / net / if_bond.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * if_bond.c
31 * - bond/failover interface
32 * - implements IEEE 802.3ad Link Aggregation
33 */
34
35/*
36 * Modification History:
37 *
38 * April 29, 2004 Dieter Siegmund (dieter@apple.com)
39 * - created
40 */
41
42#include <sys/param.h>
43#include <sys/kernel.h>
44#include <sys/malloc.h>
45#include <sys/mbuf.h>
46#include <sys/queue.h>
47#include <sys/socket.h>
48#include <sys/sockio.h>
49#include <sys/sysctl.h>
50#include <sys/systm.h>
51#include <sys/kern_event.h>
52
53#include <net/bpf.h>
54#include <net/ethernet.h>
55#include <net/if.h>
56#include <net/kpi_interface.h>
57#include <net/if_arp.h>
58#include <net/if_dl.h>
59#include <net/if_ether.h>
60#include <net/if_types.h>
61#include <net/if_bond_var.h>
62#include <net/ieee8023ad.h>
63#include <net/lacp.h>
64#include <net/dlil.h>
65#include <sys/time.h>
66#include <net/devtimer.h>
67#include <net/if_vlan_var.h>
68#include <net/kpi_protocol.h>
69
70#include <kern/locks.h>
71#include <libkern/OSAtomic.h>
72
73#include <netinet/in.h>
74#include <netinet/if_ether.h>
75#include <netinet/in_systm.h>
76#include <netinet/ip.h>
77#include <netinet/ip6.h>
78
79#include <net/if_media.h>
80#include <net/multicast_list.h>
81
/*
 * Ethernet multicast address initialized from IEEE8023AD_SLOW_PROTO_MULTICAST.
 * ifbond_add_slow_proto_multicast() joins this group on each bond interface.
 */
static struct ether_addr slow_proto_multicast = {
    IEEE8023AD_SLOW_PROTO_MULTICAST
};
85
/* last argument handed to IF_CLONE_INITIALIZER() for bond_cloner */
#define BOND_MAXUNIT 128
/* clone name handed to IF_CLONE_INITIALIZER() for bond_cloner */
#define BONDNAME "bond"
/* malloc type used for every allocation made through M_BOND */
#define M_BOND M_DEVBUF

/*
 * printf helpers for a 6-byte ethernet address: EA_FORMAT is the format
 * string, EA_LIST(ea) expands to the six matching byte arguments and
 * EA_CH(e, i) extracts byte i of the address at e.
 */
#define EA_FORMAT "%x:%x:%x:%x:%x:%x"
#define EA_CH(e, i) ((u_char)((u_char *)(e))[(i)])
#define EA_LIST(ea) EA_CH(ea,0),EA_CH(ea,1),EA_CH(ea,2),EA_CH(ea,3),EA_CH(ea,4),EA_CH(ea,5)

/* alias for printf */
#define timestamp_printf printf
95
96/**
97 ** bond locks
98 **/
99static __inline__ lck_grp_t *
100my_lck_grp_alloc_init(const char * grp_name)
101{
102 lck_grp_t * grp;
103 lck_grp_attr_t * grp_attrs;
104
105 grp_attrs = lck_grp_attr_alloc_init();
106 grp = lck_grp_alloc_init(grp_name, grp_attrs);
107 lck_grp_attr_free(grp_attrs);
108 return (grp);
109}
110
111static __inline__ lck_mtx_t *
112my_lck_mtx_alloc_init(lck_grp_t * lck_grp)
113{
114 lck_attr_t * lck_attrs;
115 lck_mtx_t * lck_mtx;
116
117 lck_attrs = lck_attr_alloc_init();
118 lck_mtx = lck_mtx_alloc_init(lck_grp, lck_attrs);
119 lck_attr_free(lck_attrs);
120 return (lck_mtx);
121}
122
/* the single mutex behind bond_lock()/bond_unlock(); set up by bond_lock_init() */
static lck_mtx_t * bond_lck_mtx;
124
125static __inline__ void
126bond_lock_init(void)
127{
128 lck_grp_t * bond_lck_grp;
129
130 bond_lck_grp = my_lck_grp_alloc_init("if_bond");
131 bond_lck_mtx = my_lck_mtx_alloc_init(bond_lck_grp);
132}
133
134static __inline__ void
135bond_assert_lock_held(void)
136{
137 lck_mtx_assert(bond_lck_mtx, LCK_MTX_ASSERT_OWNED);
138 return;
139}
140
141static __inline__ void
142bond_assert_lock_not_held(void)
143{
144 lck_mtx_assert(bond_lck_mtx, LCK_MTX_ASSERT_NOTOWNED);
145 return;
146}
147
148static __inline__ void
149bond_lock(void)
150{
151 lck_mtx_lock(bond_lck_mtx);
152 return;
153}
154
155static __inline__ void
156bond_unlock(void)
157{
158 lck_mtx_unlock(bond_lck_mtx);
159 return;
160}
161
162/**
163 ** bond structures, types
164 **/
165
166struct LAG_info_s {
167 lacp_system li_system;
168 lacp_system_priority li_system_priority;
169 lacp_key li_key;
170};
171typedef struct LAG_info_s LAG_info, * LAG_info_ref;
172
/* forward declarations of the structures linked together below */
struct bondport_s;
struct ifbond_s;
struct LAG_s;

/* tail-queue head types for lists of ports, bonds and LAGs */
TAILQ_HEAD(port_list, bondport_s);
TAILQ_HEAD(ifbond_list, ifbond_s);
TAILQ_HEAD(lag_list, LAG_s);

typedef struct bondport_s bondport, * bondport_ref;
typedef struct ifbond_s ifbond, * ifbond_ref;
182
183struct LAG_s {
184 TAILQ_ENTRY(LAG_s) lag_list;
185 struct port_list lag_port_list;
186 short lag_port_count;
187 short lag_selected_port_count;
188 int lag_active_media;
189 LAG_info lag_info;
190};
191typedef struct LAG_s LAG, * LAG_ref;
192
193typedef struct partner_state_s {
194 LAG_info ps_lag_info;
195 lacp_port ps_port;
196 lacp_port_priority ps_port_priority;
197 lacp_actor_partner_state ps_state;
198} partner_state, * partner_state_ref;
199
/*
 * Per-bond-interface state.  Allocated in bond_clone_create() and freed
 * by ifbond_release() when the retain count drops to zero.
 */
struct ifbond_s {
    TAILQ_ENTRY(ifbond_s) ifb_bond_list; /* linkage on g_bond->ifbond_list */
    int ifb_flags; /* IFBF_* bits */
    SInt32 ifb_retain_count; /* adjusted by ifbond_retain()/ifbond_release() */
    char ifb_name[IFNAMSIZ]; /* clone name + unit, built in bond_clone_create() */
    struct ifnet * ifb_ifp; /* the bond's own ifnet */
    bpf_packet_func ifb_bpf_input; /* set by bond_set_bpf_tap() */
    bpf_packet_func ifb_bpf_output; /* set by bond_set_bpf_tap() */
    int ifb_altmtu;
    struct port_list ifb_port_list; /* member ports */
    short ifb_port_count;
    struct lag_list ifb_lag_list;
    lacp_key ifb_key; /* unit + 1, set in bond_clone_create() */
    short ifb_max_active; /* 0 == unlimited */
    LAG_ref ifb_active_lag;
    struct ifmultiaddr * ifb_ifma_slow_proto; /* slow-protocols multicast membership */
    bondport_ref * ifb_distributing_array; /* freed in ifbond_release() */
    int ifb_distributing_count;
    int ifb_last_link_event;
    int ifb_mode; /* LACP, STATIC */
};
221
/* Media words copied from a SIOCGIFMEDIA reply by interface_media_info(). */
struct media_info {
    int mi_active; /* ifm_active */
    int mi_status; /* ifm_status */
};
226
227enum {
228 ReceiveState_none = 0,
229 ReceiveState_INITIALIZE = 1,
230 ReceiveState_PORT_DISABLED = 2,
231 ReceiveState_EXPIRED = 3,
232 ReceiveState_LACP_DISABLED = 4,
233 ReceiveState_DEFAULTED = 5,
234 ReceiveState_CURRENT = 6,
235};
236
237typedef u_char ReceiveState;
238
/*
 * Values stored in bondport_s.po_selected.  They reuse the
 * IF_BOND_STATUS_SELECTED_STATE_* constants.
 */
enum {
    SelectedState_UNSELECTED = IF_BOND_STATUS_SELECTED_STATE_UNSELECTED,
    SelectedState_SELECTED = IF_BOND_STATUS_SELECTED_STATE_SELECTED,
    SelectedState_STANDBY = IF_BOND_STATUS_SELECTED_STATE_STANDBY
};
typedef u_char SelectedState;
245
246static __inline__ const char *
247SelectedStateString(SelectedState s)
248{
249 static const char * names[] = { "UNSELECTED", "SELECTED", "STANDBY" };
250
251 if (s <= SelectedState_STANDBY) {
252 return (names[s]);
253 }
254 return ("<unknown>");
255}
256
257enum {
258 MuxState_none = 0,
259 MuxState_DETACHED = 1,
260 MuxState_WAITING = 2,
261 MuxState_ATTACHED = 3,
262 MuxState_COLLECTING_DISTRIBUTING = 4,
263};
264
265typedef u_char MuxState;
266
/* Per-member-port state, including the port's LACP state machine variables. */
struct bondport_s {
    TAILQ_ENTRY(bondport_s) po_port_list; /* linkage on ifbond_s.ifb_port_list */
    ifbond_ref po_bond; /* bond this port belongs to */
    struct multicast_list po_multicast; /* programmed by bond_setmulti() */
    struct ifnet * po_ifp; /* the port's ifnet */
    struct ether_addr po_saved_addr;
    int po_enabled;
    char po_name[IFNAMSIZ];
    struct ifdevmtu po_devmtu;

    /* LACP */
    TAILQ_ENTRY(bondport_s) po_lag_port_list;
    devtimer_ref po_current_while_timer;
    devtimer_ref po_periodic_timer;
    devtimer_ref po_wait_while_timer;
    devtimer_ref po_transmit_timer;
    partner_state po_partner_state;
    lacp_port_priority po_priority;
    lacp_actor_partner_state po_actor_state;
    u_char po_flags; /* BONDPORT_FLAGS_* bits */
    u_char po_periodic_interval;
    u_char po_n_transmit;
    ReceiveState po_receive_state; /* ReceiveState_* */
    MuxState po_mux_state; /* MuxState_* */
    SelectedState po_selected; /* SelectedState_* */
    int32_t po_last_transmit_secs;
    struct media_info po_media_info;
    LAG_ref po_lag;
};
296
/*
 * ifbond_ref->ifb_flags bits
 */
#define IFBF_PROMISC 0x1 /* promiscuous mode */
#define IFBF_IF_DETACHING 0x2 /* interface is detaching */
#define IFBF_LLADDR 0x4 /* specific link address requested */
#define IFBF_CHANGE_IN_PROGRESS 0x8 /* interface add/remove in progress */
301
/* forward declaration; the definition is not in this part of the file */
static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p,
                           user_addr_t datap);
304
305static __inline__ int
306ifbond_flags_promisc(ifbond_ref ifb)
307{
308 return ((ifb->ifb_flags & IFBF_PROMISC) != 0);
309}
310
311static __inline__ void
312ifbond_flags_set_promisc(ifbond_ref ifb)
313{
314 ifb->ifb_flags |= IFBF_PROMISC;
315 return;
316}
317
318static __inline__ void
319ifbond_flags_clear_promisc(ifbond_ref ifb)
320{
321 ifb->ifb_flags &= ~IFBF_PROMISC;
322 return;
323}
324
325static __inline__ int
326ifbond_flags_if_detaching(ifbond_ref ifb)
327{
328 return ((ifb->ifb_flags & IFBF_IF_DETACHING) != 0);
329}
330
331static __inline__ void
332ifbond_flags_set_if_detaching(ifbond_ref ifb)
333{
334 ifb->ifb_flags |= IFBF_IF_DETACHING;
335 return;
336}
337
338static __inline__ int
339ifbond_flags_lladdr(ifbond_ref ifb)
340{
341 return ((ifb->ifb_flags & IFBF_LLADDR) != 0);
342}
343
344static __inline__ void
345ifbond_flags_set_lladdr(ifbond_ref ifb)
346{
347 ifb->ifb_flags |= IFBF_LLADDR;
348 return;
349}
350
351static __inline__ void
352ifbond_flags_clear_lladdr(ifbond_ref ifb)
353{
354 ifb->ifb_flags &= ~IFBF_LLADDR;
355 return;
356}
357
358static __inline__ int
359ifbond_flags_change_in_progress(ifbond_ref ifb)
360{
361 return ((ifb->ifb_flags & IFBF_CHANGE_IN_PROGRESS) != 0);
362}
363
364static __inline__ void
365ifbond_flags_set_change_in_progress(ifbond_ref ifb)
366{
367 ifb->ifb_flags |= IFBF_CHANGE_IN_PROGRESS;
368 return;
369}
370
371static __inline__ void
372ifbond_flags_clear_change_in_progress(ifbond_ref ifb)
373{
374 ifb->ifb_flags &= ~IFBF_CHANGE_IN_PROGRESS;
375 return;
376}
377
/*
 * bondport_ref->po_flags bits
 *
 * po_flags is a u_char, so all eight bits are accounted for here; the
 * UNUSED entries mark bits that have no accessor in this file.
 */
#define BONDPORT_FLAGS_NTT 0x01
#define BONDPORT_FLAGS_READY 0x02
#define BONDPORT_FLAGS_SELECTED_CHANGED 0x04
#define BONDPORT_FLAGS_MUX_ATTACHED 0x08
#define BONDPORT_FLAGS_DISTRIBUTING 0x10
#define BONDPORT_FLAGS_UNUSED2 0x20
#define BONDPORT_FLAGS_UNUSED3 0x40
#define BONDPORT_FLAGS_UNUSED4 0x80
389
390static __inline__ void
391bondport_flags_set_ntt(bondport_ref p)
392{
393 p->po_flags |= BONDPORT_FLAGS_NTT;
394 return;
395}
396
397static __inline__ void
398bondport_flags_clear_ntt(bondport_ref p)
399{
400 p->po_flags &= ~BONDPORT_FLAGS_NTT;
401 return;
402}
403
404static __inline__ int
405bondport_flags_ntt(bondport_ref p)
406{
407 return ((p->po_flags & BONDPORT_FLAGS_NTT) != 0);
408}
409
410static __inline__ void
411bondport_flags_set_ready(bondport_ref p)
412{
413 p->po_flags |= BONDPORT_FLAGS_READY;
414 return;
415}
416
417static __inline__ void
418bondport_flags_clear_ready(bondport_ref p)
419{
420 p->po_flags &= ~BONDPORT_FLAGS_READY;
421 return;
422}
423
424static __inline__ int
425bondport_flags_ready(bondport_ref p)
426{
427 return ((p->po_flags & BONDPORT_FLAGS_READY) != 0);
428}
429
430static __inline__ void
431bondport_flags_set_selected_changed(bondport_ref p)
432{
433 p->po_flags |= BONDPORT_FLAGS_SELECTED_CHANGED;
434 return;
435}
436
437static __inline__ void
438bondport_flags_clear_selected_changed(bondport_ref p)
439{
440 p->po_flags &= ~BONDPORT_FLAGS_SELECTED_CHANGED;
441 return;
442}
443
444static __inline__ int
445bondport_flags_selected_changed(bondport_ref p)
446{
447 return ((p->po_flags & BONDPORT_FLAGS_SELECTED_CHANGED) != 0);
448}
449
450static __inline__ void
451bondport_flags_set_mux_attached(bondport_ref p)
452{
453 p->po_flags |= BONDPORT_FLAGS_MUX_ATTACHED;
454 return;
455}
456
457static __inline__ void
458bondport_flags_clear_mux_attached(bondport_ref p)
459{
460 p->po_flags &= ~BONDPORT_FLAGS_MUX_ATTACHED;
461 return;
462}
463
464static __inline__ int
465bondport_flags_mux_attached(bondport_ref p)
466{
467 return ((p->po_flags & BONDPORT_FLAGS_MUX_ATTACHED) != 0);
468}
469
470static __inline__ void
471bondport_flags_set_distributing(bondport_ref p)
472{
473 p->po_flags |= BONDPORT_FLAGS_DISTRIBUTING;
474 return;
475}
476
477static __inline__ void
478bondport_flags_clear_distributing(bondport_ref p)
479{
480 p->po_flags &= ~BONDPORT_FLAGS_DISTRIBUTING;
481 return;
482}
483
484static __inline__ int
485bondport_flags_distributing(bondport_ref p)
486{
487 return ((p->po_flags & BONDPORT_FLAGS_DISTRIBUTING) != 0);
488}
489
490typedef struct bond_globals_s {
491 struct ifbond_list ifbond_list;
492 lacp_system system;
493 lacp_system_priority system_priority;
494 int verbose;
495} * bond_globals_ref;
496
497static bond_globals_ref g_bond;
498
/**
 ** packet_buffer routines
 ** - thin wrapper for mbuf
 **/

/* a packet buffer is simply a pointer to an mbuf */
typedef struct mbuf * packet_buffer_ref;
505
506static packet_buffer_ref
507packet_buffer_allocate(int length)
508{
509 packet_buffer_ref m;
510 int size;
511
512 /* leave room for ethernet header */
513 size = length + sizeof(struct ether_header);
514 if (size > (int)MHLEN) {
515 /* XXX doesn't handle large payloads */
516 printf("bond: packet_buffer_allocate size %d > max %u\n", size, MHLEN);
517 return (NULL);
518 }
519 m = m_gethdr(M_WAITOK, MT_DATA);
520 if (m == NULL) {
521 return (NULL);
522 }
523 m->m_len = size;
524 m->m_pkthdr.len = size;
525 return (m);
526}
527
528static void *
529packet_buffer_byteptr(packet_buffer_ref buf)
530{
531 return (buf->m_data + sizeof(struct ether_header));
532}
533
/* Events delivered to the per-port state machines. */
typedef enum {
    LAEventStart            = 0,
    LAEventTimeout          = 1,
    LAEventPacket           = 2,
    LAEventMediaChange      = 3,
    LAEventSelectedChange   = 4,
    LAEventPortMoved        = 5,
    LAEventReady            = 6
} LAEvent;
543
/*
 * Forward declarations.  Each of the four state machine entry points
 * takes the port, the LAEvent being delivered and an event-specific
 * data pointer.
 */

/**
 ** Receive machine
 **/
static void
bondport_receive_machine(bondport_ref p, LAEvent event,
                         void * event_data);
/**
 ** Periodic Transmission machine
 **/
static void
bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
                                   void * event_data);

/**
 ** Transmit machine
 **/
/* non-NULL sentinel usable as a transmit machine event_data value */
#define TRANSMIT_MACHINE_TX_IMMEDIATE ((void *)1)

static void
bondport_transmit_machine(bondport_ref p, LAEvent event,
                          void * event_data);

/**
 ** Mux machine
 **/
static void
bondport_mux_machine(bondport_ref p, LAEvent event,
                     void * event_data);

/**
 ** bond, LAG
 **/
static void
ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media);

static void
ifbond_deactivate_LAG(ifbond_ref bond, LAG_ref lag);

static int
ifbond_all_ports_ready(ifbond_ref bond);

static LAG_ref
ifbond_find_best_LAG(ifbond_ref bond, int * active_media);

static int
LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media);

static int
ifbond_selection(ifbond_ref bond);


/**
 ** bondport
 **/

static void
bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p);

static void
bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf);

static bondport_ref
bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
                int active, int short_timeout, int * error);
static void
bondport_start(bondport_ref p);

static void
bondport_free(bondport_ref p);

static int
bondport_aggregatable(bondport_ref p);

static int
bondport_remove_from_LAG(bondport_ref p);

static void
bondport_set_selected(bondport_ref p, SelectedState s);

static int
bondport_matches_LAG(bondport_ref p, LAG_ref lag);

static void
bondport_link_status_changed(bondport_ref p);

static void
bondport_enable_distributing(bondport_ref p);

static void
bondport_disable_distributing(bondport_ref p);
634
635static __inline__ int
636bondport_collecting(bondport_ref p)
637{
638 if (p->po_bond->ifb_mode == IF_BOND_MODE_LACP) {
639 return (lacp_actor_partner_state_collecting(p->po_actor_state));
640 }
641 return (TRUE);
642}
643
/**
 ** bond interface/dlil specific routines
 **/
static int bond_clone_create(struct if_clone *, u_int32_t, void *);
static int bond_clone_destroy(struct ifnet *);
static int bond_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t m,
                      char *frame_header);
static int bond_output(struct ifnet *ifp, struct mbuf *m);
static int bond_ioctl(struct ifnet *ifp, u_long cmd, void * addr);
static int bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode,
                            bpf_packet_func func);
static int bond_attach_protocol(struct ifnet *ifp);
static int bond_detach_protocol(struct ifnet *ifp);
static int bond_setmulti(struct ifnet *ifp);
static int bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp);
static int bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp);
static void bond_if_free(struct ifnet * ifp);

/* cloner registered by bond_clone_attach(); ties BONDNAME to the create/destroy callbacks */
static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME,
                                                          bond_clone_create,
                                                          bond_clone_destroy,
                                                          0,
                                                          BOND_MAXUNIT);
static void interface_link_event(struct ifnet * ifp, u_int32_t event_code);
668
669static int
670siocsifmtu(struct ifnet * ifp, int mtu)
671{
672 struct ifreq ifr;
673
674 bzero(&ifr, sizeof(ifr));
675 ifr.ifr_mtu = mtu;
676 return (ifnet_ioctl(ifp, 0, SIOCSIFMTU, &ifr));
677}
678
679static int
680siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
681{
682 struct ifreq ifr;
683 int error;
684
685 bzero(&ifr, sizeof(ifr));
686 error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
687 if (error == 0) {
688 *ifdm_p = ifr.ifr_devmtu;
689 }
690 return (error);
691}
692
693static __inline__ void
694ether_addr_copy(void * dest, const void * source)
695{
696 bcopy(source, dest, ETHER_ADDR_LEN);
697 return;
698}
699
700static __inline__ void
701ifbond_retain(ifbond_ref ifb)
702{
703 OSIncrementAtomic(&ifb->ifb_retain_count);
704}
705
706static __inline__ void
707ifbond_release(ifbond_ref ifb)
708{
709 UInt32 old_retain_count;
710
711 old_retain_count = OSDecrementAtomic(&ifb->ifb_retain_count);
712 switch (old_retain_count) {
713 case 0:
714 panic("ifbond_release: retain count is 0\n");
715 break;
716 case 1:
717 if (g_bond->verbose) {
718 printf("ifbond_release(%s)\n", ifb->ifb_name);
719 }
720 if (ifb->ifb_ifma_slow_proto != NULL) {
721 if (g_bond->verbose) {
722 printf("ifbond_release(%s) removing multicast\n",
723 ifb->ifb_name);
724 }
725 (void) if_delmulti_anon(ifb->ifb_ifma_slow_proto->ifma_ifp,
726 ifb->ifb_ifma_slow_proto->ifma_addr);
727 IFMA_REMREF(ifb->ifb_ifma_slow_proto);
728 }
729 if (ifb->ifb_distributing_array != NULL) {
730 FREE(ifb->ifb_distributing_array, M_BOND);
731 }
732 FREE(ifb, M_BOND);
733 break;
734 default:
735 break;
736 }
737 return;
738}
739
740/*
741 * Function: ifbond_wait
742 * Purpose:
743 * Allows a single thread to gain exclusive access to the ifbond
744 * data structure. Some operations take a long time to complete,
745 * and some have side-effects that we can't predict. Holding the
746 * bond_lock() across such operations is not possible.
747 *
748 * For example:
749 * 1) The SIOCSIFLLADDR ioctl takes a long time (several seconds) to
750 * complete. Simply holding the bond_lock() would freeze all other
751 * data structure accesses during that time.
752 * 2) When we attach our protocol to the interface, a dlil event is
753 * generated and invokes our bond_event() function. bond_event()
754 * needs to take the bond_lock(), but we're already holding it, so
755 * we're deadlocked against ourselves.
756 * Notes:
757 * Before calling, you must be holding the bond_lock and have taken
758 * a reference on the ifbond_ref.
759 */
760static void
761ifbond_wait(ifbond_ref ifb, const char * msg)
762{
763 int waited = 0;
764
765 /* other add/remove in progress */
766 while (ifbond_flags_change_in_progress(ifb)) {
767 if (g_bond->verbose) {
768 printf("%s: %s msleep\n", ifb->ifb_name, msg);
769 }
770 waited = 1;
771 (void)msleep(ifb, bond_lck_mtx, PZERO, msg, 0);
772 }
773 /* prevent other bond list remove/add from taking place */
774 ifbond_flags_set_change_in_progress(ifb);
775 if (g_bond->verbose && waited) {
776 printf("%s: %s woke up\n", ifb->ifb_name, msg);
777 }
778 return;
779}
780
781/*
782 * Function: ifbond_signal
783 * Purpose:
784 * Allows the thread that previously invoked ifbond_wait() to
785 * give up exclusive access to the ifbond data structure, and wake up
786 * any other threads waiting to access
787 * Notes:
788 * Before calling, you must be holding the bond_lock and have taken
789 * a reference on the ifbond_ref.
790 */
791static void
792ifbond_signal(ifbond_ref ifb, const char * msg)
793{
794 ifbond_flags_clear_change_in_progress(ifb);
795 wakeup((caddr_t)ifb);
796 if (g_bond->verbose) {
797 printf("%s: %s wakeup\n", ifb->ifb_name, msg);
798 }
799 return;
800}
801
802/**
803 ** Media information
804 **/
805
806static int
807link_speed(int active)
808{
809 switch (IFM_SUBTYPE(active)) {
810 case IFM_10_T:
811 case IFM_10_2:
812 case IFM_10_5:
813 case IFM_10_STP:
814 case IFM_10_FL:
815 return (10);
816 case IFM_100_TX:
817 case IFM_100_FX:
818 case IFM_100_T4:
819 case IFM_100_VG:
820 case IFM_100_T2:
821 return (100);
822 case IFM_1000_SX:
823 case IFM_1000_LX:
824 case IFM_1000_CX:
825 case IFM_1000_TX:
826 return (1000);
827 case IFM_HPNA_1:
828 return (0);
829 default:
830 /* assume that new defined types are going to be at least 10GigE */
831 case IFM_10G_SR:
832 case IFM_10G_LR:
833 return (10000);
834 }
835}
836
837static __inline__ int
838media_active(const struct media_info * mi)
839{
840 if ((mi->mi_status & IFM_AVALID) == 0) {
841 return (1);
842 }
843 return ((mi->mi_status & IFM_ACTIVE) != 0);
844}
845
846static __inline__ int
847media_full_duplex(const struct media_info * mi)
848{
849 return ((mi->mi_active & IFM_FDX) != 0);
850}
851
852static __inline__ int
853media_speed(const struct media_info * mi)
854{
855 return (link_speed(mi->mi_active));
856}
857
858static struct media_info
859interface_media_info(struct ifnet * ifp)
860{
861 struct ifmediareq ifmr;
862 struct media_info mi;
863
864 bzero(&mi, sizeof(mi));
865 bzero(&ifmr, sizeof(ifmr));
866 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
867 if (ifmr.ifm_count != 0) {
868 mi.mi_status = ifmr.ifm_status;
869 mi.mi_active = ifmr.ifm_active;
870 }
871 }
872 return (mi);
873}
874
875static int
876if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p)
877{
878 struct ifreq ifr;
879
880 /*
881 * XXX setting the sa_len to ETHER_ADDR_LEN is wrong, but the driver
882 * currently expects it that way
883 */
884 ifr.ifr_addr.sa_family = AF_UNSPEC;
885 ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
886 ether_addr_copy(ifr.ifr_addr.sa_data, ea_p);
887 return (ifnet_ioctl(ifp, 0, SIOCSIFLLADDR, &ifr));
888}
889
890/**
891 ** bond_globals
892 **/
893static bond_globals_ref
894bond_globals_create(lacp_system_priority sys_pri,
895 lacp_system_ref sys)
896{
897 bond_globals_ref b;
898
899 b = _MALLOC(sizeof(*b), M_BOND, M_WAITOK);
900 if (b == NULL) {
901 return (NULL);
902 }
903 bzero(b, sizeof(*b));
904 TAILQ_INIT(&b->ifbond_list);
905 b->system = *sys;
906 b->system_priority = sys_pri;
907 return (b);
908}
909
910static int
911bond_globals_init(void)
912{
913 bond_globals_ref b;
914 int i;
915 struct ifnet * ifp;
916
917 bond_assert_lock_not_held();
918
919 if (g_bond != NULL) {
920 return (0);
921 }
922
923 /*
924 * use en0's ethernet address as the system identifier, and if it's not
925 * there, use en1 .. en3
926 */
927 ifp = NULL;
928 for (i = 0; i < 4; i++) {
929 char ifname[IFNAMSIZ+1];
930 snprintf(ifname, sizeof(ifname), "en%d", i);
931 ifp = ifunit(ifname);
932 if (ifp != NULL) {
933 break;
934 }
935 }
936 b = NULL;
937 if (ifp != NULL) {
938 b = bond_globals_create(0x8000, (lacp_system_ref)ifnet_lladdr(ifp));
939 }
940 bond_lock();
941 if (g_bond != NULL) {
942 bond_unlock();
943 _FREE(b, M_BOND);
944 return (0);
945 }
946 g_bond = b;
947 bond_unlock();
948 if (ifp == NULL) {
949 return (ENXIO);
950 }
951 if (b == NULL) {
952 return (ENOMEM);
953 }
954 return (0);
955}
956
957static void
958bond_bpf_vlan(struct ifnet * ifp, struct mbuf * m,
959 const struct ether_header * eh_p,
960 u_int16_t vlan_tag, bpf_packet_func func)
961{
962 struct ether_vlan_header * vlh_p;
963 struct mbuf * vl_m;
964
965 vl_m = m_get(M_DONTWAIT, MT_DATA);
966 if (vl_m == NULL) {
967 return;
968 }
969 /* populate a new mbuf containing the vlan ethernet header */
970 vl_m->m_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
971 vlh_p = mtod(vl_m, struct ether_vlan_header *);
972 bcopy(eh_p, vlh_p, offsetof(struct ether_header, ether_type));
973 vlh_p->evl_encap_proto = htons(ETHERTYPE_VLAN);
974 vlh_p->evl_tag = htons(vlan_tag);
975 vlh_p->evl_proto = eh_p->ether_type;
976 vl_m->m_next = m;
977 (*func)(ifp, vl_m);
978 vl_m->m_next = NULL;
979 m_free(vl_m);
980 return;
981}
982
983static __inline__ void
984bond_bpf_output(struct ifnet * ifp, struct mbuf * m,
985 bpf_packet_func func)
986{
987 if (func != NULL) {
988 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
989 const struct ether_header * eh_p;
990 eh_p = mtod(m, const struct ether_header *);
991 m->m_data += ETHER_HDR_LEN;
992 m->m_len -= ETHER_HDR_LEN;
993 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
994 m->m_data -= ETHER_HDR_LEN;
995 m->m_len += ETHER_HDR_LEN;
996 } else {
997 (*func)(ifp, m);
998 }
999 }
1000 return;
1001}
1002
1003static __inline__ void
1004bond_bpf_input(ifnet_t ifp, mbuf_t m, const struct ether_header * eh_p,
1005 bpf_packet_func func)
1006{
1007 if (func != NULL) {
1008 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1009 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
1010 } else {
1011 /* restore the header */
1012 m->m_data -= ETHER_HDR_LEN;
1013 m->m_len += ETHER_HDR_LEN;
1014 (*func)(ifp, m);
1015 m->m_data += ETHER_HDR_LEN;
1016 m->m_len -= ETHER_HDR_LEN;
1017 }
1018 }
1019 return;
1020}
1021
1022/*
1023 * Function: bond_setmulti
1024 * Purpose:
1025 * Enable multicast reception on "our" interface by enabling multicasts on
1026 * each of the member ports.
1027 */
1028static int
1029bond_setmulti(struct ifnet * ifp)
1030{
1031 ifbond_ref ifb;
1032 int error;
1033 int result = 0;
1034 bondport_ref p;
1035
1036 bond_lock();
1037 ifb = ifnet_softc(ifp);
1038 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1039 || TAILQ_EMPTY(&ifb->ifb_port_list)) {
1040 bond_unlock();
1041 return (0);
1042 }
1043 ifbond_retain(ifb);
1044 ifbond_wait(ifb, "bond_setmulti");
1045
1046 if (ifbond_flags_if_detaching(ifb)) {
1047 /* someone destroyed the bond while we were waiting */
1048 result = EBUSY;
1049 goto signal_done;
1050 }
1051 bond_unlock();
1052
1053 /* ifbond_wait() let's us safely walk the list without holding the lock */
1054 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1055 struct ifnet * port_ifp = p->po_ifp;
1056
1057 error = multicast_list_program(&p->po_multicast,
1058 ifp, port_ifp);
1059 if (error != 0) {
1060 printf("bond_setmulti(%s): "
1061 "multicast_list_program(%s%d) failed, %d\n",
1062 ifb->ifb_name, ifnet_name(port_ifp),
1063 ifnet_unit(port_ifp), error);
1064 result = error;
1065 }
1066 }
1067 bond_lock();
1068 signal_done:
1069 ifbond_signal(ifb, "bond_setmulti");
1070 bond_unlock();
1071 ifbond_release(ifb);
1072 return (result);
1073}
1074
1075static int
1076bond_clone_attach(void)
1077{
1078 int error;
1079
1080 if ((error = if_clone_attach(&bond_cloner)) != 0)
1081 return error;
1082 bond_lock_init();
1083 return 0;
1084}
1085
1086static int
1087ifbond_add_slow_proto_multicast(ifbond_ref ifb)
1088{
1089 int error;
1090 struct ifmultiaddr * ifma = NULL;
1091 struct sockaddr_dl sdl;
1092
1093 bond_assert_lock_not_held();
1094
1095 bzero(&sdl, sizeof(sdl));
1096 sdl.sdl_len = sizeof(sdl);
1097 sdl.sdl_family = AF_LINK;
1098 sdl.sdl_type = IFT_ETHER;
1099 sdl.sdl_nlen = 0;
1100 sdl.sdl_alen = sizeof(slow_proto_multicast);
1101 bcopy(&slow_proto_multicast, sdl.sdl_data, sizeof(slow_proto_multicast));
1102 error = if_addmulti_anon(ifb->ifb_ifp, (struct sockaddr *)&sdl, &ifma);
1103 if (error == 0) {
1104 ifb->ifb_ifma_slow_proto = ifma;
1105 }
1106 return (error);
1107}
1108
1109static int
1110bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params)
1111{
1112 int error;
1113 ifbond_ref ifb;
1114 ifnet_t ifp;
1115 struct ifnet_init_params bond_init;
1116
1117 error = bond_globals_init();
1118 if (error != 0) {
1119 return (error);
1120 }
1121
1122 ifb = _MALLOC(sizeof(ifbond), M_BOND, M_WAITOK);
1123 if (ifb == NULL) {
1124 return (ENOMEM);
1125 }
1126 bzero(ifb, sizeof(*ifb));
1127
1128 ifbond_retain(ifb);
1129 TAILQ_INIT(&ifb->ifb_port_list);
1130 TAILQ_INIT(&ifb->ifb_lag_list);
1131 ifb->ifb_key = unit + 1;
1132
1133 /* use the interface name as the unique id for ifp recycle */
1134 if ((u_int32_t)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d",
1135 ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) {
1136 ifbond_release(ifb);
1137 return (EINVAL);
1138 }
1139
1140 bzero(&bond_init, sizeof(bond_init));
1141 bond_init.uniqueid = ifb->ifb_name;
1142 bond_init.uniqueid_len = strlen(ifb->ifb_name);
1143 bond_init.name = ifc->ifc_name;
1144 bond_init.unit = unit;
1145 bond_init.family = IFNET_FAMILY_BOND;
1146 bond_init.type = IFT_IEEE8023ADLAG;
1147 bond_init.output = bond_output;
1148 bond_init.demux = ether_demux;
1149 bond_init.add_proto = ether_add_proto;
1150 bond_init.del_proto = ether_del_proto;
1151 bond_init.check_multi = ether_check_multi;
1152 bond_init.framer = ether_frameout;
1153 bond_init.ioctl = bond_ioctl;
1154 bond_init.set_bpf_tap = bond_set_bpf_tap;
1155 bond_init.detach = bond_if_free;
1156 bond_init.broadcast_addr = etherbroadcastaddr;
1157 bond_init.broadcast_len = ETHER_ADDR_LEN;
1158 bond_init.softc = ifb;
1159 error = ifnet_allocate(&bond_init, &ifp);
1160
1161 if (error) {
1162 ifbond_release(ifb);
1163 return (error);
1164 }
1165
1166 ifb->ifb_ifp = ifp;
1167 ifnet_set_offload(ifp, 0);
1168 ifnet_set_addrlen(ifp, ETHER_ADDR_LEN); /* XXX ethernet specific */
1169 ifnet_set_flags(ifp, IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, 0xffff);
1170 ifnet_set_baudrate(ifp, 0);
1171 ifnet_set_mtu(ifp, 0);
1172
1173 error = ifnet_attach(ifp, NULL);
1174 if (error != 0) {
1175 ifnet_release(ifp);
1176 ifbond_release(ifb);
1177 return (error);
1178 }
1179 error = ifbond_add_slow_proto_multicast(ifb);
1180 if (error != 0) {
1181 printf("bond_clone_create(%s): "
1182 "failed to add slow_proto multicast, %d\n",
1183 ifb->ifb_name, error);
1184 }
1185
1186 /* attach as ethernet */
1187 bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1188
1189 bond_lock();
1190 TAILQ_INSERT_HEAD(&g_bond->ifbond_list, ifb, ifb_bond_list);
1191 bond_unlock();
1192
1193 return (0);
1194}
1195
1196static void
1197bond_remove_all_interfaces(ifbond_ref ifb)
1198{
1199 bondport_ref p;
1200
1201 bond_assert_lock_held();
1202
1203 /*
1204 * do this in reverse order to avoid re-programming the mac address
1205 * as each head interface is removed
1206 */
1207 while ((p = TAILQ_LAST(&ifb->ifb_port_list, port_list)) != NULL) {
1208 bond_remove_interface(ifb, p->po_ifp);
1209 }
1210 return;
1211}
1212
1213static void
1214bond_remove(ifbond_ref ifb)
1215{
1216 bond_assert_lock_held();
1217 ifbond_flags_set_if_detaching(ifb);
1218 TAILQ_REMOVE(&g_bond->ifbond_list, ifb, ifb_bond_list);
1219 bond_remove_all_interfaces(ifb);
1220 return;
1221}
1222
/*
 * Function: bond_if_detach
 * Purpose:
 *   Detach the bond's ifnet.  A failure is logged; nothing is
 *   returned to the caller.
 */
static void
bond_if_detach(struct ifnet * ifp)
{
    int     status = ifnet_detach(ifp);

    if (status == 0) {
        return;
    }
    printf("bond_if_detach %s%d: ifnet_detach failed, %d\n",
           ifnet_name(ifp), ifnet_unit(ifp), status);
}
1236
1237static int
1238bond_clone_destroy(struct ifnet * ifp)
1239{
1240 ifbond_ref ifb;
1241
1242 bond_lock();
1243 ifb = ifnet_softc(ifp);
1244 if (ifb == NULL || ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
1245 bond_unlock();
1246 return 0;
1247 }
1248 if (ifbond_flags_if_detaching(ifb)) {
1249 bond_unlock();
1250 return 0;
1251 }
1252 bond_remove(ifb);
1253 bond_unlock();
1254 bond_if_detach(ifp);
1255 return 0;
1256}
1257
1258static int
1259bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode, bpf_packet_func func)
1260{
1261 ifbond_ref ifb;
1262
1263 bond_lock();
1264 ifb = ifnet_softc(ifp);
1265 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
1266 bond_unlock();
1267 return (ENODEV);
1268 }
1269 switch (mode) {
1270 case BPF_TAP_DISABLE:
1271 ifb->ifb_bpf_input = ifb->ifb_bpf_output = NULL;
1272 break;
1273
1274 case BPF_TAP_INPUT:
1275 ifb->ifb_bpf_input = func;
1276 break;
1277
1278 case BPF_TAP_OUTPUT:
1279 ifb->ifb_bpf_output = func;
1280 break;
1281
1282 case BPF_TAP_INPUT_OUTPUT:
1283 ifb->ifb_bpf_input = ifb->ifb_bpf_output = func;
1284 break;
1285 default:
1286 break;
1287 }
1288 bond_unlock();
1289 return 0;
1290}
1291
/*
 * Function: ether_header_hash
 * Purpose:
 *   Compute a 32-bit hash from the destination ethernet address and the
 *   ethernet type of the given header.  The bytes are combined in host
 *   memory order, exactly as a direct load would.
 *
 *   The address bytes are copied into properly typed locals rather than
 *   loaded through cast pointers: the header is not guaranteed to be
 *   4-byte aligned.  The 16-bit tail is widened to uint32_t before the
 *   shift so that a set top bit cannot overflow a signed int.
 */
static uint32_t
ether_header_hash(struct ether_header * eh_p)
{
    uint32_t    dhost_head;
    uint16_t    dhost_tail;
    uint32_t    h;

    bcopy(&eh_p->ether_dhost[0], &dhost_head, sizeof(dhost_head));
    bcopy(&eh_p->ether_dhost[4], &dhost_tail, sizeof(dhost_tail));

    /* get 32-bits from destination ether and ether type */
    h = ((uint32_t)dhost_tail << 16) | eh_p->ether_type;
    h ^= dhost_head;
    return (h);
}
1303
1304static struct mbuf *
1305S_mbuf_skip_to_offset(struct mbuf * m, int32_t * offset)
1306{
1307 int len;
1308
1309 len = m->m_len;
1310 while (*offset >= len) {
1311 *offset -= len;
1312 m = m->m_next;
1313 if (m == NULL) {
1314 break;
1315 }
1316 len = m->m_len;
1317 }
1318 return (m);
1319}
1320
/*
 * Function: make_uint32
 * Purpose:
 *   Assemble a 32-bit value from four bytes given in memory order
 *   (c0 is the lowest-addressed byte), producing the same value a
 *   native 32-bit load of those bytes would on this byte order.
 */
#if BYTE_ORDER == BIG_ENDIAN
static __inline__ uint32_t
make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
{
    uint32_t    v;

    /* first byte in memory is the most significant */
    v = c0;
    v = (v << 8) | c1;
    v = (v << 8) | c2;
    v = (v << 8) | c3;
    return (v);
}
#else /* BYTE_ORDER == LITTLE_ENDIAN */
static __inline__ uint32_t
make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
{
    uint32_t    v;

    /* first byte in memory is the least significant */
    v = c3;
    v = (v << 8) | c2;
    v = (v << 8) | c1;
    v = (v << 8) | c0;
    return (v);
}
#endif /* BYTE_ORDER == LITTLE_ENDIAN */
1336
1337static int
1338S_mbuf_copy_uint32(struct mbuf * m, int32_t offset, uint32_t * val)
1339{
1340 struct mbuf * current;
1341 u_char * current_data;
1342 struct mbuf * next;
1343 u_char * next_data;
1344 int space_current;
1345
1346 current = S_mbuf_skip_to_offset(m, &offset);
1347 if (current == NULL) {
1348 return (1);
1349 }
1350 current_data = mtod(current, u_char *) + offset;
1351 space_current = current->m_len - offset;
1352 if (space_current >= (int)sizeof(uint32_t)) {
1353 *val = *((uint32_t *)current_data);
1354 return (0);
1355 }
1356 next = current->m_next;
1357 if (next == NULL || (next->m_len + space_current) < (int)sizeof(uint32_t)) {
1358 return (1);
1359 }
1360 next_data = mtod(next, u_char *);
1361 switch (space_current) {
1362 case 1:
1363 *val = make_uint32(current_data[0], next_data[0],
1364 next_data[1], next_data[2]);
1365 break;
1366 case 2:
1367 *val = make_uint32(current_data[0], current_data[1],
1368 next_data[0], next_data[1]);
1369 break;
1370 default:
1371 *val = make_uint32(current_data[0], current_data[1],
1372 current_data[2], next_data[0]);
1373 break;
1374 }
1375 return (0);
1376}
1377
1378#define IP_SRC_OFFSET (offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p))
1379#define IP_DST_OFFSET (offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p))
1380
1381static uint32_t
1382ip_header_hash(struct mbuf * m)
1383{
1384 u_char * data;
1385 struct in_addr ip_dst;
1386 struct in_addr ip_src;
1387 u_char ip_p;
1388 int32_t offset;
1389 struct mbuf * orig_m = m;
1390
1391 /* find the IP protocol field relative to the start of the packet */
1392 offset = offsetof(struct ip, ip_p) + sizeof(struct ether_header);
1393 m = S_mbuf_skip_to_offset(m, &offset);
1394 if (m == NULL || m->m_len < 1) {
1395 goto bad_ip_packet;
1396 }
1397 data = mtod(m, u_char *) + offset;
1398 ip_p = *data;
1399
1400 /* find the IP src relative to the IP protocol */
1401 if ((m->m_len - offset)
1402 >= (int)(IP_SRC_OFFSET + sizeof(struct in_addr) * 2)) {
1403 /* this should be the normal case */
1404 ip_src = *(struct in_addr *)(data + IP_SRC_OFFSET);
1405 ip_dst = *(struct in_addr *)(data + IP_DST_OFFSET);
1406 }
1407 else {
1408 if (S_mbuf_copy_uint32(m, offset + IP_SRC_OFFSET,
1409 (uint32_t *)&ip_src.s_addr)) {
1410 goto bad_ip_packet;
1411 }
1412 if (S_mbuf_copy_uint32(m, offset + IP_DST_OFFSET,
1413 (uint32_t *)&ip_dst.s_addr)) {
1414 goto bad_ip_packet;
1415 }
1416 }
1417 return (ntohl(ip_dst.s_addr) ^ ntohl(ip_src.s_addr) ^ ((uint32_t)ip_p));
1418
1419 bad_ip_packet:
1420 return (ether_header_hash(mtod(orig_m, struct ether_header *)));
1421}
1422
1423#define IP6_ADDRS_LEN (sizeof(struct in6_addr) * 2)
1424static uint32_t
1425ipv6_header_hash(struct mbuf * m)
1426{
1427 u_char * data;
1428 int i;
1429 int32_t offset;
1430 struct mbuf * orig_m = m;
1431 uint32_t * scan;
1432 uint32_t val;
1433
1434 /* find the IP protocol field relative to the start of the packet */
1435 offset = offsetof(struct ip6_hdr, ip6_src) + sizeof(struct ether_header);
1436 m = S_mbuf_skip_to_offset(m, &offset);
1437 if (m == NULL) {
1438 goto bad_ipv6_packet;
1439 }
1440 data = mtod(m, u_char *) + offset;
1441 val = 0;
1442 if ((m->m_len - offset) >= (int)IP6_ADDRS_LEN) {
1443 /* this should be the normal case */
1444 for (i = 0, scan = (uint32_t *)data;
1445 i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t));
1446 i++, scan++) {
1447 val ^= *scan;
1448 }
1449 }
1450 else {
1451 for (i = 0; i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); i++) {
1452 uint32_t tmp;
1453 if (S_mbuf_copy_uint32(m, offset + i * sizeof(uint32_t),
1454 (uint32_t *)&tmp)) {
1455 goto bad_ipv6_packet;
1456 }
1457 val ^= tmp;
1458 }
1459 }
1460 return (ntohl(val));
1461
1462 bad_ipv6_packet:
1463 return (ether_header_hash(mtod(orig_m, struct ether_header *)));
1464}
1465
1466static int
1467bond_output(struct ifnet * ifp, struct mbuf * m)
1468{
1469 bpf_packet_func bpf_func;
1470 uint32_t h;
1471 ifbond_ref ifb;
1472 struct ifnet * port_ifp = NULL;
1473
1474 if (m == 0) {
1475 return (0);
1476 }
1477 if ((m->m_flags & M_PKTHDR) == 0) {
1478 m_freem(m);
1479 return (0);
1480 }
1481 if (m->m_pkthdr.socket_id != 0) {
1482 h = m->m_pkthdr.socket_id;
1483 }
1484 else {
1485 struct ether_header * eh_p;
1486
1487 eh_p = mtod(m, struct ether_header *);
1488 switch (ntohs(eh_p->ether_type)) {
1489 case ETHERTYPE_IP:
1490 h = ip_header_hash(m);
1491 break;
1492 case ETHERTYPE_IPV6:
1493 h = ipv6_header_hash(m);
1494 break;
1495 default:
1496 h = ether_header_hash(eh_p);
1497 break;
1498 }
1499 }
1500 bond_lock();
1501 ifb = ifnet_softc(ifp);
1502 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1503 || ifb->ifb_distributing_count == 0) {
1504 goto done;
1505 }
1506 h %= ifb->ifb_distributing_count;
1507 port_ifp = ifb->ifb_distributing_array[h]->po_ifp;
1508 bpf_func = ifb->ifb_bpf_output;
1509 bond_unlock();
1510
1511 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1512 (void)ifnet_stat_increment_out(ifp, 1,
1513 m->m_pkthdr.len + ETHER_VLAN_ENCAP_LEN,
1514 0);
1515 } else {
1516 (void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
1517 }
1518 bond_bpf_output(ifp, m, bpf_func);
1519
1520 return (ifnet_output_raw(port_ifp, PF_BOND, m));
1521
1522 done:
1523 bond_unlock();
1524 m_freem(m);
1525 return (0);
1526}
1527
1528static bondport_ref
1529ifbond_lookup_port(ifbond_ref ifb, struct ifnet * port_ifp)
1530{
1531 bondport_ref p;
1532 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1533 if (p->po_ifp == port_ifp) {
1534 return (p);
1535 }
1536 }
1537 return (NULL);
1538}
1539
1540static bondport_ref
1541bond_lookup_port(struct ifnet * port_ifp)
1542{
1543 ifbond_ref ifb;
1544 bondport_ref port;
1545
1546 TAILQ_FOREACH(ifb, &g_bond->ifbond_list, ifb_bond_list) {
1547 port = ifbond_lookup_port(ifb, port_ifp);
1548 if (port != NULL) {
1549 return (port);
1550 }
1551 }
1552 return (NULL);
1553}
1554
1555static void
1556bond_receive_lacpdu(struct mbuf * m, struct ifnet * port_ifp)
1557{
1558 struct ifnet * bond_ifp = NULL;
1559 ifbond_ref ifb;
1560 int event_code = 0;
1561 bondport_ref p;
1562
1563 bond_lock();
1564 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1565 goto done;
1566 }
1567 p = bond_lookup_port(port_ifp);
1568 if (p == NULL) {
1569 goto done;
1570 }
1571 if (p->po_enabled == 0) {
1572 goto done;
1573 }
1574 ifb = p->po_bond;
1575 if (ifb->ifb_mode != IF_BOND_MODE_LACP) {
1576 goto done;
1577 }
1578 bondport_receive_lacpdu(p, (lacpdu_ref)m->m_data);
1579 if (ifbond_selection(ifb)) {
1580 event_code = (ifb->ifb_active_lag == NULL)
1581 ? KEV_DL_LINK_OFF
1582 : KEV_DL_LINK_ON;
1583 /* XXX need to take a reference on bond_ifp */
1584 bond_ifp = ifb->ifb_ifp;
1585 ifb->ifb_last_link_event = event_code;
1586 }
1587 else {
1588 event_code = (ifb->ifb_active_lag == NULL)
1589 ? KEV_DL_LINK_OFF
1590 : KEV_DL_LINK_ON;
1591 if (event_code != ifb->ifb_last_link_event) {
1592 if (g_bond->verbose) {
1593 timestamp_printf("%s: (receive) generating LINK event\n",
1594 ifb->ifb_name);
1595 }
1596 bond_ifp = ifb->ifb_ifp;
1597 ifb->ifb_last_link_event = event_code;
1598 }
1599 }
1600
1601 done:
1602 bond_unlock();
1603 if (bond_ifp != NULL) {
1604 interface_link_event(bond_ifp, event_code);
1605 }
1606 m_freem(m);
1607 return;
1608}
1609
1610static void
1611bond_receive_la_marker_pdu(struct mbuf * m, struct ifnet * port_ifp)
1612{
1613 la_marker_pdu_ref marker_p;
1614 bondport_ref p;
1615
1616 marker_p = (la_marker_pdu_ref)(m->m_data + ETHER_HDR_LEN);
1617 if (marker_p->lm_marker_tlv_type != LA_MARKER_TLV_TYPE_MARKER) {
1618 goto failed;
1619 }
1620 bond_lock();
1621 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1622 bond_unlock();
1623 goto failed;
1624 }
1625 p = bond_lookup_port(port_ifp);
1626 if (p == NULL || p->po_enabled == 0
1627 || p->po_bond->ifb_mode != IF_BOND_MODE_LACP) {
1628 bond_unlock();
1629 goto failed;
1630 }
1631 /* echo back the same packet as a marker response */
1632 marker_p->lm_marker_tlv_type = LA_MARKER_TLV_TYPE_MARKER_RESPONSE;
1633 bondport_slow_proto_transmit(p, (packet_buffer_ref)m);
1634 bond_unlock();
1635 return;
1636
1637 failed:
1638 m_freem(m);
1639 return;
1640}
1641
1642static int
1643bond_input(ifnet_t port_ifp, __unused protocol_family_t protocol, mbuf_t m,
1644 char * frame_header)
1645{
1646 bpf_packet_func bpf_func;
1647 const struct ether_header * eh_p;
1648 ifbond_ref ifb;
1649 struct ifnet * ifp;
1650 bondport_ref p;
1651
1652 eh_p = (const struct ether_header *)frame_header;
1653 if ((m->m_flags & M_MCAST) != 0
1654 && bcmp(eh_p->ether_dhost, &slow_proto_multicast,
1655 sizeof(eh_p->ether_dhost)) == 0
1656 && ntohs(eh_p->ether_type) == IEEE8023AD_SLOW_PROTO_ETHERTYPE) {
1657 u_char subtype = *mtod(m, u_char *);
1658
1659 if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP) {
1660 if (m->m_pkthdr.len < (int)offsetof(lacpdu, la_reserved)) {
1661 m_freem(m);
1662 return (0);
1663 }
1664 /* send to lacp */
1665 if (m->m_len < (int)offsetof(lacpdu, la_reserved)) {
1666 m = m_pullup(m, offsetof(lacpdu, la_reserved));
1667 if (m == NULL) {
1668 return (0);
1669 }
1670 }
1671 bond_receive_lacpdu(m, port_ifp);
1672 return (0);
1673 }
1674 else if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL) {
1675 int min_size;
1676
1677 /* restore the ethernet header pointer in the mbuf */
1678 m->m_pkthdr.len += ETHER_HDR_LEN;
1679 m->m_data -= ETHER_HDR_LEN;
1680 m->m_len += ETHER_HDR_LEN;
1681 min_size = ETHER_HDR_LEN + offsetof(la_marker_pdu, lm_reserved);
1682 if (m->m_pkthdr.len < min_size) {
1683 m_freem(m);
1684 return (0);
1685 }
1686 /* send to lacp */
1687 if (m->m_len < min_size) {
1688 m = m_pullup(m, min_size);
1689 if (m == NULL) {
1690 return (0);
1691 }
1692 }
1693 /* send to marker responder */
1694 bond_receive_la_marker_pdu(m, port_ifp);
1695 return (0);
1696 }
1697 else if (subtype == 0
1698 || subtype > IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END) {
1699 /* invalid subtype, discard the frame */
1700 m_freem(m);
1701 return (0);
1702 }
1703 }
1704 bond_lock();
1705 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1706 goto done;
1707 }
1708 p = bond_lookup_port(port_ifp);
1709 if (p == NULL || bondport_collecting(p) == 0) {
1710 goto done;
1711 }
1712
1713 /* make the packet appear as if it arrived on the bonded interface */
1714 ifb = p->po_bond;
1715 ifp = ifb->ifb_ifp;
1716 bpf_func = ifb->ifb_bpf_input;
1717 bond_unlock();
1718
1719 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1720 (void)ifnet_stat_increment_in(ifp, 1,
1721 (m->m_pkthdr.len + ETHER_HDR_LEN
1722 + ETHER_VLAN_ENCAP_LEN), 0);
1723 }
1724 else {
1725 (void)ifnet_stat_increment_in(ifp, 1,
1726 (m->m_pkthdr.len + ETHER_HDR_LEN), 0);
1727 }
1728 m->m_pkthdr.rcvif = ifp;
1729 bond_bpf_input(ifp, m, eh_p, bpf_func);
1730 m->m_pkthdr.header = frame_header;
1731 dlil_input_packet_list(ifp, m);
1732 return 0;
1733
1734 done:
1735 bond_unlock();
1736 m_freem(m);
1737 return (0);
1738}
1739
1740static __inline__ const char *
1741bondport_get_name(bondport_ref p)
1742{
1743 return (p->po_name);
1744}
1745
1746static __inline__ int
1747bondport_get_index(bondport_ref p)
1748{
1749 return (ifnet_index(p->po_ifp));
1750}
1751
1752static void
1753bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf)
1754{
1755 struct ether_header * eh_p;
1756 int error;
1757
1758 /* packet_buffer_allocate leaves room for ethernet header */
1759 eh_p = mtod(buf, struct ether_header *);
1760 bcopy(&slow_proto_multicast, &eh_p->ether_dhost, sizeof(eh_p->ether_dhost));
1761 bcopy(&p->po_saved_addr, eh_p->ether_shost, sizeof(eh_p->ether_shost));
1762 eh_p->ether_type = htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE);
1763 error = ifnet_output_raw(p->po_ifp, PF_BOND, buf);
1764 if (error != 0) {
1765 printf("bondport_slow_proto_transmit(%s) failed %d\n",
1766 bondport_get_name(p), error);
1767 }
1768 return;
1769}
1770
1771static void
1772bondport_timer_process_func(devtimer_ref timer,
1773 devtimer_process_func_event event)
1774{
1775 bondport_ref p;
1776
1777 switch (event) {
1778 case devtimer_process_func_event_lock:
1779 bond_lock();
1780 devtimer_retain(timer);
1781 break;
1782 case devtimer_process_func_event_unlock:
1783 if (devtimer_valid(timer)) {
1784 /* as long as the devtimer is valid, we can look at arg0 */
1785 int event_code = 0;
1786 struct ifnet * bond_ifp = NULL;
1787
1788 p = (bondport_ref)devtimer_arg0(timer);
1789 if (ifbond_selection(p->po_bond)) {
1790 event_code = (p->po_bond->ifb_active_lag == NULL)
1791 ? KEV_DL_LINK_OFF
1792 : KEV_DL_LINK_ON;
1793 /* XXX need to take a reference on bond_ifp */
1794 bond_ifp = p->po_bond->ifb_ifp;
1795 p->po_bond->ifb_last_link_event = event_code;
1796 }
1797 else {
1798 event_code = (p->po_bond->ifb_active_lag == NULL)
1799 ? KEV_DL_LINK_OFF
1800 : KEV_DL_LINK_ON;
1801 if (event_code != p->po_bond->ifb_last_link_event) {
1802 if (g_bond->verbose) {
1803 timestamp_printf("%s: (timer) generating LINK event\n",
1804 p->po_bond->ifb_name);
1805 }
1806 bond_ifp = p->po_bond->ifb_ifp;
1807 p->po_bond->ifb_last_link_event = event_code;
1808 }
1809 }
1810 devtimer_release(timer);
1811 bond_unlock();
1812 if (bond_ifp != NULL) {
1813 interface_link_event(bond_ifp, event_code);
1814 }
1815 }
1816 else {
1817 /* timer is going away */
1818 devtimer_release(timer);
1819 bond_unlock();
1820 }
1821 break;
1822 default:
1823 break;
1824 }
1825}
1826
1827static bondport_ref
1828bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
1829 int active, int short_timeout, int * ret_error)
1830{
1831 int error = 0;
1832 bondport_ref p = NULL;
1833 lacp_actor_partner_state s;
1834
1835 *ret_error = 0;
1836 p = _MALLOC(sizeof(*p), M_BOND, M_WAITOK);
1837 if (p == NULL) {
1838 *ret_error = ENOMEM;
1839 return (NULL);
1840 }
1841 bzero(p, sizeof(*p));
1842 multicast_list_init(&p->po_multicast);
1843 if ((u_int32_t)snprintf(p->po_name, sizeof(p->po_name), "%s%d",
1844 ifnet_name(port_ifp), ifnet_unit(port_ifp))
1845 >= sizeof(p->po_name)) {
1846 printf("if_bond: name too large\n");
1847 *ret_error = EINVAL;
1848 goto failed;
1849 }
1850 error = siocgifdevmtu(port_ifp, &p->po_devmtu);
1851 if (error != 0) {
1852 printf("if_bond: SIOCGIFDEVMTU %s failed, %d\n",
1853 bondport_get_name(p), error);
1854 goto failed;
1855 }
1856 /* remember the current interface MTU so it can be restored */
1857 p->po_devmtu.ifdm_current = ifnet_mtu(port_ifp);
1858 p->po_ifp = port_ifp;
1859 p->po_media_info = interface_media_info(port_ifp);
1860 p->po_current_while_timer = devtimer_create(bondport_timer_process_func, p);
1861 if (p->po_current_while_timer == NULL) {
1862 *ret_error = ENOMEM;
1863 goto failed;
1864 }
1865 p->po_periodic_timer = devtimer_create(bondport_timer_process_func, p);
1866 if (p->po_periodic_timer == NULL) {
1867 *ret_error = ENOMEM;
1868 goto failed;
1869 }
1870 p->po_wait_while_timer = devtimer_create(bondport_timer_process_func, p);
1871 if (p->po_wait_while_timer == NULL) {
1872 *ret_error = ENOMEM;
1873 goto failed;
1874 }
1875 p->po_transmit_timer = devtimer_create(bondport_timer_process_func, p);
1876 if (p->po_transmit_timer == NULL) {
1877 *ret_error = ENOMEM;
1878 goto failed;
1879 }
1880 p->po_receive_state = ReceiveState_none;
1881 p->po_mux_state = MuxState_none;
1882 p->po_priority = priority;
1883 s = 0;
1884 s = lacp_actor_partner_state_set_aggregatable(s);
1885 if (short_timeout) {
1886 s = lacp_actor_partner_state_set_short_timeout(s);
1887 }
1888 if (active) {
1889 s = lacp_actor_partner_state_set_active_lacp(s);
1890 }
1891 p->po_actor_state = s;
1892 return (p);
1893
1894 failed:
1895 bondport_free(p);
1896 return (NULL);
1897}
1898
1899static void
1900bondport_start(bondport_ref p)
1901{
1902 bondport_receive_machine(p, LAEventStart, NULL);
1903 bondport_mux_machine(p, LAEventStart, NULL);
1904 bondport_periodic_transmit_machine(p, LAEventStart, NULL);
1905 bondport_transmit_machine(p, LAEventStart, NULL);
1906 return;
1907}
1908
1909/*
1910 * Function: bondport_invalidate_timers
1911 * Purpose:
1912 * Invalidate all of the timers for the bondport.
1913 */
1914static void
1915bondport_invalidate_timers(bondport_ref p)
1916{
1917 devtimer_invalidate(p->po_current_while_timer);
1918 devtimer_invalidate(p->po_periodic_timer);
1919 devtimer_invalidate(p->po_wait_while_timer);
1920 devtimer_invalidate(p->po_transmit_timer);
1921}
1922
1923/*
1924 * Function: bondport_cancel_timers
1925 * Purpose:
1926 * Cancel all of the timers for the bondport.
1927 */
1928static void
1929bondport_cancel_timers(bondport_ref p)
1930{
1931 devtimer_cancel(p->po_current_while_timer);
1932 devtimer_cancel(p->po_periodic_timer);
1933 devtimer_cancel(p->po_wait_while_timer);
1934 devtimer_cancel(p->po_transmit_timer);
1935}
1936
1937static void
1938bondport_free(bondport_ref p)
1939{
1940 multicast_list_remove(&p->po_multicast);
1941 devtimer_release(p->po_current_while_timer);
1942 devtimer_release(p->po_periodic_timer);
1943 devtimer_release(p->po_wait_while_timer);
1944 devtimer_release(p->po_transmit_timer);
1945 FREE(p, M_BOND);
1946 return;
1947}
1948
/*
 * Progress bits recorded by bond_add_interface() so that its failure
 * path knows which steps need to be undone.
 */
#define BOND_ADD_PROGRESS_IN_LIST           (1 << 0)
#define BOND_ADD_PROGRESS_PROTO_ATTACHED    (1 << 1)
#define BOND_ADD_PROGRESS_LLADDR_SET        (1 << 2)
#define BOND_ADD_PROGRESS_MTU_SET           (1 << 3)
1953
1954static __inline__ int
1955bond_device_mtu(struct ifnet * ifp, ifbond_ref ifb)
1956{
1957 return (((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
1958 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu);
1959}
1960
1961static int
1962bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp)
1963{
1964 int devmtu;
1965 int error = 0;
1966 int event_code = 0;
1967 int first = FALSE;
1968 ifbond_ref ifb;
1969 bondport_ref * new_array = NULL;
1970 bondport_ref * old_array = NULL;
1971 bondport_ref p;
1972 int progress = 0;
1973
1974 /* pre-allocate space for new port */
1975 p = bondport_create(port_ifp, 0x8000, 1, 0, &error);
1976 if (p == NULL) {
1977 return (error);
1978 }
1979 bond_lock();
1980 ifb = (ifbond_ref)ifnet_softc(ifp);
1981 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
1982 bond_unlock();
1983 bondport_free(p);
1984 return ((ifb == NULL ? EOPNOTSUPP : EBUSY));
1985 }
1986
1987 /* make sure this interface can handle our current MTU */
1988 devmtu = bond_device_mtu(ifp, ifb);
1989 if (devmtu != 0
1990 && (devmtu > p->po_devmtu.ifdm_max || devmtu < p->po_devmtu.ifdm_min)) {
1991 bond_unlock();
1992 printf("if_bond: interface %s doesn't support mtu %d",
1993 bondport_get_name(p), devmtu);
1994 bondport_free(p);
1995 return (EINVAL);
1996 }
1997
1998 /* make sure ifb doesn't get de-allocated while we wait */
1999 ifbond_retain(ifb);
2000
2001 /* wait for other add or remove to complete */
2002 ifbond_wait(ifb, "bond_add_interface");
2003
2004 if (ifbond_flags_if_detaching(ifb)) {
2005 /* someone destroyed the bond while we were waiting */
2006 error = EBUSY;
2007 goto signal_done;
2008 }
2009 if (bond_lookup_port(port_ifp) != NULL) {
2010 /* port is already part of a bond */
2011 error = EBUSY;
2012 goto signal_done;
2013 }
2014 ifnet_lock_exclusive(port_ifp);
2015 if ((ifnet_eflags(port_ifp) & (IFEF_VLAN | IFEF_BOND)) != 0) {
2016 /* interface already has VLAN's, or is part of bond */
2017 ifnet_lock_done(port_ifp);
2018 error = EBUSY;
2019 goto signal_done;
2020 }
2021
2022 /* mark the interface busy */
2023 /* can't use ifnet_set_eflags because that takes the lock */
2024 port_ifp->if_eflags |= IFEF_BOND;
2025 ifnet_lock_done(port_ifp);
2026
2027 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2028 ifnet_set_offload(ifp, ifnet_offload(port_ifp));
2029 ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
2030 if (ifbond_flags_lladdr(ifb) == FALSE) {
2031 first = TRUE;
2032 }
2033 } else {
2034 ifnet_offload_t ifp_offload;
2035 ifnet_offload_t port_ifp_offload;
2036
2037 ifp_offload = ifnet_offload(ifp);
2038 port_ifp_offload = ifnet_offload(port_ifp);
2039 if (ifp_offload != port_ifp_offload) {
2040 ifnet_offload_t offload;
2041
2042 offload = ifp_offload & port_ifp_offload;
2043 printf("bond_add_interface(%s, %s) "
2044 "hwassist values don't match 0x%x != 0x%x, using 0x%x instead\n",
2045 ifb->ifb_name, bondport_get_name(p),
2046 ifp_offload, port_ifp_offload, offload);
2047 /*
2048 * XXX
2049 * if the bond has VLAN's, we can't simply change the hwassist
2050 * field behind its back: this needs work
2051 */
2052 ifnet_set_offload(ifp, offload);
2053 }
2054 }
2055 p->po_bond = ifb;
2056
2057 /* remember the port's ethernet address so it can be restored */
2058 ether_addr_copy(&p->po_saved_addr, ifnet_lladdr(port_ifp));
2059
2060 /* add it to the list of ports */
2061 TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list);
2062 ifb->ifb_port_count++;
2063
2064 /* set the default MTU */
2065 if (ifnet_mtu(ifp) == 0) {
2066 ifnet_set_mtu(ifp, ETHERMTU);
2067 }
2068 bond_unlock();
2069
2070
2071 /* first port added to bond determines bond's ethernet address */
2072 if (first) {
2073 ifnet_set_lladdr_and_type(ifp, ifnet_lladdr(port_ifp), ETHER_ADDR_LEN,
2074 IFT_ETHER);
2075 }
2076
2077 progress |= BOND_ADD_PROGRESS_IN_LIST;
2078
2079 /* allocate a larger distributing array */
2080 new_array = (bondport_ref *)
2081 _MALLOC(sizeof(*new_array) * ifb->ifb_port_count, M_BOND, M_WAITOK);
2082 if (new_array == NULL) {
2083 error = ENOMEM;
2084 goto failed;
2085 }
2086
2087 /* attach our BOND "protocol" to the interface */
2088 error = bond_attach_protocol(port_ifp);
2089 if (error) {
2090 goto failed;
2091 }
2092 progress |= BOND_ADD_PROGRESS_PROTO_ATTACHED;
2093
2094 /* set the interface MTU */
2095 devmtu = bond_device_mtu(ifp, ifb);
2096 error = siocsifmtu(port_ifp, devmtu);
2097 if (error != 0) {
2098 printf("bond_add_interface(%s, %s):"
2099 " SIOCSIFMTU %d failed %d\n",
2100 ifb->ifb_name, bondport_get_name(p), devmtu, error);
2101 goto failed;
2102 }
2103 progress |= BOND_ADD_PROGRESS_MTU_SET;
2104
2105 /* program the port with our multicast addresses */
2106 error = multicast_list_program(&p->po_multicast, ifp, port_ifp);
2107 if (error) {
2108 printf("bond_add_interface(%s, %s):"
2109 " multicast_list_program failed %d\n",
2110 ifb->ifb_name, bondport_get_name(p), error);
2111 goto failed;
2112 }
2113
2114 /* mark the interface up */
2115 ifnet_set_flags(port_ifp, IFF_UP, IFF_UP);
2116
2117 error = ifnet_ioctl(port_ifp, 0, SIOCSIFFLAGS, NULL);
2118 if (error != 0) {
2119 printf("bond_add_interface(%s, %s): SIOCSIFFLAGS failed %d\n",
2120 ifb->ifb_name, bondport_get_name(p), error);
2121 goto failed;
2122 }
2123
2124 /* re-program the port's ethernet address */
2125 error = if_siflladdr(port_ifp,
2126 (const struct ether_addr *)ifnet_lladdr(ifp));
2127 if (error != 0) {
2128 /* port doesn't support setting the link address */
2129 printf("bond_add_interface(%s, %s): if_siflladdr failed %d\n",
2130 ifb->ifb_name, bondport_get_name(p), error);
2131 goto failed;
2132 }
2133 progress |= BOND_ADD_PROGRESS_LLADDR_SET;
2134
2135 bond_lock();
2136
2137 /* no failures past this point */
2138 p->po_enabled = 1;
2139
2140 /* copy the contents of the existing distributing array */
2141 if (ifb->ifb_distributing_count) {
2142 bcopy(ifb->ifb_distributing_array, new_array,
2143 sizeof(*new_array) * ifb->ifb_distributing_count);
2144 }
2145 old_array = ifb->ifb_distributing_array;
2146 ifb->ifb_distributing_array = new_array;
2147
2148 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2149 bondport_start(p);
2150
2151 /* check if we need to generate a link status event */
2152 if (ifbond_selection(ifb)) {
2153 event_code = (ifb->ifb_active_lag == NULL)
2154 ? KEV_DL_LINK_OFF
2155 : KEV_DL_LINK_ON;
2156 ifb->ifb_last_link_event = event_code;
2157 }
2158 }
2159 else {
2160 /* are we adding the first distributing interface? */
2161 if (media_active(&p->po_media_info)) {
2162 if (ifb->ifb_distributing_count == 0) {
2163 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_ON;
2164 }
2165 bondport_enable_distributing(p);
2166 }
2167 else {
2168 bondport_disable_distributing(p);
2169 }
2170 }
2171 /* clear the busy state, and wakeup anyone waiting */
2172 ifbond_signal(ifb, "bond_add_interface");
2173 bond_unlock();
2174 if (event_code != 0) {
2175 interface_link_event(ifp, event_code);
2176 }
2177 if (old_array != NULL) {
2178 FREE(old_array, M_BOND);
2179 }
2180 return 0;
2181
2182 failed:
2183 bond_assert_lock_not_held();
2184
2185 /* if this was the first port to be added, clear our address */
2186 if (first) {
2187 ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2188 }
2189
2190 if (new_array != NULL) {
2191 FREE(new_array, M_BOND);
2192 }
2193 if ((progress & BOND_ADD_PROGRESS_LLADDR_SET) != 0) {
2194 int error1;
2195
2196 error1 = if_siflladdr(port_ifp, &p->po_saved_addr);
2197 if (error1 != 0) {
2198 printf("bond_add_interface(%s, %s): if_siflladdr failed %d\n",
2199 ifb->ifb_name, bondport_get_name(p), error1);
2200 }
2201 }
2202 if ((progress & BOND_ADD_PROGRESS_PROTO_ATTACHED) != 0) {
2203 (void)bond_detach_protocol(port_ifp);
2204 }
2205 if ((progress & BOND_ADD_PROGRESS_MTU_SET) != 0) {
2206 int error1;
2207
2208 error1 = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2209 if (error1 != 0) {
2210 printf("bond_add_interface(%s, %s): SIOCSIFMTU %d failed %d\n",
2211 ifb->ifb_name, bondport_get_name(p),
2212 p->po_devmtu.ifdm_current, error1);
2213 }
2214 }
2215 bond_lock();
2216 if ((progress & BOND_ADD_PROGRESS_IN_LIST) != 0) {
2217 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2218 ifb->ifb_port_count--;
2219 }
2220 ifnet_set_eflags(ifp, 0, IFEF_BOND);
2221 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2222 ifb->ifb_altmtu = 0;
2223 ifnet_set_mtu(ifp, 0);
2224 ifnet_set_offload(ifp, 0);
2225 }
2226
2227 signal_done:
2228 ifbond_signal(ifb, "bond_add_interface");
2229 bond_unlock();
2230 ifbond_release(ifb);
2231 bondport_free(p);
2232 return (error);
2233}
2234
2235static int
2236bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp)
2237{
2238 int active_lag = 0;
2239 int error = 0;
2240 int event_code = 0;
2241 bondport_ref head_port;
2242 struct ifnet * ifp;
2243 int last = FALSE;
2244 int new_link_address = FALSE;
2245 bondport_ref p;
2246 lacp_actor_partner_state s;
2247 int was_distributing;
2248
2249 bond_assert_lock_held();
2250
2251 ifbond_retain(ifb);
2252 ifbond_wait(ifb, "bond_remove_interface");
2253
2254 p = ifbond_lookup_port(ifb, port_ifp);
2255 if (p == NULL) {
2256 error = ENXIO;
2257 /* it got removed by another thread */
2258 goto signal_done;
2259 }
2260
2261 /* de-select it and remove it from the lists */
2262 was_distributing = bondport_flags_distributing(p);
2263 bondport_disable_distributing(p);
2264 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2265 bondport_set_selected(p, SelectedState_UNSELECTED);
2266 active_lag = bondport_remove_from_LAG(p);
2267 /* invalidate timers here while holding the bond_lock */
2268 bondport_invalidate_timers(p);
2269
2270 /* announce that we're Individual now */
2271 s = p->po_actor_state;
2272 s = lacp_actor_partner_state_set_individual(s);
2273 s = lacp_actor_partner_state_set_not_collecting(s);
2274 s = lacp_actor_partner_state_set_not_distributing(s);
2275 s = lacp_actor_partner_state_set_out_of_sync(s);
2276 p->po_actor_state = s;
2277 bondport_flags_set_ntt(p);
2278 }
2279
2280 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2281 ifb->ifb_port_count--;
2282
2283 ifp = ifb->ifb_ifp;
2284 head_port = TAILQ_FIRST(&ifb->ifb_port_list);
2285 if (head_port == NULL) {
2286 ifnet_set_flags(ifp, 0, IFF_RUNNING);
2287 if (ifbond_flags_lladdr(ifb) == FALSE) {
2288 last = TRUE;
2289 }
2290 ifnet_set_offload(ifp, 0);
2291 ifnet_set_mtu(ifp, 0);
2292 ifb->ifb_altmtu = 0;
2293 } else if (ifbond_flags_lladdr(ifb) == FALSE
2294 && bcmp(&p->po_saved_addr, ifnet_lladdr(ifp),
2295 ETHER_ADDR_LEN) == 0) {
2296 new_link_address = TRUE;
2297 }
2298 /* check if we need to generate a link status event */
2299 if (ifb->ifb_mode == IF_BOND_MODE_LACP ) {
2300 if (ifbond_selection(ifb) || active_lag) {
2301 event_code = (ifb->ifb_active_lag == NULL)
2302 ? KEV_DL_LINK_OFF
2303 : KEV_DL_LINK_ON;
2304 ifb->ifb_last_link_event = event_code;
2305 }
2306 bondport_transmit_machine(p, LAEventStart,
2307 TRANSMIT_MACHINE_TX_IMMEDIATE);
2308 }
2309 else {
2310 /* are we removing the last distributing interface? */
2311 if (was_distributing && ifb->ifb_distributing_count == 0) {
2312 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_OFF;
2313 }
2314 }
2315
2316 bond_unlock();
2317
2318 if (last) {
2319 ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2320 }
2321 else if (new_link_address) {
2322 struct ifnet * scan_ifp;
2323 bondport_ref scan_port;
2324
2325 /* ifbond_wait() allows port list traversal without holding the lock */
2326
2327 /* this port gave the bond its ethernet address, switch to new one */
2328 ifnet_set_lladdr_and_type(ifp,
2329 &head_port->po_saved_addr, ETHER_ADDR_LEN,
2330 IFT_ETHER);
2331
2332 /* re-program each port with the new link address */
2333 TAILQ_FOREACH(scan_port, &ifb->ifb_port_list, po_port_list) {
2334 scan_ifp = scan_port->po_ifp;
2335
2336 error = if_siflladdr(scan_ifp,
2337 (const struct ether_addr *) ifnet_lladdr(ifp));
2338 if (error != 0) {
2339 printf("bond_remove_interface(%s, %s): "
2340 "if_siflladdr (%s) failed %d\n",
2341 ifb->ifb_name, bondport_get_name(p),
2342 bondport_get_name(scan_port), error);
2343 }
2344 }
2345 }
2346
2347 /* restore the port's ethernet address */
2348 error = if_siflladdr(port_ifp, &p->po_saved_addr);
2349 if (error != 0) {
2350 printf("bond_remove_interface(%s, %s): if_siflladdr failed %d\n",
2351 ifb->ifb_name, bondport_get_name(p), error);
2352 }
2353
2354 /* restore the port's MTU */
2355 error = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2356 if (error != 0) {
2357 printf("bond_remove_interface(%s, %s): SIOCSIFMTU %d failed %d\n",
2358 ifb->ifb_name, bondport_get_name(p),
2359 p->po_devmtu.ifdm_current, error);
2360 }
2361
2362 /* remove the bond "protocol" */
2363 bond_detach_protocol(port_ifp);
2364
2365 /* generate link event */
2366 if (event_code != 0) {
2367 interface_link_event(ifp, event_code);
2368 }
2369
2370 bond_lock();
2371 bondport_free(p);
2372 ifnet_set_eflags(port_ifp, 0, IFEF_BOND);
2373 /* release this bondport's reference to the ifbond */
2374 ifbond_release(ifb);
2375
2376 signal_done:
2377 ifbond_signal(ifb, "bond_remove_interface");
2378 ifbond_release(ifb);
2379 return (error);
2380}
2381
2382static void
2383bond_set_lacp_mode(ifbond_ref ifb)
2384{
2385 bondport_ref p;
2386
2387 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2388 bondport_disable_distributing(p);
2389 bondport_start(p);
2390 }
2391 return;
2392}
2393
2394static void
2395bond_set_static_mode(ifbond_ref ifb)
2396{
2397 bondport_ref p;
2398 lacp_actor_partner_state s;
2399
2400 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2401 bondport_disable_distributing(p);
2402 bondport_set_selected(p, SelectedState_UNSELECTED);
2403 (void)bondport_remove_from_LAG(p);
2404 bondport_cancel_timers(p);
2405
2406 /* announce that we're Individual now */
2407 s = p->po_actor_state;
2408 s = lacp_actor_partner_state_set_individual(s);
2409 s = lacp_actor_partner_state_set_not_collecting(s);
2410 s = lacp_actor_partner_state_set_not_distributing(s);
2411 s = lacp_actor_partner_state_set_out_of_sync(s);
2412 p->po_actor_state = s;
2413 bondport_flags_set_ntt(p);
2414 bondport_transmit_machine(p, LAEventStart,
2415 TRANSMIT_MACHINE_TX_IMMEDIATE);
2416 /* clear state */
2417 p->po_actor_state = 0;
2418 bzero(&p->po_partner_state, sizeof(p->po_partner_state));
2419
2420 if (media_active(&p->po_media_info)) {
2421 bondport_enable_distributing(p);
2422 }
2423 else {
2424 bondport_disable_distributing(p);
2425 }
2426 }
2427 return;
2428}
2429
2430static int
2431bond_set_mode(struct ifnet * ifp, int mode)
2432{
2433 int error = 0;
2434 int event_code = 0;
2435 ifbond_ref ifb;
2436
2437 bond_lock();
2438 ifb = (ifbond_ref)ifnet_softc(ifp);
2439 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2440 bond_unlock();
2441 return ((ifb == NULL) ? EOPNOTSUPP : EBUSY);
2442 }
2443 if (ifb->ifb_mode == mode) {
2444 bond_unlock();
2445 return (0);
2446 }
2447
2448 ifbond_retain(ifb);
2449 ifbond_wait(ifb, "bond_set_mode");
2450
2451 /* verify (again) that the mode is actually different */
2452 if (ifb->ifb_mode == mode) {
2453 /* nothing to do */
2454 goto signal_done;
2455 }
2456
2457 ifb->ifb_mode = mode;
2458 if (mode == IF_BOND_MODE_LACP) {
2459 bond_set_lacp_mode(ifb);
2460
2461 /* check if we need to generate a link status event */
2462 if (ifbond_selection(ifb)) {
2463 event_code = (ifb->ifb_active_lag == NULL)
2464 ? KEV_DL_LINK_OFF
2465 : KEV_DL_LINK_ON;
2466 }
2467 } else {
2468 bond_set_static_mode(ifb);
2469 event_code = (ifb->ifb_distributing_count == 0)
2470 ? KEV_DL_LINK_OFF
2471 : KEV_DL_LINK_ON;
2472 }
2473 ifb->ifb_last_link_event = event_code;
2474
2475 signal_done:
2476 ifbond_signal(ifb, "bond_set_mode");
2477 bond_unlock();
2478 ifbond_release(ifb);
2479
2480 if (event_code != 0) {
2481 interface_link_event(ifp, event_code);
2482 }
2483 return (error);
2484}
2485
2486static int
2487bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap)
2488{
2489 int count;
2490 user_addr_t dst;
2491 int error = 0;
2492 struct if_bond_status_req * ibsr;
2493 struct if_bond_status ibs;
2494 bondport_ref port;
2495
2496 ibsr = &(ibr_p->ibr_ibru.ibru_status);
2497 if (ibsr->ibsr_version != IF_BOND_STATUS_REQ_VERSION) {
2498 return (EINVAL);
2499 }
2500 ibsr->ibsr_key = ifb->ifb_key;
2501 ibsr->ibsr_mode = ifb->ifb_mode;
2502 ibsr->ibsr_total = ifb->ifb_port_count;
2503 dst = proc_is64bit(current_proc())
2504 ? ibsr->ibsr_ibsru.ibsru_buffer64
2505 : CAST_USER_ADDR_T(ibsr->ibsr_ibsru.ibsru_buffer);
2506 if (dst == USER_ADDR_NULL) {
2507 /* just want to know how many there are */
2508 goto done;
2509 }
2510 if (ibsr->ibsr_count < 0) {
2511 return (EINVAL);
2512 }
2513 count = (ifb->ifb_port_count < ibsr->ibsr_count)
2514 ? ifb->ifb_port_count : ibsr->ibsr_count;
2515 TAILQ_FOREACH(port, &ifb->ifb_port_list, po_port_list) {
2516 struct if_bond_partner_state * ibps_p;
2517 partner_state_ref ps;
2518
2519 if (count == 0) {
2520 break;
2521 }
2522 bzero(&ibs, sizeof(ibs));
2523 strncpy(ibs.ibs_if_name, port->po_name, sizeof(ibs.ibs_if_name));
2524 ibs.ibs_port_priority = port->po_priority;
2525 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2526 ibs.ibs_state = port->po_actor_state;
2527 ibs.ibs_selected_state = port->po_selected;
2528 ps = &port->po_partner_state;
2529 ibps_p = &ibs.ibs_partner_state;
2530 ibps_p->ibps_system = ps->ps_lag_info.li_system;
2531 ibps_p->ibps_system_priority = ps->ps_lag_info.li_system_priority;
2532 ibps_p->ibps_key = ps->ps_lag_info.li_key;
2533 ibps_p->ibps_port = ps->ps_port;
2534 ibps_p->ibps_port_priority = ps->ps_port_priority;
2535 ibps_p->ibps_state = ps->ps_state;
2536 }
2537 else {
2538 /* fake the selected information */
2539 ibs.ibs_selected_state = bondport_flags_distributing(port)
2540 ? SelectedState_SELECTED : SelectedState_UNSELECTED;
2541 }
2542 error = copyout(&ibs, dst, sizeof(ibs));
2543 if (error != 0) {
2544 break;
2545 }
2546 dst += sizeof(ibs);
2547 count--;
2548 }
2549
2550 done:
2551 if (error == 0) {
2552 error = copyout(ibr_p, datap, sizeof(*ibr_p));
2553 }
2554 else {
2555 (void)copyout(ibr_p, datap, sizeof(*ibr_p));
2556 }
2557 return (error);
2558}
2559
2560static int
2561bond_set_promisc(__unused struct ifnet *ifp)
2562{
2563 int error = 0;
2564 /*
2565 * The benefit of doing this currently does not warrant
2566 * the added code complexity. Do nothing and return.
2567 */
2568 return (error);
2569}
2570
2571static void
2572bond_get_mtu_values(ifbond_ref ifb, int * ret_min, int * ret_max)
2573{
2574 int mtu_min = 0;
2575 int mtu_max = 0;
2576 bondport_ref p;
2577
2578 if (TAILQ_FIRST(&ifb->ifb_port_list) != NULL) {
2579 mtu_min = IF_MINMTU;
2580 }
2581 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2582 struct ifdevmtu * devmtu_p = &p->po_devmtu;
2583
2584 if (devmtu_p->ifdm_min > mtu_min) {
2585 mtu_min = devmtu_p->ifdm_min;
2586 }
2587 if (mtu_max == 0 || devmtu_p->ifdm_max < mtu_max) {
2588 mtu_max = devmtu_p->ifdm_max;
2589 }
2590 }
2591 *ret_min = mtu_min;
2592 *ret_max = mtu_max;
2593 return;
2594}
2595
2596static int
2597bond_set_mtu_on_ports(ifbond_ref ifb, int mtu)
2598{
2599 int error = 0;
2600 bondport_ref p;
2601
2602 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2603 error = siocsifmtu(p->po_ifp, mtu);
2604 if (error != 0) {
2605 printf("if_bond(%s): SIOCSIFMTU %s failed, %d\n",
2606 ifb->ifb_name, bondport_get_name(p), error);
2607 break;
2608 }
2609 }
2610 return (error);
2611}
2612
2613static int
2614bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu)
2615{
2616 int error = 0;
2617 ifbond_ref ifb;
2618 int mtu_min;
2619 int mtu_max;
2620 int new_max;
2621 int old_max;
2622
2623 bond_lock();
2624 ifb = (ifbond_ref)ifnet_softc(ifp);
2625 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2626 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2627 goto done;
2628 }
2629 ifbond_retain(ifb);
2630 ifbond_wait(ifb, "bond_set_mtu");
2631
2632 /* check again */
2633 if (ifnet_softc(ifp) == NULL || ifbond_flags_if_detaching(ifb)) {
2634 error = EBUSY;
2635 goto signal_done;
2636 }
2637 bond_get_mtu_values(ifb, &mtu_min, &mtu_max);
2638 if (mtu > mtu_max) {
2639 error = EINVAL;
2640 goto signal_done;
2641 }
2642 if (mtu < mtu_min && (isdevmtu == 0 || mtu != 0)) {
2643 /* allow SIOCSIFALTMTU to set the mtu to 0 */
2644 error = EINVAL;
2645 goto signal_done;
2646 }
2647 if (isdevmtu) {
2648 new_max = (mtu > (int)ifnet_mtu(ifp)) ? mtu : (int)ifnet_mtu(ifp);
2649 }
2650 else {
2651 new_max = (mtu > ifb->ifb_altmtu) ? mtu : ifb->ifb_altmtu;
2652 }
2653 old_max = ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2654 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2655 if (new_max != old_max) {
2656 /* we can safely walk the list of port without the lock held */
2657 bond_unlock();
2658 error = bond_set_mtu_on_ports(ifb, new_max);
2659 if (error != 0) {
2660 /* try our best to back out of it */
2661 (void)bond_set_mtu_on_ports(ifb, old_max);
2662 }
2663 bond_lock();
2664 }
2665 if (error == 0) {
2666 if (isdevmtu) {
2667 ifb->ifb_altmtu = mtu;
2668 }
2669 else {
2670 ifnet_set_mtu(ifp, mtu);
2671 }
2672 }
2673
2674 signal_done:
2675 ifbond_signal(ifb, "bond_set_mtu");
2676 ifbond_release(ifb);
2677
2678 done:
2679 bond_unlock();
2680 return (error);
2681}
2682
2683static int
2684bond_ioctl(struct ifnet *ifp, u_long cmd, void * data)
2685{
2686 int error = 0;
2687 struct if_bond_req ibr;
2688 struct ifaddr * ifa;
2689 ifbond_ref ifb;
2690 struct ifreq * ifr;
2691 struct ifmediareq *ifmr;
2692 struct ifnet * port_ifp = NULL;
2693 user_addr_t user_addr;
2694
2695 if (ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
2696 return (EOPNOTSUPP);
2697 }
2698 ifr = (struct ifreq *)data;
2699 ifa = (struct ifaddr *)data;
2700
2701 switch (cmd) {
2702 case SIOCSIFADDR:
2703 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
2704 break;
2705
2706 case SIOCGIFMEDIA32:
2707 case SIOCGIFMEDIA64:
2708 bond_lock();
2709 ifb = (ifbond_ref)ifnet_softc(ifp);
2710 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2711 bond_unlock();
2712 return (ifb == NULL ? EOPNOTSUPP : EBUSY);
2713 }
2714 ifmr = (struct ifmediareq *)data;
2715 ifmr->ifm_current = IFM_ETHER;
2716 ifmr->ifm_mask = 0;
2717 ifmr->ifm_status = IFM_AVALID;
2718 ifmr->ifm_active = IFM_ETHER;
2719 ifmr->ifm_count = 1;
2720 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2721 if (ifb->ifb_active_lag != NULL) {
2722 ifmr->ifm_active = ifb->ifb_active_lag->lag_active_media;
2723 ifmr->ifm_status |= IFM_ACTIVE;
2724 }
2725 }
2726 else if (ifb->ifb_distributing_count > 0) {
2727 ifmr->ifm_active
2728 = ifb->ifb_distributing_array[0]->po_media_info.mi_active;
2729 ifmr->ifm_status |= IFM_ACTIVE;
2730 }
2731 bond_unlock();
2732 user_addr = (cmd == SIOCGIFMEDIA64) ?
2733 ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
2734 CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
2735 if (user_addr != USER_ADDR_NULL) {
2736 error = copyout(&ifmr->ifm_current,
2737 user_addr,
2738 sizeof(int));
2739 }
2740 break;
2741
2742 case SIOCSIFMEDIA:
2743 /* XXX send the SIFMEDIA to all children? Or force autoselect? */
2744 error = EINVAL;
2745 break;
2746
2747 case SIOCGIFDEVMTU:
2748 bond_lock();
2749 ifb = (ifbond_ref)ifnet_softc(ifp);
2750 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2751 bond_unlock();
2752 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2753 break;
2754 }
2755 ifr->ifr_devmtu.ifdm_current = bond_device_mtu(ifp, ifb);
2756 bond_get_mtu_values(ifb, &ifr->ifr_devmtu.ifdm_min,
2757 &ifr->ifr_devmtu.ifdm_max);
2758 bond_unlock();
2759 break;
2760
2761 case SIOCGIFALTMTU:
2762 bond_lock();
2763 ifb = (ifbond_ref)ifnet_softc(ifp);
2764 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2765 bond_unlock();
2766 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2767 break;
2768 }
2769 ifr->ifr_mtu = ifb->ifb_altmtu;
2770 bond_unlock();
2771 break;
2772
2773 case SIOCSIFALTMTU:
2774 error = bond_set_mtu(ifp, ifr->ifr_mtu, 1);
2775 break;
2776
2777 case SIOCSIFMTU:
2778 error = bond_set_mtu(ifp, ifr->ifr_mtu, 0);
2779 break;
2780
2781 case SIOCSIFBOND:
2782 user_addr = proc_is64bit(current_proc())
2783 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
2784 error = copyin(user_addr, &ibr, sizeof(ibr));
2785 if (error) {
2786 break;
2787 }
2788 switch (ibr.ibr_op) {
2789 case IF_BOND_OP_ADD_INTERFACE:
2790 case IF_BOND_OP_REMOVE_INTERFACE:
2791 port_ifp = ifunit(ibr.ibr_ibru.ibru_if_name);
2792 if (port_ifp == NULL) {
2793 error = ENXIO;
2794 break;
2795 }
2796 if (ifnet_type(port_ifp) != IFT_ETHER) {
2797 error = EPROTONOSUPPORT;
2798 break;
2799 }
2800 break;
2801 case IF_BOND_OP_SET_VERBOSE:
2802 case IF_BOND_OP_SET_MODE:
2803 break;
2804 default:
2805 error = EOPNOTSUPP;
2806 break;
2807 }
2808 if (error != 0) {
2809 break;
2810 }
2811 switch (ibr.ibr_op) {
2812 case IF_BOND_OP_ADD_INTERFACE:
2813 error = bond_add_interface(ifp, port_ifp);
2814 break;
2815 case IF_BOND_OP_REMOVE_INTERFACE:
2816 bond_lock();
2817 ifb = (ifbond_ref)ifnet_softc(ifp);
2818 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2819 bond_unlock();
2820 return (ifb == NULL ? EOPNOTSUPP : EBUSY);
2821 }
2822 error = bond_remove_interface(ifb, port_ifp);
2823 bond_unlock();
2824 break;
2825 case IF_BOND_OP_SET_VERBOSE:
2826 bond_lock();
2827 if (g_bond == NULL) {
2828 bond_unlock();
2829 error = ENXIO;
2830 break;
2831 }
2832 g_bond->verbose = ibr.ibr_ibru.ibru_int_val;
2833 bond_unlock();
2834 break;
2835 case IF_BOND_OP_SET_MODE:
2836 switch (ibr.ibr_ibru.ibru_int_val) {
2837 case IF_BOND_MODE_LACP:
2838 case IF_BOND_MODE_STATIC:
2839 break;
2840 default:
2841 error = EINVAL;
2842 break;
2843 }
2844 if (error != 0) {
2845 break;
2846 }
2847 error = bond_set_mode(ifp, ibr.ibr_ibru.ibru_int_val);
2848 break;
2849 }
2850 break; /* SIOCSIFBOND */
2851
2852 case SIOCGIFBOND:
2853 user_addr = proc_is64bit(current_proc())
2854 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
2855 error = copyin(user_addr, &ibr, sizeof(ibr));
2856 if (error) {
2857 break;
2858 }
2859 switch (ibr.ibr_op) {
2860 case IF_BOND_OP_GET_STATUS:
2861 break;
2862 default:
2863 error = EOPNOTSUPP;
2864 break;
2865 }
2866 if (error != 0) {
2867 break;
2868 }
2869 bond_lock();
2870 ifb = (ifbond_ref)ifnet_softc(ifp);
2871 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2872 bond_unlock();
2873 return (ifb == NULL ? EOPNOTSUPP : EBUSY);
2874 }
2875 switch (ibr.ibr_op) {
2876 case IF_BOND_OP_GET_STATUS:
2877 error = bond_get_status(ifb, &ibr, user_addr);
2878 break;
2879 }
2880 bond_unlock();
2881 break; /* SIOCGIFBOND */
2882
2883 case SIOCSIFLLADDR:
2884 error = EOPNOTSUPP;
2885 break;
2886
2887 case SIOCSIFFLAGS:
2888 /* enable/disable promiscuous mode */
2889 bond_lock();
2890 error = bond_set_promisc(ifp);
2891 bond_unlock();
2892 break;
2893
2894 case SIOCADDMULTI:
2895 case SIOCDELMULTI:
2896 error = bond_setmulti(ifp);
2897 break;
2898 default:
2899 error = EOPNOTSUPP;
2900 }
2901 return error;
2902}
2903
2904static void
2905bond_if_free(struct ifnet * ifp)
2906{
2907 ifbond_ref ifb;
2908
2909 if (ifp == NULL) {
2910 return;
2911 }
2912 bond_lock();
2913 ifb = (ifbond_ref)ifnet_softc(ifp);
2914 if (ifb == NULL) {
2915 bond_unlock();
2916 return;
2917 }
2918 ifbond_release(ifb);
2919 bond_unlock();
2920 ifnet_release(ifp);
2921 return;
2922}
2923
2924static void
2925bond_handle_event(struct ifnet * port_ifp, int event_code)
2926{
2927 struct ifnet * bond_ifp = NULL;
2928 ifbond_ref ifb;
2929 int old_distributing_count;
2930 bondport_ref p;
2931 struct media_info media_info = { 0, 0};
2932
2933 switch (event_code) {
2934 case KEV_DL_IF_DETACHED:
2935 break;
2936 case KEV_DL_LINK_OFF:
2937 case KEV_DL_LINK_ON:
2938 media_info = interface_media_info(port_ifp);
2939 break;
2940 default:
2941 return;
2942 }
2943 bond_lock();
2944 p = bond_lookup_port(port_ifp);
2945 if (p == NULL) {
2946 bond_unlock();
2947 return;
2948 }
2949 ifb = p->po_bond;
2950 old_distributing_count = ifb->ifb_distributing_count;
2951 switch (event_code) {
2952 case KEV_DL_IF_DETACHED:
2953 bond_remove_interface(ifb, p->po_ifp);
2954 break;
2955 case KEV_DL_LINK_OFF:
2956 case KEV_DL_LINK_ON:
2957 p->po_media_info = media_info;
2958 if (p->po_enabled) {
2959 bondport_link_status_changed(p);
2960 }
2961 break;
2962 }
2963 /* generate a link-event */
2964 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2965 if (ifbond_selection(ifb)) {
2966 event_code = (ifb->ifb_active_lag == NULL)
2967 ? KEV_DL_LINK_OFF
2968 : KEV_DL_LINK_ON;
2969 /* XXX need to take a reference on bond_ifp */
2970 bond_ifp = ifb->ifb_ifp;
2971 ifb->ifb_last_link_event = event_code;
2972 }
2973 else {
2974 event_code = (ifb->ifb_active_lag == NULL)
2975 ? KEV_DL_LINK_OFF
2976 : KEV_DL_LINK_ON;
2977 if (event_code != ifb->ifb_last_link_event) {
2978 if (g_bond->verbose) {
2979 timestamp_printf("%s: (event) generating LINK event\n",
2980 ifb->ifb_name);
2981 }
2982 bond_ifp = ifb->ifb_ifp;
2983 ifb->ifb_last_link_event = event_code;
2984 }
2985 }
2986 }
2987 else {
2988 /*
2989 * if the distributing array membership changed from 0 <-> !0
2990 * generate a link event
2991 */
2992 if (old_distributing_count == 0
2993 && ifb->ifb_distributing_count != 0) {
2994 event_code = KEV_DL_LINK_ON;
2995 }
2996 else if (old_distributing_count != 0
2997 && ifb->ifb_distributing_count == 0) {
2998 event_code = KEV_DL_LINK_OFF;
2999 }
3000 if (event_code != 0 && event_code != ifb->ifb_last_link_event) {
3001 bond_ifp = ifb->ifb_ifp;
3002 ifb->ifb_last_link_event = event_code;
3003 }
3004 }
3005
3006 bond_unlock();
3007 if (bond_ifp != NULL) {
3008 interface_link_event(bond_ifp, event_code);
3009 }
3010 return;
3011}
3012
3013static void
3014bond_event(struct ifnet * port_ifp, __unused protocol_family_t protocol,
3015 const struct kev_msg * event)
3016{
3017 int event_code;
3018
3019 if (event->vendor_code != KEV_VENDOR_APPLE
3020 || event->kev_class != KEV_NETWORK_CLASS
3021 || event->kev_subclass != KEV_DL_SUBCLASS) {
3022 return;
3023 }
3024 event_code = event->event_code;
3025 switch (event_code) {
3026 case KEV_DL_LINK_OFF:
3027 case KEV_DL_LINK_ON:
3028 /* we only care about link status changes */
3029 bond_handle_event(port_ifp, event_code);
3030 break;
3031 default:
3032 break;
3033 }
3034 return;
3035}
3036
3037static errno_t
3038bond_detached(ifnet_t port_ifp, __unused protocol_family_t protocol)
3039{
3040 bond_handle_event(port_ifp, KEV_DL_IF_DETACHED);
3041 return (0);
3042}
3043
3044static void
3045interface_link_event(struct ifnet * ifp, u_int32_t event_code)
3046{
3047 struct {
3048 struct kern_event_msg header;
3049 u_int32_t unit;
3050 char if_name[IFNAMSIZ];
3051 } event;
3052
3053 bzero(&event, sizeof(event));
3054 event.header.total_size = sizeof(event);
3055 event.header.vendor_code = KEV_VENDOR_APPLE;
3056 event.header.kev_class = KEV_NETWORK_CLASS;
3057 event.header.kev_subclass = KEV_DL_SUBCLASS;
3058 event.header.event_code = event_code;
3059 event.header.event_data[0] = ifnet_family(ifp);
3060 event.unit = (u_int32_t) ifnet_unit(ifp);
3061 strncpy(event.if_name, ifnet_name(ifp), IFNAMSIZ);
3062 ifnet_event(ifp, &event.header);
3063 return;
3064}
3065
3066/*
3067 * Function: bond_attach_protocol
3068 * Purpose:
3069 * Attach a DLIL protocol to the interface.
3070 *
3071 * The ethernet demux special cases to always return PF_BOND if the
3072 * interface is bonded. That means we receive all traffic from that
3073 * interface without passing any of the traffic to any other attached
3074 * protocol.
3075 */
3076static int
3077bond_attach_protocol(struct ifnet *ifp)
3078{
3079 int error;
3080 struct ifnet_attach_proto_param reg;
3081
3082 bzero(&reg, sizeof(reg));
3083 reg.input = bond_input;
3084 reg.event = bond_event;
3085 reg.detached = bond_detached;
3086
3087 error = ifnet_attach_protocol(ifp, PF_BOND, &reg);
3088 if (error) {
3089 printf("bond over %s%d: ifnet_attach_protocol failed, %d\n",
3090 ifnet_name(ifp), ifnet_unit(ifp), error);
3091 }
3092 return (error);
3093}
3094
3095/*
3096 * Function: bond_detach_protocol
3097 * Purpose:
3098 * Detach our DLIL protocol from an interface
3099 */
3100static int
3101bond_detach_protocol(struct ifnet *ifp)
3102{
3103 int error;
3104
3105 error = ifnet_detach_protocol(ifp, PF_BOND);
3106 if (error) {
3107 printf("bond over %s%d: ifnet_detach_protocol failed, %d\n",
3108 ifnet_name(ifp), ifnet_unit(ifp), error);
3109 }
3110 return (error);
3111}
3112
3113/*
3114 * DLIL interface family functions
3115 */
3116extern int ether_attach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3117extern void ether_detach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3118extern int ether_attach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3119extern void ether_detach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3120extern int ether_attach_at(ifnet_t ifp, protocol_family_t protocol_family);
3121extern void ether_detach_at(ifnet_t ifp, protocol_family_t protocol_family);
3122
3123__private_extern__ int
3124bond_family_init(void)
3125{
3126 int error=0;
3127
3128 error = proto_register_plumber(PF_INET, APPLE_IF_FAM_BOND,
3129 ether_attach_inet,
3130 ether_detach_inet);
3131 if (error != 0) {
3132 printf("bond: proto_register_plumber failed for AF_INET error=%d\n",
3133 error);
3134 goto done;
3135 }
3136#if INET6
3137 error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_BOND,
3138 ether_attach_inet6,
3139 ether_detach_inet6);
3140 if (error != 0) {
3141 printf("bond: proto_register_plumber failed for AF_INET6 error=%d\n",
3142 error);
3143 goto done;
3144 }
3145#endif
3146#if NETAT
3147 error = proto_register_plumber(PF_APPLETALK, APPLE_IF_FAM_BOND,
3148 ether_attach_at,
3149 ether_detach_at);
3150 if (error != 0) {
3151 printf("bond: proto_register_plumber failed for AppleTalk error=%d\n",
3152 error);
3153 goto done;
3154 }
3155#endif
3156 error = bond_clone_attach();
3157 if (error != 0) {
3158 printf("bond: proto_register_plumber failed bond_clone_attach error=%d\n",
3159 error);
3160 goto done;
3161 }
3162
3163 done:
3164 return (error);
3165}
3166/**
3167 **
3168 ** LACP routines:
3169 **
3170 **/
3171
3172/**
3173 ** LACP ifbond_list routines
3174 **/
3175static bondport_ref
3176ifbond_list_find_moved_port(bondport_ref rx_port,
3177 const lacp_actor_partner_tlv_ref atlv)
3178{
3179 ifbond_ref bond;
3180 bondport_ref p;
3181 partner_state_ref ps;
3182 LAG_info_ref ps_li;
3183
3184 TAILQ_FOREACH(bond, &g_bond->ifbond_list, ifb_bond_list) {
3185 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3186
3187 if (rx_port == p) {
3188 /* no point in comparing against ourselves */
3189 continue;
3190 }
3191 if (p->po_receive_state != ReceiveState_PORT_DISABLED) {
3192 /* it's not clear that we should be checking this */
3193 continue;
3194 }
3195 ps = &p->po_partner_state;
3196 if (lacp_actor_partner_state_defaulted(ps->ps_state)) {
3197 continue;
3198 }
3199 ps_li = &ps->ps_lag_info;
3200 if (ps->ps_port == lacp_actor_partner_tlv_get_port(atlv)
3201 && bcmp(&ps_li->li_system, atlv->lap_system,
3202 sizeof(ps_li->li_system)) == 0) {
3203 if (g_bond->verbose) {
3204 timestamp_printf("System " EA_FORMAT
3205 " Port 0x%x moved from %s to %s\n",
3206 EA_LIST(&ps_li->li_system), ps->ps_port,
3207 bondport_get_name(p),
3208 bondport_get_name(rx_port));
3209 }
3210 return (p);
3211 }
3212 }
3213 }
3214 return (NULL);
3215}
3216
3217/**
3218 ** LACP ifbond, LAG routines
3219 **/
3220
3221static int
3222ifbond_selection(ifbond_ref bond)
3223{
3224 int all_ports_ready = 0;
3225 int active_media = 0;
3226 LAG_ref lag = NULL;
3227 int lag_changed = 0;
3228 bondport_ref p;
3229 int port_speed = 0;
3230
3231 lag = ifbond_find_best_LAG(bond, &active_media);
3232 if (lag != bond->ifb_active_lag) {
3233 if (bond->ifb_active_lag != NULL) {
3234 ifbond_deactivate_LAG(bond, bond->ifb_active_lag);
3235 bond->ifb_active_lag = NULL;
3236 }
3237 bond->ifb_active_lag = lag;
3238 if (lag != NULL) {
3239 ifbond_activate_LAG(bond, lag, active_media);
3240 }
3241 lag_changed = 1;
3242 }
3243 else if (lag != NULL) {
3244 if (lag->lag_active_media != active_media) {
3245 if (g_bond->verbose) {
3246 timestamp_printf("LAG PORT SPEED CHANGED from %d to %d\n",
3247 link_speed(lag->lag_active_media),
3248 link_speed(active_media));
3249 }
3250 ifbond_deactivate_LAG(bond, lag);
3251 ifbond_activate_LAG(bond, lag, active_media);
3252 lag_changed = 1;
3253 }
3254 }
3255 if (lag != NULL) {
3256 port_speed = link_speed(active_media);
3257 all_ports_ready = ifbond_all_ports_ready(bond);
3258 }
3259 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3260 if (lag != NULL && p->po_lag == lag
3261 && media_speed(&p->po_media_info) == port_speed
3262 && (p->po_mux_state == MuxState_DETACHED
3263 || p->po_selected == SelectedState_SELECTED
3264 || p->po_selected == SelectedState_STANDBY)
3265 && bondport_aggregatable(p)) {
3266 if (bond->ifb_max_active > 0) {
3267 if (lag->lag_selected_port_count < bond->ifb_max_active) {
3268 if (p->po_selected == SelectedState_STANDBY
3269 || p->po_selected == SelectedState_UNSELECTED) {
3270 bondport_set_selected(p, SelectedState_SELECTED);
3271 }
3272 }
3273 else if (p->po_selected == SelectedState_UNSELECTED) {
3274 bondport_set_selected(p, SelectedState_STANDBY);
3275 }
3276 }
3277 else {
3278 bondport_set_selected(p, SelectedState_SELECTED);
3279 }
3280 }
3281 if (bondport_flags_selected_changed(p)) {
3282 bondport_flags_clear_selected_changed(p);
3283 bondport_mux_machine(p, LAEventSelectedChange, NULL);
3284 }
3285 if (all_ports_ready
3286 && bondport_flags_ready(p)
3287 && p->po_mux_state == MuxState_WAITING) {
3288 bondport_mux_machine(p, LAEventReady, NULL);
3289 }
3290 bondport_transmit_machine(p, LAEventStart, NULL);
3291 }
3292 return (lag_changed);
3293}
3294
3295static LAG_ref
3296ifbond_find_best_LAG(ifbond_ref bond, int * active_media)
3297{
3298 int best_active = 0;
3299 LAG_ref best_lag = NULL;
3300 int best_count = 0;
3301 int best_speed = 0;
3302 LAG_ref lag;
3303
3304 if (bond->ifb_active_lag != NULL) {
3305 best_lag = bond->ifb_active_lag;
3306 best_count = LAG_get_aggregatable_port_count(best_lag, &best_active);
3307 if (bond->ifb_max_active > 0
3308 && best_count > bond->ifb_max_active) {
3309 best_count = bond->ifb_max_active;
3310 }
3311 best_speed = link_speed(best_active);
3312 }
3313 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3314 int active;
3315 int count;
3316 int speed;
3317
3318 if (lag == bond->ifb_active_lag) {
3319 /* we've already computed it */
3320 continue;
3321 }
3322 count = LAG_get_aggregatable_port_count(lag, &active);
3323 if (count == 0) {
3324 continue;
3325 }
3326 if (bond->ifb_max_active > 0
3327 && count > bond->ifb_max_active) {
3328 /* if there's a limit, don't count extra links */
3329 count = bond->ifb_max_active;
3330 }
3331 speed = link_speed(active);
3332 if ((count * speed) > (best_count * best_speed)) {
3333 best_count = count;
3334 best_speed = speed;
3335 best_active = active;
3336 best_lag = lag;
3337 }
3338 }
3339 if (best_count == 0) {
3340 return (NULL);
3341 }
3342 *active_media = best_active;
3343 return (best_lag);
3344}
3345
3346static void
3347ifbond_deactivate_LAG(__unused ifbond_ref bond, LAG_ref lag)
3348{
3349 bondport_ref p;
3350
3351 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3352 bondport_set_selected(p, SelectedState_UNSELECTED);
3353 }
3354 return;
3355}
3356
3357static void
3358ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media)
3359{
3360 int need = 0;
3361 bondport_ref p;
3362
3363 if (bond->ifb_max_active > 0) {
3364 need = bond->ifb_max_active;
3365 }
3366 lag->lag_active_media = active_media;
3367 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3368 if (bondport_aggregatable(p) == 0) {
3369 bondport_set_selected(p, SelectedState_UNSELECTED);
3370 }
3371 else if (media_speed(&p->po_media_info) != link_speed(active_media)) {
3372 bondport_set_selected(p, SelectedState_UNSELECTED);
3373 }
3374 else if (p->po_mux_state == MuxState_DETACHED) {
3375 if (bond->ifb_max_active > 0) {
3376 if (need > 0) {
3377 bondport_set_selected(p, SelectedState_SELECTED);
3378 need--;
3379 }
3380 else {
3381 bondport_set_selected(p, SelectedState_STANDBY);
3382 }
3383 }
3384 else {
3385 bondport_set_selected(p, SelectedState_SELECTED);
3386 }
3387 }
3388 else {
3389 bondport_set_selected(p, SelectedState_UNSELECTED);
3390 }
3391 }
3392 return;
3393}
3394
#if 0
/*
 * Function: ifbond_set_max_active
 * Purpose:
 *   (Compiled out.)  Set the bond's limit on active ports and, if the
 *   active LAG currently has more selected ports than the new limit,
 *   unselect the excess ones in list order.
 */
static void
ifbond_set_max_active(ifbond_ref bond, int max_active)
{
    LAG_ref         lag = bond->ifb_active_lag;
    bondport_ref    port;
    int             excess;

    bond->ifb_max_active = max_active;
    if (bond->ifb_max_active <= 0 || lag == NULL) {
        return;
    }
    if (lag->lag_selected_port_count <= bond->ifb_max_active) {
        return;
    }

    excess = lag->lag_selected_port_count - bond->ifb_max_active;
    TAILQ_FOREACH(port, &lag->lag_port_list, po_lag_port_list) {
        if (port->po_selected != SelectedState_SELECTED) {
            continue;
        }
        bondport_set_selected(port, SelectedState_UNSELECTED);
        if (--excess == 0) {
            break;
        }
    }
}
#endif
3423
3424static int
3425ifbond_all_ports_ready(ifbond_ref bond)
3426{
3427 int ready = 0;
3428 bondport_ref p;
3429
3430 if (bond->ifb_active_lag == NULL) {
3431 return (0);
3432 }
3433 TAILQ_FOREACH(p, &bond->ifb_active_lag->lag_port_list, po_lag_port_list) {
3434 if (p->po_mux_state == MuxState_WAITING
3435 && p->po_selected == SelectedState_SELECTED) {
3436 if (bondport_flags_ready(p) == 0) {
3437 return (0);
3438 }
3439 }
3440 /* note that there was at least one ready port */
3441 ready = 1;
3442 }
3443 return (ready);
3444}
3445
3446static int
3447ifbond_all_ports_attached(ifbond_ref bond, bondport_ref this_port)
3448{
3449 bondport_ref p;
3450
3451 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3452 if (this_port == p) {
3453 continue;
3454 }
3455 if (bondport_flags_mux_attached(p) == 0) {
3456 return (0);
3457 }
3458 }
3459 return (1);
3460}
3461
3462static LAG_ref
3463ifbond_get_LAG_matching_port(ifbond_ref bond, bondport_ref p)
3464{
3465 LAG_ref lag;
3466
3467 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3468 if (bcmp(&lag->lag_info, &p->po_partner_state.ps_lag_info,
3469 sizeof(lag->lag_info)) == 0) {
3470 return (lag);
3471 }
3472 }
3473 return (NULL);
3474}
3475
3476static int
3477LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media)
3478{
3479 int active;
3480 int count;
3481 bondport_ref p;
3482 int speed;
3483
3484 active = 0;
3485 count = 0;
3486 speed = 0;
3487 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3488 if (bondport_aggregatable(p)) {
3489 int this_speed;
3490
3491 this_speed = media_speed(&p->po_media_info);
3492 if (this_speed == 0) {
3493 continue;
3494 }
3495 if (this_speed > speed) {
3496 active = p->po_media_info.mi_active;
3497 speed = this_speed;
3498 count = 1;
3499 }
3500 else if (this_speed == speed) {
3501 count++;
3502 }
3503 }
3504 }
3505 *active_media = active;
3506 return (count);
3507}
3508
3509
3510/**
3511 ** LACP bondport routines
3512 **/
3513static void
3514bondport_link_status_changed(bondport_ref p)
3515{
3516 ifbond_ref bond = p->po_bond;
3517
3518 if (g_bond->verbose) {
3519 if (media_active(&p->po_media_info)) {
3520 timestamp_printf("[%s] Link UP %d Mbit/s %s duplex\n",
3521 bondport_get_name(p),
3522 media_speed(&p->po_media_info),
3523 media_full_duplex(&p->po_media_info)
3524 ? "full" : "half");
3525 }
3526 else {
3527 timestamp_printf("[%s] Link DOWN\n", bondport_get_name(p));
3528 }
3529 }
3530 if (bond->ifb_mode == IF_BOND_MODE_LACP) {
3531 if (media_active(&p->po_media_info)
3532 && bond->ifb_active_lag != NULL
3533 && p->po_lag == bond->ifb_active_lag
3534 && p->po_selected != SelectedState_UNSELECTED) {
3535 if (media_speed(&p->po_media_info) != p->po_lag->lag_active_media) {
3536 if (g_bond->verbose) {
3537 timestamp_printf("[%s] Port speed %d differs from LAG %d\n",
3538 bondport_get_name(p),
3539 media_speed(&p->po_media_info),
3540 link_speed(p->po_lag->lag_active_media));
3541 }
3542 bondport_set_selected(p, SelectedState_UNSELECTED);
3543 }
3544 }
3545 bondport_receive_machine(p, LAEventMediaChange, NULL);
3546 bondport_mux_machine(p, LAEventMediaChange, NULL);
3547 bondport_periodic_transmit_machine(p, LAEventMediaChange, NULL);
3548 }
3549 else {
3550 if (media_active(&p->po_media_info)) {
3551 bondport_enable_distributing(p);
3552 }
3553 else {
3554 bondport_disable_distributing(p);
3555 }
3556 }
3557 return;
3558}
3559
3560static int
3561bondport_aggregatable(bondport_ref p)
3562{
3563 partner_state_ref ps = &p->po_partner_state;
3564
3565 if (lacp_actor_partner_state_aggregatable(p->po_actor_state) == 0
3566 || lacp_actor_partner_state_aggregatable(ps->ps_state) == 0) {
3567 /* we and/or our partner are individual */
3568 return (0);
3569 }
3570 if (p->po_lag == NULL) {
3571 return (0);
3572 }
3573 switch (p->po_receive_state) {
3574 default:
3575 if (g_bond->verbose) {
3576 timestamp_printf("[%s] Port is not selectable\n",
3577 bondport_get_name(p));
3578 }
3579 return (0);
3580 case ReceiveState_CURRENT:
3581 case ReceiveState_EXPIRED:
3582 break;
3583 }
3584 return (1);
3585}
3586
3587static int
3588bondport_matches_LAG(bondport_ref p, LAG_ref lag)
3589{
3590 LAG_info_ref lag_li;
3591 partner_state_ref ps;
3592 LAG_info_ref ps_li;
3593
3594 ps = &p->po_partner_state;
3595 ps_li = &ps->ps_lag_info;
3596 lag_li = &lag->lag_info;
3597 if (ps_li->li_system_priority == lag_li->li_system_priority
3598 && ps_li->li_key == lag_li->li_key
3599 && (bcmp(&ps_li->li_system, &lag_li->li_system,
3600 sizeof(lag_li->li_system))
3601 == 0)) {
3602 return (1);
3603 }
3604 return (0);
3605}
3606
3607static int
3608bondport_remove_from_LAG(bondport_ref p)
3609{
3610 int active_lag = 0;
3611 ifbond_ref bond = p->po_bond;
3612 LAG_ref lag = p->po_lag;
3613
3614 if (lag == NULL) {
3615 return (0);
3616 }
3617 TAILQ_REMOVE(&lag->lag_port_list, p, po_lag_port_list);
3618 if (g_bond->verbose) {
3619 timestamp_printf("[%s] Removed from LAG (0x%04x," EA_FORMAT
3620 ",0x%04x)\n",
3621 bondport_get_name(p),
3622 lag->lag_info.li_system_priority,
3623 EA_LIST(&lag->lag_info.li_system),
3624 lag->lag_info.li_key);
3625 }
3626 p->po_lag = NULL;
3627 lag->lag_port_count--;
3628 if (lag->lag_port_count > 0) {
3629 return (bond->ifb_active_lag == lag);
3630 }
3631 if (g_bond->verbose) {
3632 timestamp_printf("Key 0x%04x: LAG Released (%04x," EA_FORMAT
3633 ",0x%04x)\n",
3634 bond->ifb_key,
3635 lag->lag_info.li_system_priority,
3636 EA_LIST(&lag->lag_info.li_system),
3637 lag->lag_info.li_key);
3638 }
3639 TAILQ_REMOVE(&bond->ifb_lag_list, lag, lag_list);
3640 if (bond->ifb_active_lag == lag) {
3641 bond->ifb_active_lag = NULL;
3642 active_lag = 1;
3643 }
3644 FREE(lag, M_BOND);
3645 return (active_lag);
3646}
3647
3648static void
3649bondport_add_to_LAG(bondport_ref p, LAG_ref lag)
3650{
3651 TAILQ_INSERT_TAIL(&lag->lag_port_list, p, po_lag_port_list);
3652 p->po_lag = lag;
3653 lag->lag_port_count++;
3654 if (g_bond->verbose) {
3655 timestamp_printf("[%s] Added to LAG (0x%04x," EA_FORMAT "0x%04x)\n",
3656 bondport_get_name(p),
3657 lag->lag_info.li_system_priority,
3658 EA_LIST(&lag->lag_info.li_system),
3659 lag->lag_info.li_key);
3660 }
3661 return;
3662}
3663
3664static void
3665bondport_assign_to_LAG(bondport_ref p)
3666{
3667 ifbond_ref bond = p->po_bond;
3668 LAG_ref lag;
3669
3670 if (lacp_actor_partner_state_defaulted(p->po_actor_state)) {
3671 bondport_remove_from_LAG(p);
3672 return;
3673 }
3674 lag = p->po_lag;
3675 if (lag != NULL) {
3676 if (bondport_matches_LAG(p, lag)) {
3677 /* still OK */
3678 return;
3679 }
3680 bondport_remove_from_LAG(p);
3681 }
3682 lag = ifbond_get_LAG_matching_port(bond, p);
3683 if (lag != NULL) {
3684 bondport_add_to_LAG(p, lag);
3685 return;
3686 }
3687 lag = (LAG_ref)_MALLOC(sizeof(*lag), M_BOND, M_WAITOK);
3688 TAILQ_INIT(&lag->lag_port_list);
3689 lag->lag_port_count = 0;
3690 lag->lag_selected_port_count = 0;
3691 lag->lag_info = p->po_partner_state.ps_lag_info;
3692 TAILQ_INSERT_TAIL(&bond->ifb_lag_list, lag, lag_list);
3693 if (g_bond->verbose) {
3694 timestamp_printf("Key 0x%04x: LAG Created (0x%04x," EA_FORMAT
3695 ",0x%04x)\n",
3696 bond->ifb_key,
3697 lag->lag_info.li_system_priority,
3698 EA_LIST(&lag->lag_info.li_system),
3699 lag->lag_info.li_key);
3700 }
3701 bondport_add_to_LAG(p, lag);
3702 return;
3703}
3704
3705static void
3706bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p)
3707{
3708 bondport_ref moved_port;
3709
3710 moved_port
3711 = ifbond_list_find_moved_port(p, (const lacp_actor_partner_tlv_ref)
3712 &in_lacpdu_p->la_actor_tlv);
3713 if (moved_port != NULL) {
3714 bondport_receive_machine(moved_port, LAEventPortMoved, NULL);
3715 }
3716 bondport_receive_machine(p, LAEventPacket, in_lacpdu_p);
3717 bondport_mux_machine(p, LAEventPacket, in_lacpdu_p);
3718 bondport_periodic_transmit_machine(p, LAEventPacket, in_lacpdu_p);
3719 return;
3720}
3721
3722static void
3723bondport_set_selected(bondport_ref p, SelectedState s)
3724{
3725 if (s != p->po_selected) {
3726 ifbond_ref bond = p->po_bond;
3727 LAG_ref lag = p->po_lag;
3728
3729 bondport_flags_set_selected_changed(p);
3730 if (lag != NULL && bond->ifb_active_lag == lag) {
3731 if (p->po_selected == SelectedState_SELECTED) {
3732 lag->lag_selected_port_count--;
3733 }
3734 else if (s == SelectedState_SELECTED) {
3735 lag->lag_selected_port_count++;
3736 }
3737 if (g_bond->verbose) {
3738 timestamp_printf("[%s] SetSelected: %s (was %s)\n",
3739 bondport_get_name(p),
3740 SelectedStateString(s),
3741 SelectedStateString(p->po_selected));
3742 }
3743 }
3744 }
3745 p->po_selected = s;
3746 return;
3747}
3748
3749/**
3750 ** Receive machine
3751 **/
3752
3753static void
3754bondport_UpdateDefaultSelected(bondport_ref p)
3755{
3756 bondport_set_selected(p, SelectedState_UNSELECTED);
3757 return;
3758}
3759
3760static void
3761bondport_RecordDefault(bondport_ref p)
3762{
3763 bzero(&p->po_partner_state, sizeof(p->po_partner_state));
3764 p->po_actor_state
3765 = lacp_actor_partner_state_set_defaulted(p->po_actor_state);
3766 bondport_assign_to_LAG(p);
3767 return;
3768}
3769
3770static void
3771bondport_UpdateSelected(bondport_ref p, lacpdu_ref lacpdu_p)
3772{
3773 lacp_actor_partner_tlv_ref actor;
3774 partner_state_ref ps;
3775 LAG_info_ref ps_li;
3776
3777 /* compare the PDU's Actor information to our Partner state */
3778 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
3779 ps = &p->po_partner_state;
3780 ps_li = &ps->ps_lag_info;
3781 if (lacp_actor_partner_tlv_get_port(actor) != ps->ps_port
3782 || (lacp_actor_partner_tlv_get_port_priority(actor)
3783 != ps->ps_port_priority)
3784 || bcmp(actor->lap_system, &ps_li->li_system, sizeof(ps_li->li_system))
3785 || (lacp_actor_partner_tlv_get_system_priority(actor)
3786 != ps_li->li_system_priority)
3787 || (lacp_actor_partner_tlv_get_key(actor) != ps_li->li_key)
3788 || (lacp_actor_partner_state_aggregatable(actor->lap_state)
3789 != lacp_actor_partner_state_aggregatable(ps->ps_state))) {
3790 bondport_set_selected(p, SelectedState_UNSELECTED);
3791 if (g_bond->verbose) {
3792 timestamp_printf("[%s] updateSelected UNSELECTED\n",
3793 bondport_get_name(p));
3794 }
3795 }
3796 return;
3797}
3798
3799static void
3800bondport_RecordPDU(bondport_ref p, lacpdu_ref lacpdu_p)
3801{
3802 lacp_actor_partner_tlv_ref actor;
3803 ifbond_ref bond = p->po_bond;
3804 int lacp_maintain = 0;
3805 partner_state_ref ps;
3806 lacp_actor_partner_tlv_ref partner;
3807 LAG_info_ref ps_li;
3808
3809 /* copy the PDU's Actor information into our Partner state */
3810 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
3811 ps = &p->po_partner_state;
3812 ps_li = &ps->ps_lag_info;
3813 ps->ps_port = lacp_actor_partner_tlv_get_port(actor);
3814 ps->ps_port_priority = lacp_actor_partner_tlv_get_port_priority(actor);
3815 ps_li->li_system = *((lacp_system_ref)actor->lap_system);
3816 ps_li->li_system_priority
3817 = lacp_actor_partner_tlv_get_system_priority(actor);
3818 ps_li->li_key = lacp_actor_partner_tlv_get_key(actor);
3819 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(actor->lap_state);
3820 p->po_actor_state
3821 = lacp_actor_partner_state_set_not_defaulted(p->po_actor_state);
3822
3823 /* compare the PDU's Partner information to our own information */
3824 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
3825
3826 if (lacp_actor_partner_state_active_lacp(ps->ps_state)
3827 || (lacp_actor_partner_state_active_lacp(p->po_actor_state)
3828 && lacp_actor_partner_state_active_lacp(partner->lap_state))) {
3829 if (g_bond->verbose) {
3830 timestamp_printf("[%s] recordPDU: LACP will maintain\n",
3831 bondport_get_name(p));
3832 }
3833 lacp_maintain = 1;
3834 }
3835 if ((lacp_actor_partner_tlv_get_port(partner)
3836 == bondport_get_index(p))
3837 && lacp_actor_partner_tlv_get_port_priority(partner) == p->po_priority
3838 && bcmp(partner->lap_system, &g_bond->system,
3839 sizeof(g_bond->system)) == 0
3840 && (lacp_actor_partner_tlv_get_system_priority(partner)
3841 == g_bond->system_priority)
3842 && lacp_actor_partner_tlv_get_key(partner) == bond->ifb_key
3843 && (lacp_actor_partner_state_aggregatable(partner->lap_state)
3844 == lacp_actor_partner_state_aggregatable(p->po_actor_state))
3845 && lacp_actor_partner_state_in_sync(actor->lap_state)
3846 && lacp_maintain) {
3847 ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
3848 if (g_bond->verbose) {
3849 timestamp_printf("[%s] recordPDU: LACP partner in sync\n",
3850 bondport_get_name(p));
3851 }
3852 }
3853 else if (lacp_actor_partner_state_aggregatable(actor->lap_state) == 0
3854 && lacp_actor_partner_state_in_sync(actor->lap_state)
3855 && lacp_maintain) {
3856 ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
3857 if (g_bond->verbose) {
3858 timestamp_printf("[%s] recordPDU: LACP partner in sync (ind)\n",
3859 bondport_get_name(p));
3860 }
3861 }
3862 bondport_assign_to_LAG(p);
3863 return;
3864}
3865
3866static __inline__ lacp_actor_partner_state
3867updateNTTBits(lacp_actor_partner_state s)
3868{
3869 return (s & (LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY
3870 | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT
3871 | LACP_ACTOR_PARTNER_STATE_AGGREGATION
3872 | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION));
3873}
3874
3875static void
3876bondport_UpdateNTT(bondport_ref p, lacpdu_ref lacpdu_p)
3877{
3878 ifbond_ref bond = p->po_bond;
3879 lacp_actor_partner_tlv_ref partner;
3880
3881 /* compare the PDU's Actor information to our Partner state */
3882 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
3883 if ((lacp_actor_partner_tlv_get_port(partner) != bondport_get_index(p))
3884 || lacp_actor_partner_tlv_get_port_priority(partner) != p->po_priority
3885 || bcmp(partner->lap_system, &g_bond->system, sizeof(g_bond->system))
3886 || (lacp_actor_partner_tlv_get_system_priority(partner)
3887 != g_bond->system_priority)
3888 || lacp_actor_partner_tlv_get_key(partner) != bond->ifb_key
3889 || (updateNTTBits(partner->lap_state)
3890 != updateNTTBits(p->po_actor_state))) {
3891 bondport_flags_set_ntt(p);
3892 if (g_bond->verbose) {
3893 timestamp_printf("[%s] updateNTT: Need To Transmit\n",
3894 bondport_get_name(p));
3895 }
3896 }
3897 return;
3898}
3899
3900static void
3901bondport_AttachMuxToAggregator(bondport_ref p)
3902{
3903 if (bondport_flags_mux_attached(p) == 0) {
3904 if (g_bond->verbose) {
3905 timestamp_printf("[%s] Attached Mux To Aggregator\n",
3906 bondport_get_name(p));
3907 }
3908 bondport_flags_set_mux_attached(p);
3909 }
3910 return;
3911}
3912
3913static void
3914bondport_DetachMuxFromAggregator(bondport_ref p)
3915{
3916 if (bondport_flags_mux_attached(p)) {
3917 if (g_bond->verbose) {
3918 timestamp_printf("[%s] Detached Mux From Aggregator\n",
3919 bondport_get_name(p));
3920 }
3921 bondport_flags_clear_mux_attached(p);
3922 }
3923 return;
3924}
3925
3926static void
3927bondport_enable_distributing(bondport_ref p)
3928{
3929 if (bondport_flags_distributing(p) == 0) {
3930 ifbond_ref bond = p->po_bond;
3931
3932 bond->ifb_distributing_array[bond->ifb_distributing_count++] = p;
3933 if (g_bond->verbose) {
3934 timestamp_printf("[%s] Distribution Enabled\n",
3935 bondport_get_name(p));
3936 }
3937 bondport_flags_set_distributing(p);
3938 }
3939 return;
3940}
3941
3942static void
3943bondport_disable_distributing(bondport_ref p)
3944{
3945 if (bondport_flags_distributing(p)) {
3946 bondport_ref * array;
3947 ifbond_ref bond;
3948 int count;
3949 int i;
3950
3951 bond = p->po_bond;
3952 array = bond->ifb_distributing_array;
3953 count = bond->ifb_distributing_count;
3954 for (i = 0; i < count; i++) {
3955 if (array[i] == p) {
3956 int j;
3957
3958 for (j = i; j < (count - 1); j++) {
3959 array[j] = array[j + 1];
3960 }
3961 break;
3962 }
3963 }
3964 bond->ifb_distributing_count--;
3965 if (g_bond->verbose) {
3966 timestamp_printf("[%s] Distribution Disabled\n",
3967 bondport_get_name(p));
3968 }
3969 bondport_flags_clear_distributing(p);
3970 }
3971 return;
3972}
3973
3974/**
3975 ** Receive machine functions
3976 **/
3977static void
3978bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
3979 void * event_data);
3980static void
3981bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
3982 void * event_data);
3983static void
3984bondport_receive_machine_expired(bondport_ref p, LAEvent event,
3985 void * event_data);
3986static void
3987bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
3988 void * event_data);
3989static void
3990bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
3991 void * event_data);
3992static void
3993bondport_receive_machine_current(bondport_ref p, LAEvent event,
3994 void * event_data);
3995
3996static void
3997bondport_receive_machine_event(bondport_ref p, LAEvent event,
3998 void * event_data)
3999{
4000 switch (p->po_receive_state) {
4001 case ReceiveState_none:
4002 bondport_receive_machine_initialize(p, LAEventStart, NULL);
4003 break;
4004 case ReceiveState_INITIALIZE:
4005 bondport_receive_machine_initialize(p, event, event_data);
4006 break;
4007 case ReceiveState_PORT_DISABLED:
4008 bondport_receive_machine_port_disabled(p, event, event_data);
4009 break;
4010 case ReceiveState_EXPIRED:
4011 bondport_receive_machine_expired(p, event, event_data);
4012 break;
4013 case ReceiveState_LACP_DISABLED:
4014 bondport_receive_machine_lacp_disabled(p, event, event_data);
4015 break;
4016 case ReceiveState_DEFAULTED:
4017 bondport_receive_machine_defaulted(p, event, event_data);
4018 break;
4019 case ReceiveState_CURRENT:
4020 bondport_receive_machine_current(p, event, event_data);
4021 break;
4022 default:
4023 break;
4024 }
4025 return;
4026}
4027
4028static void
4029bondport_receive_machine(bondport_ref p, LAEvent event,
4030 void * event_data)
4031{
4032 switch (event) {
4033 case LAEventPacket:
4034 if (p->po_receive_state != ReceiveState_LACP_DISABLED) {
4035 bondport_receive_machine_current(p, event, event_data);
4036 }
4037 break;
4038 case LAEventMediaChange:
4039 if (media_active(&p->po_media_info)) {
4040 switch (p->po_receive_state) {
4041 case ReceiveState_PORT_DISABLED:
4042 case ReceiveState_LACP_DISABLED:
4043 bondport_receive_machine_port_disabled(p, LAEventMediaChange, NULL);
4044 break;
4045 default:
4046 break;
4047 }
4048 }
4049 else {
4050 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4051 }
4052 break;
4053 default:
4054 bondport_receive_machine_event(p, event, event_data);
4055 break;
4056 }
4057 return;
4058}
4059
4060static void
4061bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4062 __unused void * event_data)
4063{
4064 switch (event) {
4065 case LAEventStart:
4066 devtimer_cancel(p->po_current_while_timer);
4067 if (g_bond->verbose) {
4068 timestamp_printf("[%s] Receive INITIALIZE\n",
4069 bondport_get_name(p));
4070 }
4071 p->po_receive_state = ReceiveState_INITIALIZE;
4072 bondport_set_selected(p, SelectedState_UNSELECTED);
4073 bondport_RecordDefault(p);
4074 p->po_actor_state
4075 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4076 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4077 break;
4078 default:
4079 break;
4080 }
4081 return;
4082}
4083
4084static void
4085bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
4086 __unused void * event_data)
4087{
4088 partner_state_ref ps;
4089
4090 switch (event) {
4091 case LAEventStart:
4092 devtimer_cancel(p->po_current_while_timer);
4093 if (g_bond->verbose) {
4094 timestamp_printf("[%s] Receive PORT_DISABLED\n",
4095 bondport_get_name(p));
4096 }
4097 p->po_receive_state = ReceiveState_PORT_DISABLED;
4098 ps = &p->po_partner_state;
4099 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(ps->ps_state);
4100 /* FALL THROUGH */
4101 case LAEventMediaChange:
4102 if (media_active(&p->po_media_info)) {
4103 if (media_full_duplex(&p->po_media_info)) {
4104 bondport_receive_machine_expired(p, LAEventStart, NULL);
4105 }
4106 else {
4107 bondport_receive_machine_lacp_disabled(p, LAEventStart, NULL);
4108 }
4109 }
4110 else if (p->po_selected == SelectedState_SELECTED) {
4111 struct timeval tv;
4112
4113 if (g_bond->verbose) {
4114 timestamp_printf("[%s] Receive PORT_DISABLED: "
4115 "link timer started\n",
4116 bondport_get_name(p));
4117 }
4118 tv.tv_sec = 1;
4119 tv.tv_usec = 0;
4120 devtimer_set_relative(p->po_current_while_timer, tv,
4121 (devtimer_timeout_func)
4122 bondport_receive_machine_port_disabled,
4123 (void *)LAEventTimeout, NULL);
4124 }
4125 else if (p->po_selected == SelectedState_STANDBY) {
4126 bondport_set_selected(p, SelectedState_UNSELECTED);
4127 }
4128 break;
4129 case LAEventTimeout:
4130 if (p->po_selected == SelectedState_SELECTED) {
4131 if (g_bond->verbose) {
4132 timestamp_printf("[%s] Receive PORT_DISABLED: "
4133 "link timer completed, marking UNSELECTED\n",
4134 bondport_get_name(p));
4135 }
4136 bondport_set_selected(p, SelectedState_UNSELECTED);
4137 }
4138 break;
4139 case LAEventPortMoved:
4140 bondport_receive_machine_initialize(p, LAEventStart, NULL);
4141 break;
4142 default:
4143 break;
4144 }
4145 return;
4146}
4147
4148static void
4149bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4150 __unused void * event_data)
4151{
4152 lacp_actor_partner_state s;
4153 struct timeval tv;
4154
4155 switch (event) {
4156 case LAEventStart:
4157 devtimer_cancel(p->po_current_while_timer);
4158 if (g_bond->verbose) {
4159 timestamp_printf("[%s] Receive EXPIRED\n",
4160 bondport_get_name(p));
4161 }
4162 p->po_receive_state = ReceiveState_EXPIRED;
4163 s = p->po_partner_state.ps_state;
4164 s = lacp_actor_partner_state_set_out_of_sync(s);
4165 s = lacp_actor_partner_state_set_short_timeout(s);
4166 p->po_partner_state.ps_state = s;
4167 p->po_actor_state
4168 = lacp_actor_partner_state_set_expired(p->po_actor_state);
4169 /* start current_while timer */
4170 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4171 tv.tv_usec = 0;
4172 devtimer_set_relative(p->po_current_while_timer, tv,
4173 (devtimer_timeout_func)
4174 bondport_receive_machine_expired,
4175 (void *)LAEventTimeout, NULL);
4176
4177 break;
4178 case LAEventTimeout:
4179 bondport_receive_machine_defaulted(p, LAEventStart, NULL);
4180 break;
4181 default:
4182 break;
4183 }
4184 return;
4185}
4186
4187static void
4188bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4189 __unused void * event_data)
4190{
4191 partner_state_ref ps;
4192 switch (event) {
4193 case LAEventStart:
4194 devtimer_cancel(p->po_current_while_timer);
4195 if (g_bond->verbose) {
4196 timestamp_printf("[%s] Receive LACP_DISABLED\n",
4197 bondport_get_name(p));
4198 }
4199 p->po_receive_state = ReceiveState_LACP_DISABLED;
4200 bondport_set_selected(p, SelectedState_UNSELECTED);
4201 bondport_RecordDefault(p);
4202 ps = &p->po_partner_state;
4203 ps->ps_state = lacp_actor_partner_state_set_individual(ps->ps_state);
4204 p->po_actor_state
4205 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4206 break;
4207 default:
4208 break;
4209 }
4210 return;
4211}
4212
4213static void
4214bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4215 __unused void * event_data)
4216{
4217 switch (event) {
4218 case LAEventStart:
4219 devtimer_cancel(p->po_current_while_timer);
4220 if (g_bond->verbose) {
4221 timestamp_printf("[%s] Receive DEFAULTED\n",
4222 bondport_get_name(p));
4223 }
4224 p->po_receive_state = ReceiveState_DEFAULTED;
4225 bondport_UpdateDefaultSelected(p);
4226 bondport_RecordDefault(p);
4227 p->po_actor_state
4228 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4229 break;
4230 default:
4231 break;
4232 }
4233 return;
4234}
4235
4236static void
4237bondport_receive_machine_current(bondport_ref p, LAEvent event,
4238 void * event_data)
4239{
4240 partner_state_ref ps;
4241 struct timeval tv;
4242
4243 switch (event) {
4244 case LAEventPacket:
4245 devtimer_cancel(p->po_current_while_timer);
4246 if (g_bond->verbose) {
4247 timestamp_printf("[%s] Receive CURRENT\n",
4248 bondport_get_name(p));
4249 }
4250 p->po_receive_state = ReceiveState_CURRENT;
4251 bondport_UpdateSelected(p, event_data);
4252 bondport_UpdateNTT(p, event_data);
4253 bondport_RecordPDU(p, event_data);
4254 p->po_actor_state
4255 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4256 bondport_assign_to_LAG(p);
4257 /* start current_while timer */
4258 ps = &p->po_partner_state;
4259 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4260 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4261 }
4262 else {
4263 tv.tv_sec = LACP_LONG_TIMEOUT_TIME;
4264 }
4265 tv.tv_usec = 0;
4266 devtimer_set_relative(p->po_current_while_timer, tv,
4267 (devtimer_timeout_func)
4268 bondport_receive_machine_current,
4269 (void *)LAEventTimeout, NULL);
4270 break;
4271 case LAEventTimeout:
4272 bondport_receive_machine_expired(p, LAEventStart, NULL);
4273 break;
4274 default:
4275 break;
4276 }
4277 return;
4278}
4279
4280/**
4281 ** Periodic Transmission machine
4282 **/
4283
4284static void
4285bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
4286 __unused void * event_data)
4287{
4288 int interval;
4289 partner_state_ref ps;
4290 struct timeval tv;
4291
4292 switch (event) {
4293 case LAEventStart:
4294 if (g_bond->verbose) {
4295 timestamp_printf("[%s] periodic_transmit Start\n",
4296 bondport_get_name(p));
4297 }
4298 /* FALL THROUGH */
4299 case LAEventMediaChange:
4300 devtimer_cancel(p->po_periodic_timer);
4301 p->po_periodic_interval = 0;
4302 if (media_active(&p->po_media_info) == 0
4303 || media_full_duplex(&p->po_media_info) == 0) {
4304 break;
4305 }
4306 case LAEventPacket:
4307 /* Neither Partner nor Actor are LACP Active, no periodic tx */
4308 ps = &p->po_partner_state;
4309 if (lacp_actor_partner_state_active_lacp(p->po_actor_state) == 0
4310 && (lacp_actor_partner_state_active_lacp(ps->ps_state)
4311 == 0)) {
4312 devtimer_cancel(p->po_periodic_timer);
4313 p->po_periodic_interval = 0;
4314 break;
4315 }
4316 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4317 interval = LACP_FAST_PERIODIC_TIME;
4318 }
4319 else {
4320 interval = LACP_SLOW_PERIODIC_TIME;
4321 }
4322 if (p->po_periodic_interval != interval) {
4323 if (interval == LACP_FAST_PERIODIC_TIME
4324 && p->po_periodic_interval == LACP_SLOW_PERIODIC_TIME) {
4325 if (g_bond->verbose) {
4326 timestamp_printf("[%s] periodic_transmit:"
4327 " Need To Transmit\n",
4328 bondport_get_name(p));
4329 }
4330 bondport_flags_set_ntt(p);
4331 }
4332 p->po_periodic_interval = interval;
4333 tv.tv_usec = 0;
4334 tv.tv_sec = interval;
4335 devtimer_set_relative(p->po_periodic_timer, tv,
4336 (devtimer_timeout_func)
4337 bondport_periodic_transmit_machine,
4338 (void *)LAEventTimeout, NULL);
4339 if (g_bond->verbose) {
4340 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4341 bondport_get_name(p),
4342 p->po_periodic_interval);
4343 }
4344 }
4345 break;
4346 case LAEventTimeout:
4347 bondport_flags_set_ntt(p);
4348 tv.tv_sec = p->po_periodic_interval;
4349 tv.tv_usec = 0;
4350 devtimer_set_relative(p->po_periodic_timer, tv, (devtimer_timeout_func)
4351 bondport_periodic_transmit_machine,
4352 (void *)LAEventTimeout, NULL);
4353 if (g_bond->verbose > 1) {
4354 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4355 bondport_get_name(p), p->po_periodic_interval);
4356 }
4357 break;
4358 default:
4359 break;
4360 }
4361 return;
4362}
4363
4364/**
4365 ** Transmit machine
4366 **/
4367static int
4368bondport_can_transmit(bondport_ref p, int32_t current_secs,
4369 __darwin_time_t * next_secs)
4370{
4371 if (p->po_last_transmit_secs != current_secs) {
4372 p->po_last_transmit_secs = current_secs;
4373 p->po_n_transmit = 0;
4374 }
4375 if (p->po_n_transmit < LACP_PACKET_RATE) {
4376 p->po_n_transmit++;
4377 return (1);
4378 }
4379 if (next_secs != NULL) {
4380 *next_secs = current_secs + 1;
4381 }
4382 return (0);
4383}
4384
4385static void
4386bondport_transmit_machine(bondport_ref p, LAEvent event,
4387 void * event_data)
4388{
4389 lacp_actor_partner_tlv_ref aptlv;
4390 lacp_collector_tlv_ref ctlv;
4391 struct timeval next_tick_time = {0, 0};
4392 lacpdu_ref out_lacpdu_p;
4393 packet_buffer_ref pkt;
4394 partner_state_ref ps;
4395 LAG_info_ref ps_li;
4396
4397 switch (event) {
4398 case LAEventTimeout:
4399 case LAEventStart:
4400 if (p->po_periodic_interval == 0 || bondport_flags_ntt(p) == 0) {
4401 break;
4402 }
4403 if (event_data == TRANSMIT_MACHINE_TX_IMMEDIATE) {
4404 /* we're going away, transmit the packet no matter what */
4405 }
4406 else if (bondport_can_transmit(p, devtimer_current_secs(),
4407 &next_tick_time.tv_sec) == 0) {
4408 if (devtimer_enabled(p->po_transmit_timer)) {
4409 if (g_bond->verbose > 0) {
4410 timestamp_printf("[%s] Transmit Timer Already Set\n",
4411 bondport_get_name(p));
4412 }
4413 }
4414 else {
4415 devtimer_set_absolute(p->po_transmit_timer, next_tick_time,
4416 (devtimer_timeout_func)
4417 bondport_transmit_machine,
4418 (void *)LAEventTimeout, NULL);
4419 if (g_bond->verbose > 0) {
4420 timestamp_printf("[%s] Transmit Timer Deadline %d secs\n",
4421 bondport_get_name(p),
4422 (int)next_tick_time.tv_sec);
4423 }
4424 }
4425 break;
4426 }
4427 if (g_bond->verbose > 0) {
4428 if (event == LAEventTimeout) {
4429 timestamp_printf("[%s] Transmit Timer Complete\n",
4430 bondport_get_name(p));
4431 }
4432 }
4433 pkt = packet_buffer_allocate(sizeof(*out_lacpdu_p));
4434 if (pkt == NULL) {
4435 printf("[%s] Transmit: failed to allocate packet buffer\n",
4436 bondport_get_name(p));
4437 break;
4438 }
4439 out_lacpdu_p = (lacpdu_ref)packet_buffer_byteptr(pkt);
4440 bzero(out_lacpdu_p, sizeof(*out_lacpdu_p));
4441 out_lacpdu_p->la_subtype = IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP;
4442 out_lacpdu_p->la_version = LACPDU_VERSION_1;
4443
4444 /* Actor */
4445 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_actor_tlv;
4446 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_ACTOR;
4447 aptlv->lap_length = LACPDU_ACTOR_TLV_LENGTH;
4448 *((lacp_system_ref)aptlv->lap_system) = g_bond->system;
4449 lacp_actor_partner_tlv_set_system_priority(aptlv,
4450 g_bond->system_priority);
4451 lacp_actor_partner_tlv_set_port_priority(aptlv, p->po_priority);
4452 lacp_actor_partner_tlv_set_port(aptlv, bondport_get_index(p));
4453 lacp_actor_partner_tlv_set_key(aptlv, p->po_bond->ifb_key);
4454 aptlv->lap_state = p->po_actor_state;
4455
4456 /* Partner */
4457 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_partner_tlv;
4458 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_PARTNER;
4459 aptlv->lap_length = LACPDU_PARTNER_TLV_LENGTH;
4460 ps = &p->po_partner_state;
4461 ps_li = &ps->ps_lag_info;
4462 lacp_actor_partner_tlv_set_port(aptlv, ps->ps_port);
4463 lacp_actor_partner_tlv_set_port_priority(aptlv, ps->ps_port_priority);
4464 *((lacp_system_ref)aptlv->lap_system) = ps_li->li_system;
4465 lacp_actor_partner_tlv_set_system_priority(aptlv,
4466 ps_li->li_system_priority);
4467 lacp_actor_partner_tlv_set_key(aptlv, ps_li->li_key);
4468 aptlv->lap_state = ps->ps_state;
4469
4470 /* Collector */
4471 ctlv = (lacp_collector_tlv_ref)out_lacpdu_p->la_collector_tlv;
4472 ctlv->lac_tlv_type = LACPDU_TLV_TYPE_COLLECTOR;
4473 ctlv->lac_length = LACPDU_COLLECTOR_TLV_LENGTH;
4474
4475 bondport_slow_proto_transmit(p, pkt);
4476 bondport_flags_clear_ntt(p);
4477 if (g_bond->verbose > 0) {
4478 timestamp_printf("[%s] Transmit Packet %d\n",
4479 bondport_get_name(p), p->po_n_transmit);
4480 }
4481 break;
4482 default:
4483 break;
4484 }
4485 return;
4486}
4487
4488/**
4489 ** Mux machine functions
4490 **/
4491
4492static void
4493bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4494 void * event_data);
4495static void
4496bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4497 void * event_data);
4498static void
4499bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4500 void * event_data);
4501
4502static void
4503bondport_mux_machine_collecting_distributing(bondport_ref p, LAEvent event,
4504 void * event_data);
4505
4506static void
4507bondport_mux_machine(bondport_ref p, LAEvent event, void * event_data)
4508{
4509 switch (p->po_mux_state) {
4510 case MuxState_none:
4511 bondport_mux_machine_detached(p, LAEventStart, NULL);
4512 break;
4513 case MuxState_DETACHED:
4514 bondport_mux_machine_detached(p, event, event_data);
4515 break;
4516 case MuxState_WAITING:
4517 bondport_mux_machine_waiting(p, event, event_data);
4518 break;
4519 case MuxState_ATTACHED:
4520 bondport_mux_machine_attached(p, event, event_data);
4521 break;
4522 case MuxState_COLLECTING_DISTRIBUTING:
4523 bondport_mux_machine_collecting_distributing(p, event, event_data);
4524 break;
4525 default:
4526 break;
4527 }
4528 return;
4529}
4530
4531static void
4532bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4533 __unused void * event_data)
4534{
4535 lacp_actor_partner_state s;
4536
4537 switch (event) {
4538 case LAEventStart:
4539 devtimer_cancel(p->po_wait_while_timer);
4540 if (g_bond->verbose) {
4541 timestamp_printf("[%s] Mux DETACHED\n",
4542 bondport_get_name(p));
4543 }
4544 p->po_mux_state = MuxState_DETACHED;
4545 bondport_flags_clear_ready(p);
4546 bondport_DetachMuxFromAggregator(p);
4547 bondport_disable_distributing(p);
4548 s = p->po_actor_state;
4549 s = lacp_actor_partner_state_set_out_of_sync(s);
4550 s = lacp_actor_partner_state_set_not_collecting(s);
4551 s = lacp_actor_partner_state_set_not_distributing(s);
4552 p->po_actor_state = s;
4553 bondport_flags_set_ntt(p);
4554 break;
4555 case LAEventSelectedChange:
4556 case LAEventPacket:
4557 case LAEventMediaChange:
4558 if (p->po_selected == SelectedState_SELECTED
4559 || p->po_selected == SelectedState_STANDBY) {
4560 bondport_mux_machine_waiting(p, LAEventStart, NULL);
4561 }
4562 break;
4563 default:
4564 break;
4565 }
4566 return;
4567}
4568
4569static void
4570bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4571 __unused void * event_data)
4572{
4573 struct timeval tv;
4574
4575 switch (event) {
4576 case LAEventStart:
4577 devtimer_cancel(p->po_wait_while_timer);
4578 if (g_bond->verbose) {
4579 timestamp_printf("[%s] Mux WAITING\n",
4580 bondport_get_name(p));
4581 }
4582 p->po_mux_state = MuxState_WAITING;
4583 /* FALL THROUGH */
4584 default:
4585 case LAEventSelectedChange:
4586 if (p->po_selected == SelectedState_UNSELECTED) {
4587 bondport_mux_machine_detached(p, LAEventStart, NULL);
4588 break;
4589 }
4590 if (p->po_selected == SelectedState_STANDBY) {
4591 devtimer_cancel(p->po_wait_while_timer);
4592 /* wait until state changes to SELECTED */
4593 if (g_bond->verbose) {
4594 timestamp_printf("[%s] Mux WAITING: Standby\n",
4595 bondport_get_name(p));
4596 }
4597 break;
4598 }
4599 if (bondport_flags_ready(p)) {
4600 if (g_bond->verbose) {
4601 timestamp_printf("[%s] Mux WAITING: Port is already ready\n",
4602 bondport_get_name(p));
4603 }
4604 break;
4605 }
4606 if (devtimer_enabled(p->po_wait_while_timer)) {
4607 if (g_bond->verbose) {
4608 timestamp_printf("[%s] Mux WAITING: Timer already set\n",
4609 bondport_get_name(p));
4610 }
4611 break;
4612 }
4613 if (ifbond_all_ports_attached(p->po_bond, p)) {
4614 devtimer_cancel(p->po_wait_while_timer);
4615 if (g_bond->verbose) {
4616 timestamp_printf("[%s] Mux WAITING: No waiting\n",
4617 bondport_get_name(p));
4618 }
4619 bondport_flags_set_ready(p);
4620 goto no_waiting;
4621 }
4622 if (g_bond->verbose) {
4623 timestamp_printf("[%s] Mux WAITING: 2 seconds\n",
4624 bondport_get_name(p));
4625 }
4626 tv.tv_sec = LACP_AGGREGATE_WAIT_TIME;
4627 tv.tv_usec = 0;
4628 devtimer_set_relative(p->po_wait_while_timer, tv,
4629 (devtimer_timeout_func)
4630 bondport_mux_machine_waiting,
4631 (void *)LAEventTimeout, NULL);
4632 break;
4633 case LAEventTimeout:
4634 if (g_bond->verbose) {
4635 timestamp_printf("[%s] Mux WAITING: Ready\n",
4636 bondport_get_name(p));
4637 }
4638 bondport_flags_set_ready(p);
4639 break;
4640 case LAEventReady:
4641 no_waiting:
4642 if (bondport_flags_ready(p)){
4643 if (g_bond->verbose) {
4644 timestamp_printf("[%s] Mux WAITING: All Ports Ready\n",
4645 bondport_get_name(p));
4646 }
4647 bondport_mux_machine_attached(p, LAEventStart, NULL);
4648 break;
4649 }
4650 break;
4651 }
4652 return;
4653}
4654
4655static void
4656bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4657 __unused void * event_data)
4658{
4659 lacp_actor_partner_state s;
4660
4661 switch (event) {
4662 case LAEventStart:
4663 devtimer_cancel(p->po_wait_while_timer);
4664 if (g_bond->verbose) {
4665 timestamp_printf("[%s] Mux ATTACHED\n",
4666 bondport_get_name(p));
4667 }
4668 p->po_mux_state = MuxState_ATTACHED;
4669 bondport_AttachMuxToAggregator(p);
4670 s = p->po_actor_state;
4671 s = lacp_actor_partner_state_set_in_sync(s);
4672 s = lacp_actor_partner_state_set_not_collecting(s);
4673 s = lacp_actor_partner_state_set_not_distributing(s);
4674 bondport_disable_distributing(p);
4675 p->po_actor_state = s;
4676 bondport_flags_set_ntt(p);
4677 /* FALL THROUGH */
4678 default:
4679 switch (p->po_selected) {
4680 case SelectedState_SELECTED:
4681 s = p->po_partner_state.ps_state;
4682 if (lacp_actor_partner_state_in_sync(s)) {
4683 bondport_mux_machine_collecting_distributing(p, LAEventStart,
4684 NULL);
4685 }
4686 break;
4687 default:
4688 bondport_mux_machine_detached(p, LAEventStart, NULL);
4689 break;
4690 }
4691 break;
4692 }
4693 return;
4694}
4695
4696static void
4697bondport_mux_machine_collecting_distributing(bondport_ref p,
4698 LAEvent event,
4699 __unused void * event_data)
4700{
4701 lacp_actor_partner_state s;
4702
4703 switch (event) {
4704 case LAEventStart:
4705 devtimer_cancel(p->po_wait_while_timer);
4706 if (g_bond->verbose) {
4707 timestamp_printf("[%s] Mux COLLECTING_DISTRIBUTING\n",
4708 bondport_get_name(p));
4709 }
4710 p->po_mux_state = MuxState_COLLECTING_DISTRIBUTING;
4711 bondport_enable_distributing(p);
4712 s = p->po_actor_state;
4713 s = lacp_actor_partner_state_set_collecting(s);
4714 s = lacp_actor_partner_state_set_distributing(s);
4715 p->po_actor_state = s;
4716 bondport_flags_set_ntt(p);
4717 /* FALL THROUGH */
4718 default:
4719 s = p->po_partner_state.ps_state;
4720 if (lacp_actor_partner_state_in_sync(s) == 0) {
4721 bondport_mux_machine_attached(p, LAEventStart, NULL);
4722 break;
4723 }
4724 switch (p->po_selected) {
4725 case SelectedState_UNSELECTED:
4726 case SelectedState_STANDBY:
4727 bondport_mux_machine_attached(p, LAEventStart, NULL);
4728 break;
4729 default:
4730 break;
4731 }
4732 break;
4733 }
4734 return;
4735}