+ /* Main input thread cannot be terminated */
+ VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
+
+ proto_req = (inp->input_waiting &
+ (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
+
+ /* Packets for non-dedicated interfaces other than lo0 */
+ m_cnt = qlen(&inp->rcvq_pkts);
+ m = _getq_all(&inp->rcvq_pkts);
+
+ /* Packets exclusive to lo0 */
+ m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
+ m_loop = _getq_all(&inpm->lo_rcvq_pkts);
+
+ inp->wtot = 0;
+
+ lck_mtx_unlock(&inp->input_lck);
+
+ /*
+ * NOTE warning %%% attention !!!!
+ * We should think about putting some thread starvation
+ * safeguards if we deal with long chains of packets.
+ */
+ if (m_loop != NULL)
+ dlil_input_packet_list_extended(lo_ifp, m_loop,
+ m_cnt_loop, inp->mode);
+
+ if (m != NULL)
+ dlil_input_packet_list_extended(NULL, m,
+ m_cnt, inp->mode);
+
+ if (proto_req)
+ proto_input_run();
+ }
+
+ /* NOTREACHED */
+ VERIFY(0); /* we should never get here */
+}
+
+/*
+ * Input thread for interfaces with legacy input model.
+ *
+ * Thread entry point: v is this thread's struct dlil_threading_info and
+ * w is unused.  Each pass of the loop sleeps (with input_lck held) until
+ * a bit other than DLIL_INPUT_RUNNING is set in inp->input_waiting,
+ * takes the contents of inp->rcvq_pkts, drops the lock, and passes the
+ * chain to dlil_input_packet_list_extended().  If DLIL_INPUT_TERMINATE
+ * is set, the pending packets are freed and
+ * dlil_terminate_input_thread() is called instead.
+ */
+static void
+dlil_input_thread_func(void *v, wait_result_t w)
+{
+#pragma unused(w)
+ struct dlil_threading_info *inp = v;
+ struct ifnet *ifp = inp->ifp;
+
+ VERIFY(inp != dlil_main_input_thread);
+ VERIFY(ifp != NULL);
+ VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
+ VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
+
+ while (1) {
+ struct mbuf *m = NULL;
+ u_int32_t m_cnt;
+
+ lck_mtx_lock_spin(&inp->input_lck);
+
+ /*
+ * Wait until there is work to be done.  DLIL_INPUT_RUNNING is
+ * cleared before sleeping; ifnet_input_common() only issues a
+ * wakeup when that bit is clear.
+ */
+ while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
+ inp->input_waiting &= ~DLIL_INPUT_RUNNING;
+ (void) msleep(&inp->input_waiting, &inp->input_lck,
+ (PZERO - 1) | PSPIN, inp->input_name, NULL);
+ }
+
+ inp->input_waiting |= DLIL_INPUT_RUNNING;
+ inp->input_waiting &= ~DLIL_INPUT_WAITING;
+
+ /*
+ * Protocol registration and injection must always use
+ * the main input thread; in theory the latter can utilize
+ * the corresponding input thread where the packet arrived
+ * on, but that requires our knowing the interface in advance
+ * (and the benefits might not be worth the trouble.)
+ */
+ VERIFY(!(inp->input_waiting &
+ (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
+
+ /* Packets for this interface (count is sampled before dequeueing) */
+ m_cnt = qlen(&inp->rcvq_pkts);
+ m = _getq_all(&inp->rcvq_pkts);
+
+ if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
+ lck_mtx_unlock(&inp->input_lck);
+
+ /* Free up pending packets */
+ if (m != NULL)
+ mbuf_freem_list(m);
+
+ dlil_terminate_input_thread(inp);
+ /* NOTREACHED */
+ return;
+ }
+
+ inp->wtot = 0;
+
+ dlil_input_stats_sync(ifp, inp);
+
+ lck_mtx_unlock(&inp->input_lck);
+
+ /*
+ * NOTE warning %%% attention !!!!
+ * We should think about putting some thread starvation
+ * safeguards if we deal with long chains of packets.
+ */
+ if (m != NULL)
+ dlil_input_packet_list_extended(NULL, m,
+ m_cnt, inp->mode);
+ }
+
+ /* NOTREACHED */
+ VERIFY(0); /* we should never get here */
+}
+
+/*
+ * Input thread for interfaces with opportunistic polling input model.
+ *
+ * Thread entry point: v is this thread's struct dlil_threading_info and
+ * w is unused.  Each pass of the loop, with input_lck held:
+ *   - samples the poll interval (clamped to IF_RXPOLL_INTERVALTIME_MIN)
+ *     and re-applies the auto-tuned parameters if if_poll_update is set;
+ *   - sleeps until a bit other than DLIL_INPUT_RUNNING is set in
+ *     inp->input_waiting;
+ *   - on DLIL_INPUT_TERMINATE, flushes the queue and terminates;
+ *   - dequeues everything from inp->rcvq_pkts;
+ *   - when if_rxpoll is set and a sample hold time is configured,
+ *     accumulates packet/byte counts and, once the hold time has
+ *     elapsed, updates the min/max/EWMA figures and decides whether to
+ *     switch between IFNET_MODEL_INPUT_POLL_OFF and _ON based on the
+ *     low/high watermarks (no more often than mode_holdtime).
+ * After dropping the lock, a mode change is pushed down to the driver
+ * through if_input_ctl and the poll cycle is adjusted; finally the
+ * dequeued packets are passed to dlil_input_packet_list_extended().
+ */
+static void
+dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
+{
+#pragma unused(w)
+ struct dlil_threading_info *inp = v;
+ struct ifnet *ifp = inp->ifp;
+ struct timespec ts;
+
+ VERIFY(inp != dlil_main_input_thread);
+ VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
+
+ while (1) {
+ struct mbuf *m = NULL;
+ u_int32_t m_cnt, m_size, poll_req = 0;
+ ifnet_model_t mode;
+ struct timespec now, delta;
+ u_int64_t ival;
+
+ lck_mtx_lock_spin(&inp->input_lck);
+
+ if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
+ ival = IF_RXPOLL_INTERVALTIME_MIN;
+
+ /*
+ * Link parameters changed?  If so, re-derive the auto-tuned
+ * values (NULL parameters); input_lck is already held, hence
+ * locked == TRUE.
+ */
+ if (ifp->if_poll_update != 0) {
+ ifp->if_poll_update = 0;
+ (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
+ }
+
+ /* Current operating mode; may be replaced by a transition below */
+ mode = inp->mode;
+
+ /* Wait until there is work to be done */
+ while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
+ inp->input_waiting &= ~DLIL_INPUT_RUNNING;
+ (void) msleep(&inp->input_waiting, &inp->input_lck,
+ (PZERO - 1) | PSPIN, inp->input_name, NULL);
+ }
+
+ inp->input_waiting |= DLIL_INPUT_RUNNING;
+ inp->input_waiting &= ~DLIL_INPUT_WAITING;
+
+ /*
+ * Protocol registration and injection must always use
+ * the main input thread; in theory the latter can utilize
+ * the corresponding input thread where the packet arrived
+ * on, but that requires our knowing the interface in advance
+ * (and the benefits might not be worth the trouble.)
+ */
+ VERIFY(!(inp->input_waiting &
+ (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
+
+ if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
+ /* Free up pending packets */
+ _flushq(&inp->rcvq_pkts);
+ lck_mtx_unlock(&inp->input_lck);
+
+ dlil_terminate_input_thread(inp);
+ /* NOTREACHED */
+ return;
+ }
+
+ /* Total count of all packets */
+ m_cnt = qlen(&inp->rcvq_pkts);
+
+ /* Total bytes of all packets */
+ m_size = qsize(&inp->rcvq_pkts);
+
+ /* Packets for this interface */
+ m = _getq_all(&inp->rcvq_pkts);
+ VERIFY(m != NULL || m_cnt == 0);
+
+ nanouptime(&now);
+ if (!net_timerisset(&inp->sample_lasttime))
+ *(&inp->sample_lasttime) = *(&now);
+
+ net_timersub(&now, &inp->sample_lasttime, &delta);
+ if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
+ u_int32_t ptot, btot;
+
+ /* Accumulate statistics for current sampling */
+ PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
+
+ if (net_timercmp(&delta, &inp->sample_holdtime, <))
+ goto skip;
+
+ *(&inp->sample_lasttime) = *(&now);
+
+ /* Calculate min/max of inbound bytes */
+ btot = (u_int32_t)inp->sstats.bytes;
+ if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
+ inp->rxpoll_bmin = btot;
+ if (btot > inp->rxpoll_bmax)
+ inp->rxpoll_bmax = btot;
+
+ /* Calculate EWMA of inbound bytes */
+ DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
+
+ /* Calculate min/max of inbound packets */
+ ptot = (u_int32_t)inp->sstats.packets;
+ if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
+ inp->rxpoll_pmin = ptot;
+ if (ptot > inp->rxpoll_pmax)
+ inp->rxpoll_pmax = ptot;
+
+ /* Calculate EWMA of inbound packets */
+ DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
+
+ /* Reset sampling statistics */
+ PKTCNTR_CLEAR(&inp->sstats);
+
+ /* Calculate EWMA of wakeup requests */
+ DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
+ inp->wtot = 0;
+
+ if (dlil_verbose) {
+ if (!net_timerisset(&inp->dbg_lasttime))
+ *(&inp->dbg_lasttime) = *(&now);
+ net_timersub(&now, &inp->dbg_lasttime, &delta);
+ if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
+ *(&inp->dbg_lasttime) = *(&now);
+ printf("%s: [%s] pkts avg %d max %d "
+ "limits [%d/%d], wreq avg %d "
+ "limits [%d/%d], bytes avg %d "
+ "limits [%d/%d]\n", if_name(ifp),
+ (inp->mode ==
+ IFNET_MODEL_INPUT_POLL_ON) ?
+ "ON" : "OFF", inp->rxpoll_pavg,
+ inp->rxpoll_pmax,
+ inp->rxpoll_plowat,
+ inp->rxpoll_phiwat,
+ inp->rxpoll_wavg,
+ inp->rxpoll_wlowat,
+ inp->rxpoll_whiwat,
+ inp->rxpoll_bavg,
+ inp->rxpoll_blowat,
+ inp->rxpoll_bhiwat);
+ }
+ }
+
+ /*
+ * Perform mode transition, if necessary: go OFF when both the
+ * packet and byte averages are at or below their low
+ * watermarks; go ON when the packet average and either the byte
+ * or wakeup average are at or above their high watermarks.
+ */
+ if (!net_timerisset(&inp->mode_lasttime))
+ *(&inp->mode_lasttime) = *(&now);
+
+ net_timersub(&now, &inp->mode_lasttime, &delta);
+ if (net_timercmp(&delta, &inp->mode_holdtime, <))
+ goto skip;
+
+ if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
+ inp->rxpoll_bavg <= inp->rxpoll_blowat &&
+ inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
+ mode = IFNET_MODEL_INPUT_POLL_OFF;
+ } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
+ (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
+ inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
+ inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
+ mode = IFNET_MODEL_INPUT_POLL_ON;
+ }
+
+ if (mode != inp->mode) {
+ inp->mode = mode;
+ *(&inp->mode_lasttime) = *(&now);
+ poll_req++;
+ }
+ }
+skip:
+ dlil_input_stats_sync(ifp, inp);
+
+ lck_mtx_unlock(&inp->input_lck);
+
+ /*
+ * If there's a mode change and interface is still attached,
+ * perform a downcall to the driver for the new mode. Also
+ * hold an IO refcnt on the interface to prevent it from
+ * being detached (will be released below.)
+ */
+ if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
+ struct ifnet_model_params p = { mode, { 0 } };
+ errno_t err;
+
+ if (dlil_verbose) {
+ printf("%s: polling is now %s, "
+ "pkts avg %d max %d limits [%d/%d], "
+ "wreq avg %d limits [%d/%d], "
+ "bytes avg %d limits [%d/%d]\n",
+ if_name(ifp),
+ (mode == IFNET_MODEL_INPUT_POLL_ON) ?
+ "ON" : "OFF", inp->rxpoll_pavg,
+ inp->rxpoll_pmax, inp->rxpoll_plowat,
+ inp->rxpoll_phiwat, inp->rxpoll_wavg,
+ inp->rxpoll_wlowat, inp->rxpoll_whiwat,
+ inp->rxpoll_bavg, inp->rxpoll_blowat,
+ inp->rxpoll_bhiwat);
+ }
+
+ if ((err = ((*ifp->if_input_ctl)(ifp,
+ IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
+ printf("%s: error setting polling mode "
+ "to %s (%d)\n", if_name(ifp),
+ (mode == IFNET_MODEL_INPUT_POLL_ON) ?
+ "ON" : "OFF", err);
+ }
+
+ switch (mode) {
+ case IFNET_MODEL_INPUT_POLL_OFF:
+ ifnet_set_poll_cycle(ifp, NULL);
+ inp->rxpoll_offreq++;
+ if (err != 0)
+ inp->rxpoll_offerr++;
+ break;
+
+ case IFNET_MODEL_INPUT_POLL_ON:
+ net_nsectimer(&ival, &ts);
+ ifnet_set_poll_cycle(ifp, &ts);
+ ifnet_poll(ifp);
+ inp->rxpoll_onreq++;
+ if (err != 0)
+ inp->rxpoll_onerr++;
+ break;
+
+ default:
+ VERIFY(0);
+ /* NOTREACHED */
+ }
+
+ /* Release the IO refcnt */
+ ifnet_decr_iorefcnt(ifp);
+ }
+
+ /*
+ * NOTE warning %%% attention !!!!
+ * We should think about putting some thread starvation
+ * safeguards if we deal with long chains of packets.
+ */
+ if (m != NULL)
+ dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
+ }
+
+ /* NOTREACHED */
+ VERIFY(0); /* we should never get here */
+}
+
+/*
+ * Configure the opportunistic-polling parameters used by an interface's
+ * input thread.
+ *
+ * Must be called on an attached ifnet (caller is expected to check.)
+ * Passing NULL for p, or zero for an individual field of p, selects the
+ * auto-tuned value for that setting.  "locked" tells whether the caller
+ * already holds inp->input_lck; if not, it is taken and released here.
+ *
+ * Returns ENXIO if the interface is not flagged IFEF_RXPOLL or has no
+ * input thread, EINVAL for inconsistent watermarks, and 0 otherwise.
+ * A non-zero p->interval_time below IF_RXPOLL_INTERVALTIME_MIN is
+ * raised to that minimum in the caller's structure.
+ */
+errno_t
+dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
+    boolean_t locked)
+{
+	struct dlil_threading_info *inp;
+	u_int64_t hold_ns, linkrate;
+
+	VERIFY(ifp != NULL);
+	if (!(ifp->if_eflags & IFEF_RXPOLL))
+		return (ENXIO);
+	if ((inp = ifp->if_inp) == NULL)
+		return (ENXIO);
+
+	if (p != NULL) {
+		/* A low watermark and its high watermark must come as a pair */
+		if ((p->packets_lowat == 0) != (p->packets_hiwat == 0))
+			return (EINVAL);
+		if ((p->bytes_lowat == 0) != (p->bytes_hiwat == 0))
+			return (EINVAL);
+
+		/* ... and, when given, low must be strictly below high */
+		if (p->packets_lowat != 0 &&
+		    p->packets_hiwat <= p->packets_lowat)
+			return (EINVAL);
+		if (p->bytes_lowat != 0 &&
+		    p->bytes_hiwat <= p->bytes_lowat)
+			return (EINVAL);
+
+		/* Too-short intervals are silently raised to the minimum */
+		if (p->interval_time != 0 &&
+		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
+			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
+	}
+
+	if (!locked)
+		lck_mtx_lock(&inp->input_lck);
+
+	lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
+
+	/*
+	 * A pending link-rate update would normally make the input thread
+	 * reset everything to the auto-tuned values.  When the driver
+	 * supplies its own parameters before the input thread has had a
+	 * chance to run, drop the pending update so that the driver's
+	 * values are the ones that stick.
+	 */
+	if (p != NULL && !locked) {
+		if (ifp->if_poll_update != 0)
+			ifp->if_poll_update = 0;
+	}
+
+	linkrate = ifnet_input_linkrate(ifp);
+	if (linkrate == 0 && p == NULL) {
+		/* No known link rate and nothing requested: polling disabled */
+		hold_ns = 0;
+		inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
+		inp->rxpoll_plim = 0;
+		inp->rxpoll_whiwat = inp->rxpoll_phiwat =
+		    inp->rxpoll_bhiwat = (u_int32_t)-1;
+		inp->rxpoll_wlowat = inp->rxpoll_plowat =
+		    inp->rxpoll_blowat = 0;
+	} else {
+		u_int32_t pkt_lo, pkt_hi, byte_lo, byte_hi, pkt_lim;
+		u_int64_t interval;
+		unsigned int row = 0, i;
+
+		/*
+		 * Walk the table until its zero-speed terminator or the
+		 * first entry faster than the link; use the entry just
+		 * before that point (entry 0 if there is none.)
+		 */
+		for (i = 0; rxpoll_tbl[i].speed != 0; i++) {
+			if (linkrate < rxpoll_tbl[i].speed)
+				break;
+			row = i;
+		}
+
+		/* Caller-supplied values win; anything left at 0 is auto-tuned */
+		pkt_lo = (p != NULL && p->packets_lowat != 0) ?
+		    p->packets_lowat : rxpoll_tbl[row].plowat;
+		pkt_hi = (p != NULL && p->packets_hiwat != 0) ?
+		    p->packets_hiwat : rxpoll_tbl[row].phiwat;
+		byte_lo = (p != NULL && p->bytes_lowat != 0) ?
+		    p->bytes_lowat : rxpoll_tbl[row].blowat;
+		byte_hi = (p != NULL && p->bytes_hiwat != 0) ?
+		    p->bytes_hiwat : rxpoll_tbl[row].bhiwat;
+		pkt_lim = (p != NULL && p->packets_limit != 0) ?
+		    p->packets_limit : if_rxpoll_max;
+		interval = (p != NULL && p->interval_time != 0) ?
+		    p->interval_time : if_rxpoll_interval_time;
+
+		VERIFY(pkt_lo != 0 && pkt_hi != 0);
+		VERIFY(byte_lo != 0 && byte_hi != 0);
+		VERIFY(interval >= IF_RXPOLL_INTERVALTIME_MIN);
+
+		hold_ns = if_rxpoll_sample_holdtime;
+		inp->rxpoll_wlowat = if_rxpoll_wlowat;
+		inp->rxpoll_whiwat = if_rxpoll_whiwat;
+		inp->rxpoll_plowat = pkt_lo;
+		inp->rxpoll_phiwat = pkt_hi;
+		inp->rxpoll_blowat = byte_lo;
+		inp->rxpoll_bhiwat = byte_hi;
+		inp->rxpoll_plim = pkt_lim;
+		inp->rxpoll_ival = interval;
+	}
+
+	/* Convert the nanosecond hold times into the thread's timespecs */
+	net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
+	net_nsectimer(&hold_ns, &inp->sample_holdtime);
+
+	if (dlil_verbose) {
+		printf("%s: speed %llu bps, sample per %llu nsec, "
+		    "poll interval %llu nsec, pkts per poll %u, "
+		    "pkt limits [%u/%u], wreq limits [%u/%u], "
+		    "bytes limits [%u/%u]\n", if_name(ifp),
+		    linkrate, hold_ns, inp->rxpoll_ival, inp->rxpoll_plim,
+		    inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
+		    inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
+	}
+
+	if (!locked)
+		lck_mtx_unlock(&inp->input_lck);
+
+	return (0);
+}
+
+/*
+ * Copy the interface's current receive-polling parameters into *p.
+ *
+ * Must be called on an attached ifnet (caller is expected to check.)
+ * Returns ENXIO if the interface is not flagged IFEF_RXPOLL or has no
+ * input thread; otherwise *p is zeroed, filled in under input_lck, and
+ * 0 is returned.
+ */
+errno_t
+dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
+{
+	struct dlil_threading_info *inp;
+
+	VERIFY(ifp != NULL && p != NULL);
+
+	if (!(ifp->if_eflags & IFEF_RXPOLL))
+		return (ENXIO);
+	if ((inp = ifp->if_inp) == NULL)
+		return (ENXIO);
+
+	/* Start from a clean slate so fields not set below read as zero */
+	bzero(p, sizeof (*p));
+
+	lck_mtx_lock(&inp->input_lck);
+	p->interval_time = inp->rxpoll_ival;
+	p->packets_limit = inp->rxpoll_plim;
+	p->packets_lowat = inp->rxpoll_plowat;
+	p->packets_hiwat = inp->rxpoll_phiwat;
+	p->bytes_lowat = inp->rxpoll_blowat;
+	p->bytes_hiwat = inp->rxpoll_bhiwat;
+	lck_mtx_unlock(&inp->input_lck);
+
+	return (0);
+}
+
+/*
+ * Legacy input entry point: forwards the packet chain to
+ * ifnet_input_common() without a tail pointer, as the non-extended,
+ * non-polling variant.  Returns whatever ifnet_input_common() returns.
+ */
+errno_t
+ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
+    const struct ifnet_stat_increment_param *s)
+{
+	errno_t err;
+
+	err = ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
+
+	return (err);
+}
+
+/*
+ * Extended input entry point: forwards the packet chain, its tail and
+ * the statistics to ifnet_input_common() as the extended, non-polling
+ * variant.  Returns whatever ifnet_input_common() returns.
+ */
+errno_t
+ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
+    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
+{
+	errno_t err;
+
+	err = ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
+
+	return (err);
+}
+
+static errno_t
+ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
+ const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
+{
+ struct thread *tp = current_thread();
+ struct mbuf *last;
+ struct dlil_threading_info *inp;
+ u_int32_t m_cnt = 0, m_size = 0;
+ /*
+ * Common back end of ifnet_input(), ifnet_input_extended() and the
+ * poller thread: queue the chain m_head..m_tail on the input thread
+ * serving ifp (or on the main input thread if ifp has none) and wake
+ * that thread up if it is not already running.
+ *
+ * ext selects the extended variant, which requires s and may carry
+ * m_tail; poll marks a call from the poller thread, the only case in
+ * which m_head may be NULL.  Returns EINVAL (after freeing the chain)
+ * on bad arguments or a detached interface, and 0 otherwise.
+ */
+ if ((m_head == NULL && !poll) || (s == NULL && ext)) {
+ if (m_head != NULL)
+ mbuf_freem_list(m_head);
+ return (EINVAL);
+ }
+
+ VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
+ VERIFY(m_tail == NULL || ext);
+ VERIFY(s != NULL || !ext);
+
+ /*
+ * Drop the packet(s) if the parameters are invalid, or if the
+ * interface is no longer attached; else hold an IO refcnt to
+ * prevent it from being detached (will be released below.)
+ * No refcnt is taken for lo_ifp.
+ */
+ if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
+ if (m_head != NULL)
+ mbuf_freem_list(m_head);
+ return (EINVAL);
+ }
+
+ if (m_tail == NULL) {
+ last = m_head;
+ while (m_head != NULL) {
+#if IFNET_INPUT_SANITY_CHK
+ if (dlil_input_sanity_check != 0)
+ DLIL_INPUT_CHECK(last, ifp);
+#endif /* IFNET_INPUT_SANITY_CHK */
+ m_cnt++;
+ m_size += m_length(last);
+ if (mbuf_nextpkt(last) == NULL)
+ break;
+ last = mbuf_nextpkt(last);
+ }
+ m_tail = last;
+ } else {
+#if IFNET_INPUT_SANITY_CHK
+ if (dlil_input_sanity_check != 0) {
+ last = m_head;
+ while (1) {
+ DLIL_INPUT_CHECK(last, ifp);
+ m_cnt++;
+ m_size += m_length(last);
+ if (mbuf_nextpkt(last) == NULL)
+ break;
+ last = mbuf_nextpkt(last);
+ }
+ } else {
+ m_cnt = s->packets_in;
+ m_size = s->bytes_in;
+ last = m_tail;
+ }
+#else
+ m_cnt = s->packets_in;
+ m_size = s->bytes_in;
+ last = m_tail;
+#endif /* IFNET_INPUT_SANITY_CHK */
+ }
+
+ if (last != m_tail) {
+ panic_plain("%s: invalid input packet chain for %s, "
+ "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
+ m_tail, last);
+ }
+
+ /*
+ * Assert packet count only for the extended variant, for backwards
+ * compatibility, since this came directly from the device driver.
+ * Relax this assertion for input bytes, as the driver may have
+ * included the link-layer headers in the computation; hence
+ * m_size is just an approximation.
+ */
+ if (ext && s->packets_in != m_cnt) {
+ panic_plain("%s: input packet count mismatch for %s, "
+ "%d instead of %d\n", __func__, if_name(ifp),
+ s->packets_in, m_cnt);
+ }
+
+ if ((inp = ifp->if_inp) == NULL)
+ inp = dlil_main_input_thread;
+
+ /*
+ * If there is a matching DLIL input thread associated with an
+ * affinity set, associate this thread with the same set. We
+ * will only do this once.  The input lock is dropped around the
+ * affinity call and re-taken afterwards.
+ */
+ lck_mtx_lock_spin(&inp->input_lck);
+ if (inp != dlil_main_input_thread && inp->net_affinity &&
+ ((!poll && inp->wloop_thr == THREAD_NULL) ||
+ (poll && inp->poll_thr == THREAD_NULL))) {
+ u_int32_t tag = inp->tag;
+
+ if (poll) {
+ VERIFY(inp->poll_thr == THREAD_NULL);
+ inp->poll_thr = tp;
+ } else {
+ VERIFY(inp->wloop_thr == THREAD_NULL);
+ inp->wloop_thr = tp;
+ }
+ lck_mtx_unlock(&inp->input_lck);
+
+ /* Associate the current thread with the new affinity tag */
+ (void) dlil_affinity_set(tp, tag);
+
+ /*
+ * Take a reference on the current thread; during detach,
+ * we will need to refer to it in order to tear down its
+ * affinity.
+ */
+ thread_reference(tp);
+ lck_mtx_lock_spin(&inp->input_lck);
+ }
+
+ VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
+
+ /*
+ * Because of loopbacked multicast we cannot stuff the ifp in
+ * the rcvif of the packet header: loopback (lo0) packets use a
+ * dedicated list so that we can later associate them with lo_ifp
+ * on their way up the stack. Packets for other interfaces without
+ * dedicated input threads go to the regular list.
+ */
+ if (m_head != NULL) {
+ if (inp == dlil_main_input_thread && ifp == lo_ifp) {
+ struct dlil_main_threading_info *inpm =
+ (struct dlil_main_threading_info *)inp;
+ _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
+ m_cnt, m_size);
+ } else {
+ _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
+ m_cnt, m_size);
+ }
+ }
+
+#if IFNET_INPUT_SANITY_CHK
+ if (dlil_input_sanity_check != 0) {
+ u_int32_t count;
+ struct mbuf *m0;
+
+ for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
+ count++;
+
+ if (count != m_cnt) {
+ panic_plain("%s: invalid packet count %d "
+ "(expected %d)\n", if_name(ifp),
+ count, m_cnt);
+ /* NOTREACHED */
+ }
+
+ inp->input_mbuf_cnt += m_cnt;
+ }
+#endif /* IFNET_INPUT_SANITY_CHK */
+
+ if (s != NULL) {
+ dlil_input_stats_add(s, inp, poll);
+ /*
+ * If we're using the main input thread, synchronize the
+ * stats now since we have the interface context. All
+ * other cases involving dedicated input threads will
+ * have their stats synchronized there.
+ */
+ if (inp == dlil_main_input_thread)
+ dlil_input_stats_sync(ifp, inp);
+ }
+
+ inp->input_waiting |= DLIL_INPUT_WAITING;
+ if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
+ inp->wtot++;
+ wakeup_one((caddr_t)&inp->input_waiting);
+ }
+ lck_mtx_unlock(&inp->input_lck);
+
+ if (ifp != lo_ifp) {
+ /* Release the IO refcnt */
+ ifnet_decr_iorefcnt(ifp);
+ }
+
+ return (0);
+}
+
+/*
+ * Post a start request to the interface's starter thread.
+ *
+ * Does nothing unless the interface is flagged IFEF_TXSTART.  When
+ * resetfc is non-zero the flow-controlled state is cleared first;
+ * otherwise a flow-controlled interface (e.g. a virtual interface being
+ * flow controlled by a real network interface beneath it) is left
+ * alone.  The starter thread is woken only if it exists and is not
+ * already active.
+ */
+static void
+ifnet_start_common(struct ifnet *ifp, int resetfc)
+{
+	if (!(ifp->if_eflags & IFEF_TXSTART))
+		return;
+
+	lck_mtx_lock_spin(&ifp->if_start_lock);
+
+	if (resetfc)
+		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
+
+	if (!(ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
+		/* Record the request; signal the thread if it is idle */
+		ifp->if_start_req++;
+		if (ifp->if_start_thread != THREAD_NULL &&
+		    !ifp->if_start_active) {
+			wakeup_one((caddr_t)&ifp->if_start_thread);
+		}
+	}
+
+	lck_mtx_unlock(&ifp->if_start_lock);
+}
+
+/*
+ * Ask the interface's starter thread to run, without touching the
+ * flow-controlled state (resetfc == 0).
+ */
+void
+ifnet_start(struct ifnet *ifp)
+{
+	const int resetfc = 0;
+
+	ifnet_start_common(ifp, resetfc);
+}
+
+/*
+ * Starter thread for an interface.
+ *
+ * Thread entry point: v is the struct ifnet being served and w is
+ * unused.  The thread sleeps on &ifp->if_start_thread until signalled
+ * (see ifnet_start_common()), then repeatedly invokes the driver's
+ * if_start routine until no further request was posted while it ran.
+ * Once ifp->if_start_thread has been cleared, the thread purges the
+ * interface, drops its extra reference and terminates.
+ */
+static void
+ifnet_start_thread_fn(void *v, wait_result_t w)
+{
+#pragma unused(w)
+	struct ifnet *ifp = v;
+	char ifname[IFNAMSIZ + 1];
+	struct timespec *ts = NULL;
+	struct ifclassq *ifq = &ifp->if_snd;
+
+	/*
+	 * Treat the dedicated starter thread for lo0 as equivalent to
+	 * the driver workloop thread; if net_affinity is enabled for
+	 * the main input thread, associate this starter thread to it
+	 * by binding them with the same affinity tag. This is done
+	 * only once (as we only have one lo_ifp which never goes away.)
+	 */
+	if (ifp == lo_ifp) {
+		struct dlil_threading_info *inp = dlil_main_input_thread;
+		struct thread *tp = current_thread();
+
+		lck_mtx_lock(&inp->input_lck);
+		if (inp->net_affinity) {
+			u_int32_t tag = inp->tag;
+
+			VERIFY(inp->wloop_thr == THREAD_NULL);
+			VERIFY(inp->poll_thr == THREAD_NULL);
+			inp->wloop_thr = tp;
+			lck_mtx_unlock(&inp->input_lck);
+
+			/* Associate this thread with the affinity tag */
+			(void) dlil_affinity_set(tp, tag);
+		} else {
+			lck_mtx_unlock(&inp->input_lck);
+		}
+	}
+
+	/* Name under which this thread shows up while sleeping */
+	snprintf(ifname, sizeof (ifname), "%s_starter",
+	    if_name(ifp));
+
+	lck_mtx_lock_spin(&ifp->if_start_lock);
+
+	for (;;) {
+		/*
+		 * Only go to sleep while the interface still has a
+		 * starter thread.  If if_start_thread was cleared before
+		 * we got here, the wakeup has already come and gone, and
+		 * sleeping with no timeout would park this thread for
+		 * good; fall through to the termination path instead
+		 * (same pattern as ifnet_poll_thread_fn().)
+		 */
+		if (ifp->if_start_thread != THREAD_NULL) {
+			(void) msleep(&ifp->if_start_thread,
+			    &ifp->if_start_lock, (PZERO - 1) | PSPIN,
+			    ifname, ts);
+		}
+
+		/* interface is detached (maybe while asleep)? */
+		if (ifp->if_start_thread == THREAD_NULL) {
+			ifnet_set_start_cycle(ifp, NULL);
+			lck_mtx_unlock(&ifp->if_start_lock);
+			ifnet_purge(ifp);
+
+			if (dlil_verbose) {
+				printf("%s: starter thread terminated\n",
+				    if_name(ifp));
+			}
+
+			/* for the extra refcnt from kernel_thread_start() */
+			thread_deallocate(current_thread());
+			/* this is the end */
+			thread_terminate(current_thread());
+			/* NOTREACHED */
+			return;
+		}
+
+		ifp->if_start_active = 1;
+		for (;;) {
+			/* snapshot, to detect requests posted meanwhile */
+			u_int32_t req = ifp->if_start_req;
+
+			lck_mtx_unlock(&ifp->if_start_lock);
+			/* invoke the driver's start routine */
+			((*ifp->if_start)(ifp));
+			lck_mtx_lock_spin(&ifp->if_start_lock);
+
+			/* if there's no pending request, we're done */
+			if (req == ifp->if_start_req)
+				break;
+		}
+		ifp->if_start_req = 0;
+		ifp->if_start_active = 0;
+		/*
+		 * Wakeup N ns from now if rate-controlled by TBR, and if
+		 * there are still packets in the send queue which haven't
+		 * been dequeued so far; else sleep indefinitely (ts = NULL)
+		 * until ifnet_start() is called again.
+		 */
+		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
+		    &ifp->if_start_cycle : NULL);
+
+		/* an all-zero cycle means "no timed wakeup" */
+		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
+			ts = NULL;
+	}
+
+	/* NOTREACHED */
+}
+
+/*
+ * Set the starter thread's restart interval to *ts, or clear it when
+ * ts is NULL.  With dlil_verbose set, a non-zero nanosecond component
+ * is logged.
+ */
+void
+ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
+{
+	if (ts == NULL) {
+		bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
+		return;
+	}
+
+	ifp->if_start_cycle = *ts;
+
+	if (ts->tv_nsec != 0 && dlil_verbose) {
+		printf("%s: restart interval set to %lu nsec\n",
+		    if_name(ifp), ts->tv_nsec);
+	}
+}
+
+/*
+ * Post a poll request to the interface's poller thread, waking the
+ * thread if it exists and is currently inactive.
+ */
+static void
+ifnet_poll(struct ifnet *ifp)
+{
+	lck_mtx_lock_spin(&ifp->if_poll_lock);
+
+	ifp->if_poll_req++;
+	if (ifp->if_poll_thread != THREAD_NULL && !ifp->if_poll_active)
+		wakeup_one((caddr_t)&ifp->if_poll_thread);
+
+	lck_mtx_unlock(&ifp->if_poll_lock);
+}
+
+/*
+ * Poller thread for an interface using the opportunistic polling input
+ * model.
+ *
+ * Thread entry point: v is the struct ifnet being served and w is
+ * unused.  The thread sleeps on &ifp->if_poll_thread until signalled
+ * (see ifnet_poll()) or until the poll cycle elapses, then repeatedly
+ * calls the driver's if_input_poll routine and feeds whatever it
+ * returned to ifnet_input_common(), until no further request was posted
+ * while it ran.  Once ifp->if_poll_thread has been cleared, the thread
+ * drops its extra reference and terminates.
+ */
+static void
+ifnet_poll_thread_fn(void *v, wait_result_t w)
+{
+#pragma unused(w)
+	struct dlil_threading_info *inp;
+	struct ifnet *ifp = v;
+	char ifname[IFNAMSIZ + 1];
+	struct timespec *ts = NULL;
+	struct ifnet_stat_increment_param s;
+
+	/* Name under which this thread shows up while sleeping */
+	snprintf(ifname, sizeof (ifname), "%s_poller",
+	    if_name(ifp));
+	bzero(&s, sizeof (s));
+
+	lck_mtx_lock_spin(&ifp->if_poll_lock);
+
+	inp = ifp->if_inp;
+	VERIFY(inp != NULL);
+
+	for (;;) {
+		if (ifp->if_poll_thread != THREAD_NULL) {
+			(void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
+			    (PZERO - 1) | PSPIN, ifname, ts);
+		}
+
+		/* interface is detached (maybe while asleep)? */
+		if (ifp->if_poll_thread == THREAD_NULL) {
+			ifnet_set_poll_cycle(ifp, NULL);
+			lck_mtx_unlock(&ifp->if_poll_lock);
+
+			if (dlil_verbose) {
+				printf("%s: poller thread terminated\n",
+				    if_name(ifp));
+			}
+
+			/* for the extra refcnt from kernel_thread_start() */
+			thread_deallocate(current_thread());
+			/* this is the end */
+			thread_terminate(current_thread());
+			/* NOTREACHED */
+			return;
+		}
+
+		ifp->if_poll_active = 1;
+		for (;;) {
+			/*
+			 * Start every round from the "no packets" state so
+			 * that the checks below never read indeterminate
+			 * values should the driver return without storing
+			 * to one of its output arguments.
+			 */
+			struct mbuf *m_head = NULL, *m_tail = NULL;
+			u_int32_t m_lim, m_cnt = 0, m_totlen = 0;
+			/* snapshot, to detect requests posted meanwhile */
+			u_int16_t req = ifp->if_poll_req;
+
+			lck_mtx_unlock(&ifp->if_poll_lock);
+
+			/*
+			 * If no longer attached, there's nothing to do;
+			 * else hold an IO refcnt to prevent the interface
+			 * from being detached (will be released below.)
+			 */
+			if (!ifnet_is_attached(ifp, 1)) {
+				lck_mtx_lock_spin(&ifp->if_poll_lock);
+				break;
+			}
+
+			/*
+			 * Use the configured per-poll packet limit; if none
+			 * is set, fall back to the larger of the receive
+			 * queue limit and four times the packet high
+			 * watermark.
+			 */
+			m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
+			    MAX((qlimit(&inp->rcvq_pkts)),
+			    (inp->rxpoll_phiwat << 2));
+
+			if (dlil_verbose > 1) {
+				printf("%s: polling up to %d pkts, "
+				    "pkts avg %d max %d, wreq avg %d, "
+				    "bytes avg %d\n",
+				    if_name(ifp), m_lim,
+				    inp->rxpoll_pavg, inp->rxpoll_pmax,
+				    inp->rxpoll_wavg, inp->rxpoll_bavg);
+			}
+
+			/* invoke the driver's input poll routine */
+			((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
+			    &m_cnt, &m_totlen));
+
+			if (m_head != NULL) {
+				VERIFY(m_tail != NULL && m_cnt > 0);
+
+				if (dlil_verbose > 1) {
+					printf("%s: polled %d pkts, "
+					    "pkts avg %d max %d, wreq avg %d, "
+					    "bytes avg %d\n",
+					    if_name(ifp), m_cnt,
+					    inp->rxpoll_pavg, inp->rxpoll_pmax,
+					    inp->rxpoll_wavg, inp->rxpoll_bavg);
+				}
+
+				/* stats are required for extended variant */
+				s.packets_in = m_cnt;
+				s.bytes_in = m_totlen;
+
+				(void) ifnet_input_common(ifp, m_head, m_tail,
+				    &s, TRUE, TRUE);
+			} else {
+				if (dlil_verbose > 1) {
+					printf("%s: no packets, "
+					    "pkts avg %d max %d, wreq avg %d, "
+					    "bytes avg %d\n",
+					    if_name(ifp), inp->rxpoll_pavg,
+					    inp->rxpoll_pmax, inp->rxpoll_wavg,
+					    inp->rxpoll_bavg);
+				}
+
+				/* still notify the input thread, with no packets */
+				(void) ifnet_input_common(ifp, NULL, NULL,
+				    NULL, FALSE, TRUE);
+			}
+
+			/* Release the io ref count */
+			ifnet_decr_iorefcnt(ifp);
+
+			lck_mtx_lock_spin(&ifp->if_poll_lock);
+
+			/* if there's no pending request, we're done */
+			if (req == ifp->if_poll_req)
+				break;
+		}
+		ifp->if_poll_req = 0;
+		ifp->if_poll_active = 0;
+
+		/*
+		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
+		 * until ifnet_poll() is called again.
+		 */
+		ts = &ifp->if_poll_cycle;
+		if (ts->tv_sec == 0 && ts->tv_nsec == 0)
+			ts = NULL;
+	}
+
+	/* NOTREACHED */
+}
+
+/*
+ * Set the poller thread's poll interval to *ts, or clear it when ts is
+ * NULL.  With dlil_verbose set, a non-zero nanosecond component is
+ * logged.
+ */
+void
+ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
+{
+	if (ts == NULL) {
+		bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
+		return;
+	}
+
+	ifp->if_poll_cycle = *ts;
+
+	if (ts->tv_nsec != 0 && dlil_verbose) {
+		printf("%s: poll interval set to %lu nsec\n",
+		    if_name(ifp), ts->tv_nsec);
+	}
+}
+
+/*
+ * Flush the interface's send queue via if_qflush().  A NULL ifp, or an
+ * interface not flagged IFEF_TXSTART, is ignored.
+ */
+void
+ifnet_purge(struct ifnet *ifp)
+{
+	if (ifp == NULL)
+		return;
+
+	if (ifp->if_eflags & IFEF_TXSTART)
+		if_qflush(ifp, 0);
+}
+
+/*
+ * Propagate event ev to a send queue.  The caller must hold the queue
+ * lock.  Nothing happens if the queue is not ready; if a token bucket
+ * regulator is enabled, it is re-installed from its stored raw rate
+ * and percentage before the event is delivered.
+ */
+void
+ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
+{
+	IFCQ_LOCK_ASSERT_HELD(ifq);
+
+	if (IFCQ_IS_READY(ifq)) {
+		if (IFCQ_TBR_IS_ENABLED(ifq)) {
+			struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
+			    ifq->ifcq_tbr.tbr_percent, 0 };
+
+			(void) ifclassq_tbr_set(ifq, &tb, FALSE);
+		}
+
+		ifclassq_update(ifq, ev);
+	}
+}
+
+/*
+ * React to event ev on the receive side.  Only a link bandwidth change
+ * is acted upon: when net_rxpoll is set and the interface is flagged
+ * IFEF_RXPOLL, if_poll_update is bumped so that the input thread
+ * re-derives its polling parameters.  All other events are ignored.
+ */
+void
+ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
+{
+	if (ev != CLASSQ_EV_LINK_BANDWIDTH)
+		return;
+
+	if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
+		ifp->if_poll_update++;
+}
+
+/*
+ * Switch the interface's output scheduling model.
+ *
+ * Returns EINVAL for a NULL ifp or an unknown model, ENXIO if the
+ * interface is not flagged IFEF_TXSTART, and otherwise the result of
+ * ifclassq_pktsched_setup().  If that setup fails, the previous model
+ * is restored.
+ */
+errno_t
+ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
+{
+	struct ifclassq *ifq;
+	u_int32_t prev_model;
+	errno_t err;
+
+	if (ifp == NULL)
+		return (EINVAL);
+	if (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
+	    model != IFNET_SCHED_MODEL_NORMAL)
+		return (EINVAL);
+	if (!(ifp->if_eflags & IFEF_TXSTART))
+		return (ENXIO);
+
+	ifq = &ifp->if_snd;
+
+	IFCQ_LOCK(ifq);
+	prev_model = ifp->if_output_sched_model;
+	ifp->if_output_sched_model = model;
+	err = ifclassq_pktsched_setup(ifq);
+	if (err != 0) {
+		/* roll back to the model that was in effect */
+		ifp->if_output_sched_model = prev_model;
+	}
+	IFCQ_UNLOCK(ifq);
+
+	return (err);
+}
+
+/*
+ * Set the maximum length of the interface's send queue.  Returns EINVAL
+ * for a NULL ifp, ENXIO if the interface is not flagged IFEF_TXSTART,
+ * and 0 otherwise.
+ */
+errno_t
+ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
+{
+	if (ifp == NULL)
+		return (EINVAL);
+
+	if (!(ifp->if_eflags & IFEF_TXSTART))
+		return (ENXIO);
+
+	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
+	return (0);
+}
+
+/*
+ * Fetch the maximum length of the interface's send queue into *maxqlen.
+ * Returns EINVAL for a NULL argument, ENXIO if the interface is not
+ * flagged IFEF_TXSTART, and 0 otherwise.
+ */
+errno_t
+ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
+{
+	if (ifp == NULL || maxqlen == NULL)
+		return (EINVAL);
+
+	if (!(ifp->if_eflags & IFEF_TXSTART))
+		return (ENXIO);
+
+	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
+	return (0);
+}
+
+/*
+ * Fetch the number of packets in the interface's send queue, across
+ * all service classes (MBUF_SC_UNSPEC), into *pkts.  Returns EINVAL
+ * for a NULL argument, ENXIO if the interface is not flagged
+ * IFEF_TXSTART, and otherwise the result of ifclassq_get_len().
+ */
+errno_t
+ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
+{
+	if (ifp == NULL || pkts == NULL)
+		return (EINVAL);
+
+	if (!(ifp->if_eflags & IFEF_TXSTART))
+		return (ENXIO);
+
+	return (ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
+	    pkts, NULL));
+}
+
+/*
+ * Fetch the packet and/or byte count queued for service class sc on the
+ * interface's send queue.  At least one of pkts and bytes must be
+ * non-NULL.  Returns EINVAL for a NULL ifp, an invalid service class,
+ * or when both output pointers are NULL; ENXIO if the interface is not
+ * flagged IFEF_TXSTART; otherwise the result of ifclassq_get_len().
+ */
+errno_t
+ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
+    u_int32_t *pkts, u_int32_t *bytes)
+{
+	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
+	    (pkts == NULL && bytes == NULL))
+		return (EINVAL);
+
+	if (!(ifp->if_eflags & IFEF_TXSTART))
+		return (ENXIO);
+
+	return (ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes));
+}
+
+/*
+ * Set the limit of the interface's receive queue.  A maxqlen of 0
+ * selects the default (if_rcvq_maxlen); a non-zero value below
+ * IF_RCVQ_MINLEN is raised to that minimum.  Returns EINVAL for a NULL
+ * ifp, ENXIO if the interface is not flagged IFEF_RXPOLL or has no
+ * input thread, and 0 otherwise.
+ */
+errno_t
+ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
+{
+	struct dlil_threading_info *inp;
+	u_int32_t limit = maxqlen;
+
+	if (ifp == NULL)
+		return (EINVAL);
+	if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
+		return (ENXIO);
+
+	if (limit == 0)
+		limit = if_rcvq_maxlen;
+	else if (limit < IF_RCVQ_MINLEN)
+		limit = IF_RCVQ_MINLEN;
+
+	inp = ifp->if_inp;
+
+	lck_mtx_lock(&inp->input_lck);
+	qlimit(&inp->rcvq_pkts) = limit;
+	lck_mtx_unlock(&inp->input_lck);
+
+	return (0);
+}
+
+/*
+ * Fetch the limit of the interface's receive queue into *maxqlen.
+ * Returns EINVAL for a NULL argument, ENXIO if the interface is not
+ * flagged IFEF_RXPOLL or has no input thread, and 0 otherwise.
+ */
+errno_t
+ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
+{
+	struct dlil_threading_info *inp;
+
+	if (ifp == NULL || maxqlen == NULL)
+		return (EINVAL);
+	if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
+		return (ENXIO);
+
+	inp = ifp->if_inp;
+
+	/* read the limit under the input lock */
+	lck_mtx_lock(&inp->input_lck);
+	*maxqlen = qlimit(&inp->rcvq_pkts);
+	lck_mtx_unlock(&inp->input_lck);
+
+	return (0);
+}
+
+/*
+ * Enqueue a single packet on the interface's send queue and prod the
+ * starter thread.
+ *
+ * The packet is freed and an error returned when: an argument is NULL,
+ * m lacks a packet header or is chained to another packet (EINVAL);
+ * the interface is not flagged IFEF_TXSTART or is not attached (ENXIO);
+ * or the interface is not up (ENETDOWN).  Otherwise the result of
+ * ifclassq_enqueue() is returned.
+ */
+errno_t
+ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
+{
+	int error;
+
+	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
+	    m->m_nextpkt != NULL) {
+		if (m != NULL)
+			m_freem_list(m);
+		return (EINVAL);
+	}
+
+	/* flag tested without lock for performance */
+	if (!(ifp->if_eflags & IFEF_TXSTART) ||
+	    !(ifp->if_refflags & IFRF_ATTACHED)) {
+		m_freem(m);
+		return (ENXIO);
+	}
+
+	if (!(ifp->if_flags & IFF_UP)) {
+		m_freem(m);
+		return (ENETDOWN);
+	}
+
+	/* enqueue the packet */
+	error = ifclassq_enqueue(&ifp->if_snd, m);
+
+	/*
+	 * Tell the driver to start dequeueing; do this even when the queue
+	 * for the packet is suspended (EQSUSPENDED), as the driver could still
+	 * be dequeueing from other unsuspended queues.
+	 */
+	switch (error) {
+	case 0:
+	case EQFULL:
+	case EQSUSPENDED:
+		ifnet_start(ifp);
+		break;
+	default:
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Dequeue one packet from the interface's send queue into *mp, for
+ * interfaces using the normal output scheduling model.
+ *
+ * Returns EINVAL for a NULL argument; ENXIO if the interface is not
+ * flagged IFEF_TXSTART, uses a different scheduling model, or is not
+ * attached; otherwise the result of ifclassq_dequeue().  An IO refcnt
+ * is held on the interface for the duration of the dequeue.
+ */
+errno_t
+ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
+{
+	errno_t rc;
+
+	if (ifp == NULL || mp == NULL)
+		return (EINVAL);
+
+	if (!(ifp->if_eflags & IFEF_TXSTART) ||
+	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
+		return (ENXIO);
+
+	if (!ifnet_is_attached(ifp, 1))
+		return (ENXIO);
+
+	rc = ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL);
+
+	/* release the IO refcnt taken by ifnet_is_attached() */
+	ifnet_decr_iorefcnt(ifp);
+
+	return (rc);
+}
+
+/*
+ * Dequeue one packet of service class sc from the interface's send
+ * queue into *mp, for interfaces using the driver-managed output
+ * scheduling model.
+ *
+ * Returns EINVAL for a NULL argument or an invalid service class; ENXIO
+ * if the interface is not flagged IFEF_TXSTART, uses a different
+ * scheduling model, or is not attached; otherwise the result of
+ * ifclassq_dequeue_sc().  An IO refcnt is held on the interface for the
+ * duration of the dequeue.
+ */
+errno_t
+ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
+    struct mbuf **mp)
+{
+	errno_t rc;
+
+	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
+		return (EINVAL);
+
+	if (!(ifp->if_eflags & IFEF_TXSTART) ||
+	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
+		return (ENXIO);
+
+	if (!ifnet_is_attached(ifp, 1))
+		return (ENXIO);
+
+	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
+
+	/* release the IO refcnt taken by ifnet_is_attached() */
+	ifnet_decr_iorefcnt(ifp);
+
+	return (rc);
+}
+
+/*
+ * Dequeue up to limit packets from the interface's send queue.  head,
+ * tail, cnt and len are handed straight to ifclassq_dequeue(); only head
+ * is required to be non-NULL here.
+ *
+ * Returns EINVAL for a NULL ifp/head or a zero limit, ENXIO when the
+ * interface lacks IFEF_TXSTART, does not use IFNET_SCHED_MODEL_NORMAL, or
+ * is not attached; otherwise the result of ifclassq_dequeue().
+ */
+errno_t
+ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
+    struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
+{
+	errno_t err = EINVAL;
+
+	if (ifp != NULL && head != NULL && limit >= 1) {
+		err = ENXIO;
+		if ((ifp->if_eflags & IFEF_TXSTART) &&
+		    ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL &&
+		    ifnet_is_attached(ifp, 1)) {
+			err = ifclassq_dequeue(&ifp->if_snd, limit, head,
+			    tail, cnt, len);
+			ifnet_decr_iorefcnt(ifp);
+		}
+	}
+
+	return (err);
+}
+
+/*
+ * Dequeue up to limit packets of service class sc from the interface's
+ * send queue.  head, tail, cnt and len are handed straight to
+ * ifclassq_dequeue_sc(); only head is required to be non-NULL here.
+ *
+ * Returns EINVAL for a NULL ifp/head, a zero limit or an invalid service
+ * class, ENXIO when the interface lacks IFEF_TXSTART, does not use
+ * IFNET_SCHED_MODEL_DRIVER_MANAGED, or is not attached; otherwise the
+ * result of ifclassq_dequeue_sc().
+ */
+errno_t
+ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
+    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
+    u_int32_t *len)
+{
+	errno_t err = EINVAL;
+
+	if (ifp != NULL && head != NULL && limit >= 1 && MBUF_VALID_SC(sc)) {
+		err = ENXIO;
+		if ((ifp->if_eflags & IFEF_TXSTART) &&
+		    ifp->if_output_sched_model ==
+		    IFNET_SCHED_MODEL_DRIVER_MANAGED &&
+		    ifnet_is_attached(ifp, 1)) {
+			err = ifclassq_dequeue_sc(&ifp->if_snd, sc, limit,
+			    head, tail, cnt, len);
+			ifnet_decr_iorefcnt(ifp);
+		}
+	}
+
+	return (err);
+}
+
+/*
+ * Adapter exposing the interface's legacy framer through the extended
+ * framer signature: the optional pre/post outputs are set to zero and the
+ * call is forwarded to ifp->if_framer_legacy, whose result is returned.
+ */
+errno_t
+ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
+    const struct sockaddr *dest, const char *dest_linkaddr,
+    const char *frame_type, u_int32_t *pre, u_int32_t *post)
+{
+	/* the legacy framer does not report prepend/postpend lengths */
+	if (post != NULL)
+		*post = 0;
+	if (pre != NULL)
+		*pre = 0;
+
+	return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr,
+	    frame_type));
+}
+
+/*
+ * Run an inbound packet through the interface filters attached to ifp.
+ *
+ * A filter is invoked when it is not marked filt_skip, has an input
+ * callback, and either matches protocol_family or is protocol-agnostic
+ * (filt_protocol == 0).  The filter lock is dropped around each callback;
+ * the list is marked busy for the whole walk to prevent it from changing
+ * while the lock is released.
+ *
+ * Returns the first non-zero filter result (the walk stops there).
+ * Returns 0 once every filter has run, after clearing M_PROTO1 on *m_p
+ * (if non-NULL).
+ */
+static int
+dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
+    char **frame_header_p, protocol_family_t protocol_family)
+{
+	struct ifnet_filter *flt;
+	int result = 0;
+
+	lck_mtx_lock_spin(&ifp->if_flt_lock);
+	/* prevent filter list from changing in case we drop the lock */
+	if_flt_monitor_busy(ifp);
+	TAILQ_FOREACH(flt, &ifp->if_flt_head, filt_next) {
+		if (flt->filt_skip || flt->filt_input == NULL)
+			continue;
+		if (flt->filt_protocol != 0 &&
+		    flt->filt_protocol != protocol_family)
+			continue;
+
+		/* call out to the filter without holding the lock */
+		lck_mtx_unlock(&ifp->if_flt_lock);
+		result = (*flt->filt_input)(flt->filt_cookie, ifp,
+		    protocol_family, m_p, frame_header_p);
+		lck_mtx_lock_spin(&ifp->if_flt_lock);
+
+		if (result != 0)
+			break;
+	}
+	/* we're done with the filter list */
+	if_flt_monitor_unbusy(ifp);
+	lck_mtx_unlock(&ifp->if_flt_lock);
+
+	if (result != 0)
+		return (result);
+
+	/*
+	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
+	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
+	 */
+	if (*m_p != NULL)
+		(*m_p)->m_flags &= ~M_PROTO1;
+
+	return (0);
+}
+
+/*
+ * Run an outbound packet through the interface filters attached to ifp.
+ *
+ * A filter is invoked when it is not marked filt_skip, has an output
+ * callback, and either matches protocol_family or is protocol-agnostic
+ * (filt_protocol == 0).  The filter lock is dropped around each callback;
+ * the list is marked busy for the whole walk to prevent it from changing
+ * while the lock is released.
+ *
+ * Returns the first non-zero filter result (the walk stops there), or 0
+ * once every filter has run.
+ */
+static int
+dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
+    protocol_family_t protocol_family)
+{
+	struct ifnet_filter *flt;
+	int result = 0;
+
+	lck_mtx_lock_spin(&ifp->if_flt_lock);
+	/* prevent filter list from changing in case we drop the lock */
+	if_flt_monitor_busy(ifp);
+	TAILQ_FOREACH(flt, &ifp->if_flt_head, filt_next) {
+		if (flt->filt_skip || flt->filt_output == NULL)
+			continue;
+		if (flt->filt_protocol != 0 &&
+		    flt->filt_protocol != protocol_family)
+			continue;
+
+		/* call out to the filter without holding the lock */
+		lck_mtx_unlock(&ifp->if_flt_lock);
+		result = flt->filt_output(flt->filt_cookie, ifp,
+		    protocol_family, m_p);
+		lck_mtx_lock_spin(&ifp->if_flt_lock);
+
+		if (result != 0)
+			break;
+	}
+	/* we're done with the filter list */
+	if_flt_monitor_unbusy(ifp);
+	lck_mtx_unlock(&ifp->if_flt_lock);
+
+	return (result);
+}
+
+/*
+ * Deliver a list of packets (linked through m_nextpkt) to an attached
+ * protocol's input handler.
+ *
+ * v2 protocols receive the list in a single call; v1 protocols receive
+ * one unlinked packet at a time along with the frame header pointer that
+ * was stashed in m_pkthdr.pkt_hdr.  Whatever the handler rejects with an
+ * error other than EJUSTRETURN is freed here.
+ */
+static void
+dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
+{
+	int error;
+
+	if (ifproto->proto_kpi == kProtoKPI_v2) {
+		/* Version 2 protocols support packet lists */
+		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
+		    ifproto->protocol_family, m);
+		if (error != 0 && error != EJUSTRETURN)
+			m_freem_list(m);
+		return;
+	}
+
+	if (ifproto->proto_kpi != kProtoKPI_v1)
+		return;
+
+	/* Version 1 protocols get one packet at a time */
+	while (m != NULL) {
+		mbuf_t pkt = m;
+		char *hdr = pkt->m_pkthdr.pkt_hdr;
+
+		/* detach pkt from the list before handing it off */
+		m = pkt->m_nextpkt;
+		pkt->m_nextpkt = NULL;
+		pkt->m_pkthdr.pkt_hdr = NULL;
+
+		error = (*ifproto->kpi.v1.input)(ifproto->ifp,
+		    ifproto->protocol_family, pkt, hdr);
+		if (error != 0 && error != EJUSTRETURN)
+			m_freem(pkt);
+	}
+}
+
+static void
+dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
+ struct dlil_threading_info *inp, boolean_t poll)
+{
+ struct ifnet_stat_increment_param *d = &inp->stats;
+
+ if (s->packets_in != 0)
+ d->packets_in += s->packets_in;
+ if (s->bytes_in != 0)
+ d->bytes_in += s->bytes_in;
+ if (s->errors_in != 0)
+ d->errors_in += s->errors_in;
+
+ if (s->packets_out != 0)
+ d->packets_out += s->packets_out;
+ if (s->bytes_out != 0)
+ d->bytes_out += s->bytes_out;
+ if (s->errors_out != 0)
+ d->errors_out += s->errors_out;
+
+ if (s->collisions != 0)
+ d->collisions += s->collisions;
+ if (s->dropped != 0)
+ d->dropped += s->dropped;
+
+ if (poll)
+ PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
+}
+
+static void
+dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
+{
+ struct ifnet_stat_increment_param *s = &inp->stats;
+
+ /*
+ * Use of atomic operations is unavoidable here because
+ * these stats may also be incremented elsewhere via KPIs.
+ */
+ if (s->packets_in != 0) {
+ atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
+ s->packets_in = 0;
+ }
+ if (s->bytes_in != 0) {
+ atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
+ s->bytes_in = 0;
+ }
+ if (s->errors_in != 0) {
+ atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
+ s->errors_in = 0;
+ }
+
+ if (s->packets_out != 0) {
+ atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
+ s->packets_out = 0;
+ }
+ if (s->bytes_out != 0) {
+ atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
+ s->bytes_out = 0;
+ }
+ if (s->errors_out != 0) {
+ atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
+ s->errors_out = 0;
+ }
+
+ if (s->collisions != 0) {
+ atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
+ s->collisions = 0;
+ }
+ if (s->dropped != 0) {
+ atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
+ s->dropped = 0;
+ }
+ /*
+ * If we went over the threshold, notify NetworkStatistics.
+ */
+ if (ifp->if_data_threshold &&
+ (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
+ ifp->if_data_threshold) {
+ ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;
+ nstat_ifnet_threshold_reached(ifp->if_index);
+ }
+ /*
+ * No need for atomic operations as they are modified here
+ * only from within the DLIL input thread context.
+ */
+ if (inp->tstats.packets != 0) {
+ inp->pstats.ifi_poll_packets += inp->tstats.packets;
+ inp->tstats.packets = 0;
+ }
+ if (inp->tstats.bytes != 0) {
+ inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
+ inp->tstats.bytes = 0;
+ }
+}
+
+/*
+ * Legacy entry point for inbound packet lists: forwards to the common
+ * input routine with no packet count, polling mode off, and the
+ * "extended" flag clear.
+ */
+__private_extern__ void
+dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
+{
+	/*
+	 * This is a void function, so simply make the call; ISO C does
+	 * not permit "return <expression>;" here.
+	 */
+	dlil_input_packet_list_common(ifp, m, 0,
+	    IFNET_MODEL_INPUT_POLL_OFF, FALSE);
+}
+
+/*
+ * Extended entry point for inbound packet lists: forwards the caller's
+ * packet count and input model to the common input routine with the
+ * "extended" flag set.
+ */
+__private_extern__ void
+dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
+    u_int32_t cnt, ifnet_model_t mode)
+{
+	/*
+	 * This is a void function, so simply make the call; ISO C does
+	 * not permit "return <expression>;" here.
+	 */
+	dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
+}
+
+/*
+ * Common inbound path: walk a list of packets linked through m_nextpkt
+ * and deliver each one to the protocol attached to its interface.
+ *
+ * ifp_param - receiving interface for every packet, or NULL to take the
+ *             interface from each packet's m_pkthdr.rcvif
+ * m         - head of the packet list
+ * cnt, mode, ext - when ext is set, mode is IFNET_MODEL_INPUT_POLL_ON,
+ *             cnt > 1 and if_rxpoll_interval_pkts is non-zero, cnt seeds
+ *             a countdown that triggers ifnet_poll() at intervals while
+ *             the list is processed
+ *
+ * For each packet: demux to a protocol family, run the interface input
+ * filters (skipped for VLAN-tagged packets), look up the attached
+ * protocol, and append the packet to a pending list.  Consecutive packets
+ * for the same protocol attachment are batched and handed to
+ * dlil_ifproto_input() together.
+ */
+static void
+dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
+ u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
+{
+ int error = 0;
+ protocol_family_t protocol_family;
+ mbuf_t next_packet;
+ ifnet_t ifp = ifp_param;
+ char * frame_header;
+ struct if_proto * last_ifproto = NULL; /* protocol owning the pending list */
+ mbuf_t pkt_first = NULL; /* head of the pending list */
+ mbuf_t * pkt_next = NULL; /* where the next pending packet gets linked */
+ u_int32_t poll_thresh = 0, poll_ival = 0;
+
+ KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);
+
+ /* poll_thresh stays 0 (no periodic ifnet_poll below) unless all of these hold */
+ if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
+ (poll_ival = if_rxpoll_interval_pkts) > 0)
+ poll_thresh = cnt;
+
+ while (m != NULL) {
+ struct if_proto *ifproto = NULL;
+ int iorefcnt = 0;
+ uint32_t pktf_mask; /* pkt flags to preserve */
+
+ if (ifp_param == NULL)
+ ifp = m->m_pkthdr.rcvif;
+
+ if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
+ poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
+ ifnet_poll(ifp);
+
+ /* Check if this mbuf looks valid */
+ MBUF_INPUT_CHECK(m, ifp);
+
+ /* unlink the packet and take its stashed frame header pointer */
+ next_packet = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ frame_header = m->m_pkthdr.pkt_hdr;
+ m->m_pkthdr.pkt_hdr = NULL;
+
+ /*
+ * Get an IO reference count if the interface is not
+ * loopback (lo0) and it is attached; lo0 never goes
+ * away, so optimize for that.
+ */
+ if (ifp != lo_ifp) {
+ if (!ifnet_is_attached(ifp, 1)) {
+ m_freem(m);
+ goto next;
+ }
+ iorefcnt = 1;
+ pktf_mask = 0;
+ } else {
+ /*
+ * If this arrived on lo0, preserve interface addr
+ * info to allow for connectivity between loopback
+ * and local interface addresses.
+ */
+ pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
+ }
+
+ /* make sure packet comes in clean */
+ m_classifier_init(m, pktf_mask);
+
+ ifp_inc_traffic_class_in(ifp, m);
+
+ /* find which protocol family this packet is for */
+ ifnet_lock_shared(ifp);
+ error = (*ifp->if_demux)(ifp, m, frame_header,
+ &protocol_family);
+ ifnet_lock_done(ifp);
+ if (error != 0) {
+ /* EJUSTRETURN: skip the packet here without freeing it */
+ if (error == EJUSTRETURN)
+ goto next;
+ /* other errors: continue with no protocol family */
+ protocol_family = 0;
+ }
+
+ if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
+ !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
+ dlil_input_cksum_dbg(ifp, m, frame_header,
+ protocol_family);
+
+ /*
+ * For partial checksum offload, we expect the driver to
+ * set the start offset indicating the start of the span
+ * that is covered by the hardware-computed checksum;
+ * adjust this start offset accordingly because the data
+ * pointer has been advanced beyond the link-layer header.
+ *
+ * Don't adjust if the interface is a bridge member, as
+ * the adjustment will occur from the context of the
+ * bridge interface during input.
+ */
+ if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
+ (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
+ (CSUM_DATA_VALID | CSUM_PARTIAL)) {
+ int adj;
+
+ /*
+ * Invalidate the hardware checksum if the frame header
+ * pointer is missing or outside [datastart, m_data], or
+ * if the adjustment would exceed the start offset.
+ */
+ if (frame_header == NULL ||
+ frame_header < (char *)mbuf_datastart(m) ||
+ frame_header > (char *)m->m_data ||
+ (adj = (m->m_data - frame_header)) >
+ m->m_pkthdr.csum_rx_start) {
+ m->m_pkthdr.csum_data = 0;
+ m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
+ hwcksum_in_invalidated++;
+ } else {
+ m->m_pkthdr.csum_rx_start -= adj;
+ }
+ }
+
+ pktap_input(ifp, protocol_family, m, frame_header);
+
+ if (m->m_flags & (M_BCAST|M_MCAST))
+ atomic_add_64(&ifp->if_imcasts, 1);
+
+ /* run interface filters, exclude VLAN packets PR-3586856 */
+ if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
+ error = dlil_interface_filters_input(ifp, &m,
+ &frame_header, protocol_family);
+ if (error != 0) {
+ if (error != EJUSTRETURN)
+ m_freem(m);
+ goto next;
+ }
+ }
+ /*
+ * error can still be non-zero here: it holds the if_demux result
+ * when the filters above were skipped for a VLAN-tagged packet.
+ * Packets flagged M_PROMISC are dropped as well.
+ */
+ if (error != 0 || ((m->m_flags & M_PROMISC) != 0) ) {
+ m_freem(m);
+ goto next;
+ }
+
+ /* Lookup the protocol attachment to this interface */
+ if (protocol_family == 0) {
+ ifproto = NULL;
+ } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
+ (last_ifproto->protocol_family == protocol_family)) {
+ /* same attachment as the previous packet: reuse it */
+ VERIFY(ifproto == NULL);
+ ifproto = last_ifproto;
+ if_proto_ref(last_ifproto);
+ } else {
+ VERIFY(ifproto == NULL);
+ ifnet_lock_shared(ifp);
+ /* callee holds a proto refcnt upon success */
+ ifproto = find_attached_proto(ifp, protocol_family);
+ ifnet_lock_done(ifp);
+ }
+ if (ifproto == NULL) {
+ /* no protocol for this packet, discard */
+ m_freem(m);
+ goto next;
+ }
+ if (ifproto != last_ifproto) {
+ if (last_ifproto != NULL) {
+ /* pass up the list for the previous protocol */
+ dlil_ifproto_input(last_ifproto, pkt_first);
+ pkt_first = NULL;
+ if_proto_free(last_ifproto);
+ }
+ /* last_ifproto keeps its own reference, separate from ifproto's */
+ last_ifproto = ifproto;
+ if_proto_ref(ifproto);
+ }
+ /* extend the list */
+ m->m_pkthdr.pkt_hdr = frame_header;
+ if (pkt_first == NULL) {
+ pkt_first = m;
+ } else {
+ *pkt_next = m;
+ }
+ pkt_next = &m->m_nextpkt;
+
+next:
+ if (next_packet == NULL && last_ifproto != NULL) {
+ /* pass up the last list of packets */
+ dlil_ifproto_input(last_ifproto, pkt_first);
+ if_proto_free(last_ifproto);
+ last_ifproto = NULL;
+ }
+ /* drop this iteration's protocol reference, if one was taken */
+ if (ifproto != NULL) {
+ if_proto_free(ifproto);
+ ifproto = NULL;
+ }
+
+ m = next_packet;
+
+ /* update the driver's multicast filter, if needed */
+ if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
+ ifp->if_updatemcasts = 0;
+ if (iorefcnt == 1)
+ ifnet_decr_iorefcnt(ifp);
+ }
+
+ KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
+}
+
+/*
+ * Ask the driver to refresh its link-layer multicast filter by issuing
+ * SIOCADDMULTI with a NULL argument, and log the outcome together with
+ * the interface's if_updatemcasts count.  EAFNOSUPPORT from the ioctl is
+ * treated as success.  Always returns 0.
+ */
+errno_t
+if_mcasts_update(struct ifnet *ifp)
+{
+	errno_t err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
+	const char *outcome;
+
+	if (err == EAFNOSUPPORT)
+		err = 0;
+
+	outcome = (err == 0) ? "successfully restored" : "failed to restore";
+	printf("%s: %s %d suspended link-layer multicast membership(s) "
+	    "(err=%d)\n", if_name(ifp), outcome, ifp->if_updatemcasts, err);
+
+	/* just return success */
+	return (0);
+}
+
+
+/* Protocol snapshots up to this size use the on-stack array below. */
+#define TMP_IF_PROTO_ARR_SIZE 10
+/*
+ * Deliver a kernel event concerning ifp to, in order: the interface
+ * filters with an event callback, every attached protocol with an event
+ * callback, and the interface's own if_event handler; then post the event
+ * via kev_post_msg() and return its result.
+ *
+ * If the interface is not attached, the filter/protocol/interface
+ * callbacks are skipped and the event is only posted.
+ */
+static int
+dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
+{
+ struct ifnet_filter *filter = NULL;
+ struct if_proto *proto = NULL;
+ int if_proto_count = 0;
+ struct if_proto **tmp_ifproto_arr = NULL;
+ struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
+ int tmp_ifproto_arr_idx = 0;
+ bool tmp_malloc = false; /* true when tmp_ifproto_arr was MALLOC'ed */
+
+ /* Get an io ref count if the interface is attached */
+ if (!ifnet_is_attached(ifp, 1))
+ goto done;
+
+ /*
+ * Pass the event to the interface filters
+ */
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+ /* prevent filter list from changing in case we drop the lock */
+ if_flt_monitor_busy(ifp);
+ TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
+ if (filter->filt_event != NULL) {
+ /* the filter lock is not held across the callback */
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
+ filter->filt_event(filter->filt_cookie, ifp,
+ filter->filt_protocol, event);
+
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+ }
+ }
+ /* we're done with the filter list */
+ if_flt_monitor_unbusy(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
+ /*
+ * An embedded tmp_list_entry in if_proto may still get
+ * over-written by another thread after giving up ifnet lock,
+ * therefore we are avoiding embedded pointers here.
+ *
+ * Instead, snapshot referenced pointers to all attached protocols
+ * into a temporary array while the ifnet lock is held; the event
+ * callbacks are then invoked after the lock has been released.
+ */
+ ifnet_lock_shared(ifp);
+ if_proto_count = dlil_ifp_proto_count(ifp);
+ if (if_proto_count) {
+ int i;
+ VERIFY(ifp->if_proto_hash != NULL);
+ if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
+ tmp_ifproto_arr = tmp_ifproto_stack_arr;
+ } else {
+ MALLOC(tmp_ifproto_arr, struct if_proto **,
+ sizeof (*tmp_ifproto_arr) * if_proto_count,
+ M_TEMP, M_ZERO);
+ if (tmp_ifproto_arr == NULL) {
+ /* no snapshot: skip the protocols, still notify the interface */
+ ifnet_lock_done(ifp);
+ goto cleanup;
+ }
+ tmp_malloc = true;
+ }
+
+ for (i = 0; i < PROTO_HASH_SLOTS; i++) {
+ SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
+ next_hash) {
+ /* reference dropped after the callback, below */
+ if_proto_ref(proto);
+ tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
+ tmp_ifproto_arr_idx++;
+ }
+ }
+ VERIFY(if_proto_count == tmp_ifproto_arr_idx);
+ }
+ ifnet_lock_done(ifp);
+
+ for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
+ tmp_ifproto_arr_idx++) {
+ proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
+ VERIFY(proto != NULL);
+ proto_media_event eventp =
+ (proto->proto_kpi == kProtoKPI_v1 ?
+ proto->kpi.v1.event :
+ proto->kpi.v2.event);
+
+ if (eventp != NULL) {
+ eventp(ifp, proto->protocol_family,
+ event);
+ }
+ if_proto_free(proto);
+ }
+
+cleanup:
+ if (tmp_malloc) {
+ FREE(tmp_ifproto_arr, M_TEMP);
+ }
+
+ /* Pass the event to the interface */
+ if (ifp->if_event != NULL)
+ ifp->if_event(ifp, event);
+
+ /* Release the io ref count */
+ ifnet_decr_iorefcnt(ifp);
+done:
+ return (kev_post_msg(event));
+}
+
+/*
+ * Wrap a caller-supplied kern_event_msg in a kev_msg and deliver it for
+ * ifp through dlil_event_internal().
+ *
+ * Returns EINVAL if either argument is NULL, otherwise the result of
+ * dlil_event_internal().
+ */
+errno_t
+ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
+{
+	struct kev_msg msg;
+
+	if (ifp == NULL || event == NULL)
+		return (EINVAL);
+
+	bzero(&msg, sizeof (msg));
+
+	/* copy the identifying codes across */
+	msg.vendor_code = event->vendor_code;
+	msg.kev_class = event->kev_class;
+	msg.kev_subclass = event->kev_subclass;
+	msg.event_code = event->event_code;
+
+	/* single data vector: everything after the fixed header */
+	msg.dv[0].data_ptr = &event->event_data[0];
+	msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
+	msg.dv[1].data_length = 0;
+
+	return (dlil_event_internal(ifp, &msg));
+}
+
+#if CONFIG_MACF_NET
+#include <netinet/ip6.h>
+#include <netinet/ip.h>
+/*
+ * Classify an outbound packet as SOCK_STREAM (TCP), SOCK_DGRAM (UDP) or
+ * SOCK_RAW (everything else, including raw output and address families
+ * other than PF_INET/PF_INET6) by inspecting its IP/IPv6 header.
+ *
+ * The header is made contiguous with m_pullup(); on success *mp is
+ * updated to the mbuf it returns.
+ *
+ * NOTE(review): when m_pullup() returns NULL, *mp is left untouched.
+ * m_pullup() conventionally frees the chain on failure, which would leave
+ * the caller's pointer stale -- confirm against the mbuf implementation
+ * and the callers.
+ */
+static int
+dlil_get_socket_type(struct mbuf **mp, int family, int raw)
+{
+	struct mbuf *m;
+	struct ip *ip;
+	struct ip6_hdr *ip6;
+	int proto;
+
+	if (raw)
+		return (SOCK_RAW);
+
+	switch (family) {
+	case PF_INET:
+		m = m_pullup(*mp, sizeof(struct ip));
+		if (m == NULL)
+			return (SOCK_RAW);
+		*mp = m;
+		ip = mtod(m, struct ip *);
+		proto = ip->ip_p;
+		break;
+	case PF_INET6:
+		m = m_pullup(*mp, sizeof(struct ip6_hdr));
+		if (m == NULL)
+			return (SOCK_RAW);
+		*mp = m;
+		ip6 = mtod(m, struct ip6_hdr *);
+		proto = ip6->ip6_nxt;
+		break;
+	default:
+		return (SOCK_RAW);
+	}
+
+	if (proto == IPPROTO_TCP)
+		return (SOCK_STREAM);
+	if (proto == IPPROTO_UDP)
+		return (SOCK_DGRAM);
+
+	return (SOCK_RAW);
+}
+#endif
+
+/*
+ * Charge an inbound packet to the interface's per-traffic-class input
+ * counters (packets and bytes), plus the "privileged" counters when the
+ * packet's traffic class is privileged.
+ *
+ * This is mostly called from the context of the DLIL input thread;
+ * because of that there is no need for atomic operations.
+ */
+static __inline void
+ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
+{
+	/* nothing to account unless the mbuf has a packet header */
+	if (m->m_flags & M_PKTHDR) {
+		switch (m_get_traffic_class(m)) {
+		case MBUF_TC_BK:
+			ifp->if_tc.ifi_ibkpackets++;
+			ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
+			break;
+		case MBUF_TC_BE:
+			ifp->if_tc.ifi_ibepackets++;
+			ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
+			break;
+		case MBUF_TC_VI:
+			ifp->if_tc.ifi_ivipackets++;
+			ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
+			break;
+		case MBUF_TC_VO:
+			ifp->if_tc.ifi_ivopackets++;
+			ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
+			break;
+		default:
+			/* other classes are not tracked */
+			break;
+		}
+
+		if (mbuf_is_traffic_class_privileged(m)) {
+			ifp->if_tc.ifi_ipvpackets++;
+			ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
+		}
+	}
+}
+
+/*
+ * Charge an outbound packet to the interface's per-traffic-class output
+ * counters (packets and bytes), plus the "privileged" counters when the
+ * packet's traffic class is privileged.
+ *
+ * This is called from DLIL output, hence multiple threads could end
+ * up modifying the statistics.  We trade off accuracy for performance
+ * by not using atomic operations here.
+ */
+static __inline void
+ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
+{
+	/* nothing to account unless the mbuf has a packet header */
+	if (m->m_flags & M_PKTHDR) {
+		switch (m_get_traffic_class(m)) {
+		case MBUF_TC_BK:
+			ifp->if_tc.ifi_obkpackets++;
+			ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
+			break;
+		case MBUF_TC_BE:
+			ifp->if_tc.ifi_obepackets++;
+			ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
+			break;
+		case MBUF_TC_VI:
+			ifp->if_tc.ifi_ovipackets++;
+			ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
+			break;
+		case MBUF_TC_VO:
+			ifp->if_tc.ifi_ovopackets++;
+			ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
+			break;
+		default:
+			/* other classes are not tracked */
+			break;
+		}
+
+		if (mbuf_is_traffic_class_privileged(m)) {
+			ifp->if_tc.ifi_opvpackets++;
+			ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
+		}
+	}
+}
+
+/*
+ * dlil_output
+ *
+ * Send a list of packets (linked through m_nextpkt) out an interface:
+ * run the protocol's pre-output handler on the first packet, frame each
+ * packet, run the interface output filters, then hand the packet(s) to
+ * the driver's if_output routine -- one at a time, or as a single chain
+ * when the interface sets IFEF_SENDLIST.
+ *
+ * Caller should have a lock on the protocol domain if the protocol
+ * doesn't support finer grained locking. In most cases, the lock
+ * will be held from the socket layer and won't be released until
+ * we return back to the socket layer.
+ *
+ * This does mean that we must take a protocol lock before we take
+ * an interface lock if we're going to take both. This makes sense
+ * because a protocol is likely to interact with an ifp while it
+ * is under the protocol lock.
+ *
+ * An advisory code will be returned if adv is not null. This
+ * can be used to provide feedback about interface queues to the
+ * application.
+ *
+ * Returns 0 on success (EJUSTRETURN, EQFULL and EQSUSPENDED are mapped
+ * to 0), ENXIO if the interface is not attached or (for non-raw output)
+ * the protocol is not attached to it, EMSGSIZE if a TSO packet is headed
+ * for an interface that no longer advertises TSO, or an error from the
+ * pre-output handler, framer, filters or driver.
+ *
+ * Packets not handed to the driver are freed here.  That includes, on the
+ * EMSGSIZE path, packets that were already chained for IFEF_SENDLIST
+ * transmission but not yet passed to if_output.
+ */
+errno_t
+dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
+    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
+{
+	char *frame_type = NULL;
+	char *dst_linkaddr = NULL;
+	int retval = 0;
+	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
+	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
+	struct if_proto *proto = NULL;
+	mbuf_t m;
+	mbuf_t send_head = NULL;	/* chain built for IFEF_SENDLIST */
+	mbuf_t *send_tail = &send_head;
+	int iorefcnt = 0;
+	u_int32_t pre = 0, post = 0;
+	u_int32_t fpkts = 0, fbytes = 0;	/* forwarded pkts/bytes sent */
+	int32_t flen = 0;
+
+	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
+
+	/* Get an io refcnt if the interface is attached to prevent ifnet_detach
+	 * from happening while this operation is in progress */
+	if (!ifnet_is_attached(ifp, 1)) {
+		retval = ENXIO;
+		goto cleanup;
+	}
+	iorefcnt = 1;
+
+	/* update the driver's multicast filter, if needed */
+	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
+		ifp->if_updatemcasts = 0;
+
+	frame_type = frame_type_buffer;
+	dst_linkaddr = dst_linkaddr_buffer;
+
+	if (raw == 0) {
+		ifnet_lock_shared(ifp);
+		/* callee holds a proto refcnt upon success */
+		proto = find_attached_proto(ifp, proto_family);
+		if (proto == NULL) {
+			ifnet_lock_done(ifp);
+			retval = ENXIO;
+			goto cleanup;
+		}
+		ifnet_lock_done(ifp);
+	}
+
+preout_again:
+	if (packetlist == NULL)
+		goto cleanup;
+
+	/* detach the first packet from the list */
+	m = packetlist;
+	packetlist = packetlist->m_nextpkt;
+	m->m_nextpkt = NULL;
+
+	if (raw == 0) {
+		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
+		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
+		retval = 0;
+		if (preoutp != NULL) {
+			retval = preoutp(ifp, proto_family, &m, dest, route,
+			    frame_type, dst_linkaddr);
+
+			if (retval != 0) {
+				/* EJUSTRETURN: move on to the next packet */
+				if (retval == EJUSTRETURN)
+					goto preout_again;
+				m_freem(m);
+				goto cleanup;
+			}
+		}
+	}
+
+#if CONFIG_MACF_NET
+	retval = mac_ifnet_check_transmit(ifp, m, proto_family,
+	    dlil_get_socket_type(&m, proto_family, raw));
+	if (retval != 0) {
+		m_freem(m);
+		goto cleanup;
+	}
+#endif
+
+	do {
+#if CONFIG_DTRACE
+		if (!raw && proto_family == PF_INET) {
+			struct ip *ip = mtod(m, struct ip*);
+			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
+			    struct ip *, ip, struct ifnet *, ifp,
+			    struct ip *, ip, struct ip6_hdr *, NULL);
+
+		} else if (!raw && proto_family == PF_INET6) {
+			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*);
+			DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL,
+			    struct ip6_hdr *, ip6, struct ifnet*, ifp,
+			    struct ip*, NULL, struct ip6_hdr *, ip6);
+		}
+#endif /* CONFIG_DTRACE */
+
+		if (raw == 0 && ifp->if_framer != NULL) {
+			int rcvif_set = 0;
+
+			/*
+			 * If this is a broadcast packet that needs to be
+			 * looped back into the system, set the inbound ifp
+			 * to that of the outbound ifp.  This will allow
+			 * us to determine that it is a legitimate packet
+			 * for the system.  Only set the ifp if it's not
+			 * already set, just to be safe.
+			 */
+			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
+			    m->m_pkthdr.rcvif == NULL) {
+				m->m_pkthdr.rcvif = ifp;
+				rcvif_set = 1;
+			}
+
+			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
+			    frame_type, &pre, &post);
+			if (retval != 0) {
+				if (retval != EJUSTRETURN)
+					m_freem(m);
+				goto next;
+			}
+
+			/*
+			 * For partial checksum offload, adjust the start
+			 * and stuff offsets based on the prepended header.
+			 */
+			if ((m->m_pkthdr.csum_flags &
+			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
+			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
+				m->m_pkthdr.csum_tx_stuff += pre;
+				m->m_pkthdr.csum_tx_start += pre;
+			}
+
+			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
+				dlil_output_cksum_dbg(ifp, m, pre,
+				    proto_family);
+
+			/*
+			 * Clear the ifp if it was set above, and to be
+			 * safe, only if it is still the same as the
+			 * outbound ifp we have in context.  If it was
+			 * looped back, then a copy of it was sent to the
+			 * loopback interface with the rcvif set, and we
+			 * are clearing the one that will go down to the
+			 * layer below.
+			 */
+			if (rcvif_set && m->m_pkthdr.rcvif == ifp)
+				m->m_pkthdr.rcvif = NULL;
+		}
+
+		/*
+		 * Let interface filters (if any) do their thing ...
+		 */
+		/* Do not pass VLAN tagged packets to filters PR-3586856 */
+		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
+			retval = dlil_interface_filters_output(ifp,
+			    &m, proto_family);
+			if (retval != 0) {
+				if (retval != EJUSTRETURN)
+					m_freem(m);
+				goto next;
+			}
+		}
+		/*
+		 * Strip away M_PROTO1 bit prior to sending packet
+		 * to the driver as this field may be used by the driver
+		 */
+		m->m_flags &= ~M_PROTO1;
+
+		/*
+		 * If the underlying interface is not capable of handling a
+		 * packet whose data portion spans across physically disjoint
+		 * pages, we need to "normalize" the packet so that we pass
+		 * down a chain of mbufs where each mbuf points to a span that
+		 * resides in the system page boundary.  If the packet does
+		 * not cross page(s), the following is a no-op.
+		 */
+		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
+			if ((m = m_normalize(m)) == NULL)
+				goto next;
+		}
+
+		/*
+		 * If this is a TSO packet, make sure the interface still
+		 * advertise TSO capability.
+		 */
+		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
+			retval = EMSGSIZE;
+			m_freem(m);
+			/*
+			 * We are abandoning the whole request.  Packets
+			 * already chained for IFEF_SENDLIST transmission
+			 * have not been given to the driver yet and nothing
+			 * past this point references them, so free them
+			 * here rather than leak them.
+			 */
+			if (send_head != NULL) {
+				mbuf_freem_list(send_head);
+				send_head = NULL;
+			}
+			goto cleanup;
+		}
+
+		/*
+		 * If the packet service class is not background,
+		 * update the timestamp to indicate recent activity
+		 * on a foreground socket.
+		 */
+		if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND) &&
+		    (m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
+		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB)
+			ifp->if_fg_sendts = net_uptime();
+
+		ifp_inc_traffic_class_out(ifp, m);
+		pktap_output(ifp, proto_family, m, pre, post);
+
+		/*
+		 * Finally, call the driver.
+		 */
+		if (ifp->if_eflags & IFEF_SENDLIST) {
+			/* defer: chain the packet and send the list later */
+			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
+				flen += (m_pktlen(m) - (pre + post));
+				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
+			}
+			*send_tail = m;
+			send_tail = &m->m_nextpkt;
+		} else {
+			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
+				flen = (m_pktlen(m) - (pre + post));
+				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
+			} else {
+				flen = 0;
+			}
+			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
+			    0, 0, 0, 0, 0);
+			retval = (*ifp->if_output)(ifp, m);
+			/* queue full/suspended is advisory, not an error */
+			if (retval == EQFULL || retval == EQSUSPENDED) {
+				if (adv != NULL && adv->code == FADV_SUCCESS) {
+					adv->code = (retval == EQFULL ?
+					    FADV_FLOW_CONTROLLED :
+					    FADV_SUSPENDED);
+				}
+				retval = 0;
+			}
+			if (retval == 0 && flen > 0) {
+				fbytes += flen;
+				fpkts++;
+			}
+			if (retval != 0 && dlil_verbose) {
+				printf("%s: output error on %s retval = %d\n",
+				    __func__, if_name(ifp),
+				    retval);
+			}
+			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
+			    0, 0, 0, 0, 0);
+		}
+		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+next:
+		/* detach the next packet, if any */
+		m = packetlist;
+		if (m != NULL) {
+			packetlist = packetlist->m_nextpkt;
+			m->m_nextpkt = NULL;
+		}
+	} while (m != NULL);
+
+	/* IFEF_SENDLIST: hand the accumulated chain to the driver at once */
+	if (send_head != NULL) {
+		VERIFY(ifp->if_eflags & IFEF_SENDLIST);
+		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
+		    0, 0, 0, 0, 0);
+		retval = (*ifp->if_output)(ifp, send_head);
+		if (retval == EQFULL || retval == EQSUSPENDED) {
+			if (adv != NULL) {
+				adv->code = (retval == EQFULL ?
+				    FADV_FLOW_CONTROLLED : FADV_SUSPENDED);
+			}
+			retval = 0;
+		}
+		if (retval == 0 && flen > 0) {
+			fbytes += flen;
+			fpkts++;
+		}
+		if (retval != 0 && dlil_verbose) {
+			printf("%s: output error on %s retval = %d\n",
+			    __func__, if_name(ifp), retval);
+		}
+		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
+	}
+
+	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+cleanup:
+	if (fbytes > 0)
+		ifp->if_fbytes += fbytes;
+	if (fpkts > 0)
+		ifp->if_fpackets += fpkts;
+	if (proto != NULL)
+		if_proto_free(proto);
+	if (packetlist) /* if any packets are left, clean up */
+		mbuf_freem_list(packetlist);
+	if (retval == EJUSTRETURN)
+		retval = 0;
+	if (iorefcnt == 1)
+		ifnet_decr_iorefcnt(ifp);
+
+	return (retval);
+}
+
+/*
+ * Dispatch an ioctl for ifp to, in order: the interface filters with an
+ * ioctl callback (matching proto_fam or protocol-agnostic), the attached
+ * protocol for proto_fam (when proto_fam is non-zero), and finally the
+ * interface's own if_ioctl handler.
+ *
+ * retval starts as EOPNOTSUPP and is replaced by the first handler that
+ * returns something else; a handler returning EJUSTRETURN always replaces
+ * it.  Any resulting value other than 0 or EOPNOTSUPP ends the dispatch.
+ * ENOTSUP from a handler is treated as EOPNOTSUPP, and EJUSTRETURN is
+ * reported to the caller as 0.
+ *
+ * Returns EINVAL for a NULL ifp or a zero ioctl_code, and EOPNOTSUPP if
+ * the interface is not attached.
+ */
+errno_t
+ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
+ void *ioctl_arg)
+{
+ struct ifnet_filter *filter;
+ int retval = EOPNOTSUPP; /* overall answer so far */
+ int result = 0; /* answer from the most recent handler */
+
+ if (ifp == NULL || ioctl_code == 0)
+ return (EINVAL);
+
+ /* Get an io ref count if the interface is attached */
+ if (!ifnet_is_attached(ifp, 1))
+ return (EOPNOTSUPP);
+
+ /* Run the interface filters first.
+ * We want to run all filters before calling the protocol,
+ * interface family, or interface.
+ */
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+ /* prevent filter list from changing in case we drop the lock */
+ if_flt_monitor_busy(ifp);
+ TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
+ if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
+ filter->filt_protocol == proto_fam)) {
+ /* the filter lock is not held across the callback */
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
+ result = filter->filt_ioctl(filter->filt_cookie, ifp,
+ proto_fam, ioctl_code, ioctl_arg);
+
+ lck_mtx_lock_spin(&ifp->if_flt_lock);
+
+ /* Only update retval if no one has handled the ioctl */
+ if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
+ if (result == ENOTSUP)
+ result = EOPNOTSUPP;
+ retval = result;
+ if (retval != 0 && retval != EOPNOTSUPP) {
+ /* we're done with the filter list */
+ if_flt_monitor_unbusy(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+ goto cleanup;
+ }
+ }
+ }
+ }
+ /* we're done with the filter list */
+ if_flt_monitor_unbusy(ifp);
+ lck_mtx_unlock(&ifp->if_flt_lock);
+
+ /* Allow the protocol to handle the ioctl */
+ if (proto_fam != 0) {
+ struct if_proto *proto;
+
+ /* callee holds a proto refcnt upon success */
+ ifnet_lock_shared(ifp);
+ proto = find_attached_proto(ifp, proto_fam);
+ ifnet_lock_done(ifp);
+ if (proto != NULL) {
+ proto_media_ioctl ioctlp =
+ (proto->proto_kpi == kProtoKPI_v1 ?
+ proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
+ /* a protocol without an ioctl handler counts as EOPNOTSUPP */
+ result = EOPNOTSUPP;
+ if (ioctlp != NULL)
+ result = ioctlp(ifp, proto_fam, ioctl_code,
+ ioctl_arg);
+ if_proto_free(proto);
+
+ /* Only update retval if no one has handled the ioctl */
+ if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
+ if (result == ENOTSUP)
+ result = EOPNOTSUPP;
+ retval = result;
+ if (retval && retval != EOPNOTSUPP)
+ goto cleanup;
+ }
+ }
+ }
+
+ /* retval is either 0 or EOPNOTSUPP */
+
+ /*
+ * Let the interface handle this ioctl.
+ * If it returns EOPNOTSUPP, ignore that, we may have
+ * already handled this in the protocol or family.
+ *
+ * NOTE(review): when if_ioctl is NULL, result still holds whatever
+ * the last filter/protocol returned (or its initial 0 if none ran),
+ * and the check below can then turn an unhandled EOPNOTSUPP into 0.
+ * Confirm whether if_ioctl can be NULL here and whether that is
+ * intended.
+ */
+ if (ifp->if_ioctl)
+ result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
+
+ /* Only update retval if no one has handled the ioctl */
+ if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
+ if (result == ENOTSUP)
+ result = EOPNOTSUPP;
+ retval = result;
+ if (retval && retval != EOPNOTSUPP) {
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ if (retval == EJUSTRETURN)
+ retval = 0;
+
+ /* release the io ref count taken above */
+ ifnet_decr_iorefcnt(ifp);
+
+ return (retval);
+}
+
+/*
+ * Forward a BPF tap mode/callback change to the interface's
+ * if_set_bpf_tap handler, holding an I/O reference for the duration of
+ * the call.
+ *
+ * Returns 0 if the interface has no such handler, ENXIO if it has one but
+ * is not attached, otherwise the handler's result.
+ */
+__private_extern__ errno_t
+dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
+{
+	errno_t error;
+
+	/* nothing to do for interfaces without a tap handler */
+	if (!ifp->if_set_bpf_tap)
+		return (0);
+
+	/* Get an io reference on the interface if it is attached */
+	if (!ifnet_is_attached(ifp, 1))
+		return (ENXIO);
+
+	error = ifp->if_set_bpf_tap(ifp, mode, callback);
+	ifnet_decr_iorefcnt(ifp);
+
+	return (error);
+}
+
+/*
+ * Resolve a protocol-level multicast address to a link-layer address.
+ *
+ * ll_addr (ll_len bytes) is zeroed, then the protocol attached for
+ * proto_addr->sa_family is asked to fill it in via its resolve_multi
+ * handler.  If that succeeded, or nothing handled the request
+ * (EOPNOTSUPP), and the interface has an if_check_multi handler, that
+ * handler is asked to verify the address: the resolved link-layer
+ * address on success, the original protocol address otherwise.
+ *
+ * Returns EOPNOTSUPP if the interface is not attached (ll_addr is left
+ * untouched in that case) or if nothing handled the request; otherwise
+ * the result of the last handler invoked.
+ */
+errno_t
+dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
+    struct sockaddr *ll_addr, size_t ll_len)
+{
+	errno_t result = EOPNOTSUPP;
+	struct if_proto *proto;
+
+	if (!ifnet_is_attached(ifp, 1))
+		return result;
+
+	bzero(ll_addr, ll_len);
+
+	/* Call the protocol first; callee holds a proto refcnt upon success */
+	ifnet_lock_shared(ifp);
+	proto = find_attached_proto(ifp, proto_addr->sa_family);
+	ifnet_lock_done(ifp);
+
+	if (proto != NULL) {
+		proto_media_resolve_multi resolvep;
+
+		if (proto->proto_kpi == kProtoKPI_v1)
+			resolvep = proto->kpi.v1.resolve_multi;
+		else
+			resolvep = proto->kpi.v2.resolve_multi;
+
+		if (resolvep != NULL) {
+			result = resolvep(ifp, proto_addr,
+			    (struct sockaddr_dl*)(void *)ll_addr, ll_len);
+		}
+		if_proto_free(proto);
+	}
+
+	/* Let the interface verify the multicast address */
+	if (ifp->if_check_multi &&
+	    (result == 0 || result == EOPNOTSUPP)) {
+		const struct sockaddr *verify;
+
+		verify = (result == 0) ? ll_addr : proto_addr;
+		result = ifp->if_check_multi(ifp, verify);
+	}
+
+	ifnet_decr_iorefcnt(ifp);
+	return (result);
+}
+
+/*
+ * Send an ARP packet through the send_arp handler of the protocol
+ * attached to ifp for target_proto->sa_family, updating the global ARP
+ * transmit statistics for request and reply operations.
+ *
+ * Returns ENOTSUP if no such protocol is attached or it has no send_arp
+ * handler; otherwise the handler's result.
+ */
+__private_extern__ errno_t
+dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
+    const struct sockaddr_dl* sender_hw, const struct sockaddr* sender_proto,
+    const struct sockaddr_dl* target_hw, const struct sockaddr* target_proto)
+{
+	struct if_proto *proto;
+	proto_media_send_arp arpp;
+	errno_t result;
+
+	/* callee holds a proto refcnt upon success */
+	ifnet_lock_shared(ifp);
+	proto = find_attached_proto(ifp, target_proto->sa_family);
+	ifnet_lock_done(ifp);
+
+	if (proto == NULL)
+		return (ENOTSUP);
+
+	arpp = (proto->proto_kpi == kProtoKPI_v1 ?
+	    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
+
+	if (arpp != NULL) {
+		if (arpop == ARPOP_REQUEST) {
+			arpstat.txrequests++;
+			/* requests with a known target hw address */
+			if (target_hw != NULL)
+				arpstat.txurequests++;
+		} else if (arpop == ARPOP_REPLY) {
+			arpstat.txreplies++;
+		}
+		result = arpp(ifp, arpop, sender_hw, sender_proto,
+		    target_hw, target_proto);
+	} else {
+		result = ENOTSUP;
+	}
+
+	if_proto_free(proto);
+
+	return (result);
+}
+
+ struct net_thread_marks { }; /* empty tag type; only addresses derived from it are used */
+ static const struct net_thread_marks net_thread_marks_base = { }; /* anchor: the push/pop routines encode mark bits as a byte offset from this object's address */
+
+ __private_extern__ const net_thread_marks_t net_thread_marks_none =
+ &net_thread_marks_base; /* token at offset 0 from the anchor, i.e. no bits recorded */
+
+ /*
+  * Set the requested mark bits on the current thread's uu_network_marks.
+  *
+  * Only bits that were not already set are recorded in the returned
+  * token (encoded as an offset from net_thread_marks_base), so handing
+  * the token to net_thread_marks_pop() clears exactly what this call set.
+  */
+ __private_extern__ net_thread_marks_t
+ net_thread_marks_push(u_int32_t push)
+ {
+     static const char *const base = (const void*)&net_thread_marks_base;
+     u_int32_t newly_set = 0;
+     struct uthread *uth;
+
+     /* Nothing requested: return the zero-offset token */
+     if (push == 0)
+         return ((net_thread_marks_t)&base[newly_set]);
+
+     uth = get_bsdthread_info(current_thread());
+
+     /* Keep only the bits that are not yet set on this thread */
+     newly_set = push & ~uth->uu_network_marks;
+     if (newly_set != 0)
+         uth->uu_network_marks |= newly_set;
+
+     return ((net_thread_marks_t)&base[newly_set]);
+ }
+
+ /*
+  * Clear the requested mark bits on the current thread's uu_network_marks.
+  *
+  * Only bits that were actually set are recorded in the returned token
+  * (encoded as an offset from net_thread_marks_base), so handing the
+  * token to net_thread_unmarks_pop() restores exactly what was cleared.
+  */
+ __private_extern__ net_thread_marks_t
+ net_thread_unmarks_push(u_int32_t unpush)
+ {
+     static const char *const base = (const void*)&net_thread_marks_base;
+     u_int32_t newly_cleared = 0;
+     struct uthread *uth;
+
+     /* Nothing requested: return the zero-offset token */
+     if (unpush == 0)
+         return ((net_thread_marks_t)&base[newly_cleared]);
+
+     uth = get_bsdthread_info(current_thread());
+
+     /* Keep only the bits that are currently set on this thread */
+     newly_cleared = unpush & uth->uu_network_marks;
+     if (newly_cleared != 0)
+         uth->uu_network_marks &= ~newly_cleared;
+
+     return ((net_thread_marks_t)&base[newly_cleared]);
+ }
+
+ /*
+  * Undo a net_thread_marks_push(): decode the bit set from the token's
+  * offset relative to net_thread_marks_base and clear those bits on the
+  * current thread.  Asserts that the offset fits in 32 bits and that all
+  * of the bits are still set.
+  */
+ __private_extern__ void
+ net_thread_marks_pop(net_thread_marks_t popx)
+ {
+     static const char *const base = (const void*)&net_thread_marks_base;
+     static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
+     ptrdiff_t bits = (caddr_t)popx - (caddr_t)base;
+     struct uthread *uth;
+
+     /* Zero-offset token: the matching push changed nothing */
+     if (bits == 0)
+         return;
+
+     uth = get_bsdthread_info(current_thread());
+
+     VERIFY((bits & ones) == bits);
+     VERIFY((ptrdiff_t)(uth->uu_network_marks & bits) == bits);
+     uth->uu_network_marks &= ~bits;
+ }
+
+ /*
+  * Undo a net_thread_unmarks_push(): decode the bit set from the token's
+  * offset relative to net_thread_marks_base and set those bits again on
+  * the current thread.  Asserts that the offset fits in 32 bits and that
+  * none of the bits is currently set.
+  */
+ __private_extern__ void
+ net_thread_unmarks_pop(net_thread_marks_t unpopx)
+ {
+     static const char *const base = (const void*)&net_thread_marks_base;
+     static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
+     ptrdiff_t bits = (caddr_t)unpopx - (caddr_t)base;
+     struct uthread *uth;
+
+     /* Zero-offset token: the matching push changed nothing */
+     if (bits == 0)
+         return;
+
+     uth = get_bsdthread_info(current_thread());
+
+     VERIFY((bits & ones) == bits);
+     VERIFY((ptrdiff_t)(uth->uu_network_marks & bits) == 0);
+     uth->uu_network_marks |= bits;
+ }
+
+ /*
+  * Return the subset of `check' bits that are set in the current thread's
+  * uu_network_marks (0 when none are set or when `check' is 0).
+  */
+ __private_extern__ u_int32_t
+ net_thread_is_marked(u_int32_t check)
+ {
+     struct uthread *uth;
+
+     /* Avoid the thread lookup when there is nothing to test */
+     if (check == 0)
+         return (0);
+
+     uth = get_bsdthread_info(current_thread());
+     return (uth->uu_network_marks & check);
+ }
+
+ /*
+  * Return the subset of `check' bits that are clear in the current
+  * thread's uu_network_marks (0 when all are set or when `check' is 0).
+  */
+ __private_extern__ u_int32_t
+ net_thread_is_unmarked(u_int32_t check)
+ {
+     struct uthread *uth;
+
+     /* Avoid the thread lookup when there is nothing to test */
+     if (check == 0)
+         return (0);
+
+     uth = get_bsdthread_info(current_thread());
+     return (~uth->uu_network_marks & check);
+ }
+
+ /*
+  * Tell whether an ARP packet is an announcement, i.e. whether its sender
+  * and target IPv4 addresses are identical.
+  *
+  * Either pointer may be NULL; a packet with a missing sender or target
+  * address is never an announcement.  Guarding both sides keeps the
+  * helper safe regardless of which argument a caller leaves unset.
+  *
+  * Returns non-zero for an announcement, FALSE otherwise.
+  */
+ static __inline__ int
+ _is_announcement(const struct sockaddr_in * sender_sin,
+     const struct sockaddr_in * target_sin)
+ {
+     if (sender_sin == NULL || target_sin == NULL) {
+         return (FALSE);
+     }
+     return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
+ }
+
+ /*
+  * Send an ARP packet, fanning IPv4 link-local requests out to every
+  * interface flagged IFEF_ARPLL.
+  *
+  * ifp            interface to send on in the ordinary (non-fan-out) case
+  * arpop          ARP opcode (ARPOP_REQUEST, ARPOP_REPLY, ...)
+  * sender_hw      sender link-layer address
+  * sender_proto   sender protocol address; may be NULL
+  * target_hw      target link-layer address; may be NULL
+  * target_proto0  target protocol address; must not be NULL
+  * rtflags        route flags; RTF_ROUTER causes SIN_ROUTER to be set in
+  *                the address handed to the send_arp callback
+  *
+  * Returns EINVAL if target_proto0 is NULL or the sender/target address
+  * families differ.  In the fan-out case the result is ENOTSUP if nothing
+  * was sent, otherwise the result of the first send that replaced the
+  * initial ENOTSUP.  Otherwise returns dlil_send_arp_internal()'s result.
+  */
+ __private_extern__ errno_t
+ dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
+     const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw,
+     const struct sockaddr* target_proto0, u_int32_t rtflags)
+ {
+     errno_t result = 0;
+     const struct sockaddr_in * sender_sin;
+     const struct sockaddr_in * target_sin;
+     struct sockaddr_inarp target_proto_sinarp;
+     struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
+
+     if (target_proto == NULL || (sender_proto != NULL &&
+         sender_proto->sa_family != target_proto->sa_family))
+         return (EINVAL);
+
+     /*
+      * If the target is a (default) router, provide that
+      * information to the send_arp callback routine.
+      */
+     if (rtflags & RTF_ROUTER) {
+         bcopy(target_proto, &target_proto_sinarp,
+             sizeof (struct sockaddr_in));
+         target_proto_sinarp.sin_other |= SIN_ROUTER;
+         target_proto = (struct sockaddr *)&target_proto_sinarp;
+     }
+
+     /*
+      * If this is an ARP request and the target IP is IPv4LL,
+      * send the request on all interfaces.  The exception is
+      * an announcement, which must only appear on the specific
+      * interface.
+      */
+     sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
+     target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
+     /*
+      * Pass the sender first: _is_announcement() NULL-checks only its
+      * first argument, and sender_proto (unlike target_proto) is allowed
+      * to be NULL here.  Swapping the two would dereference a NULL sender.
+      */
+     if (target_proto->sa_family == AF_INET &&
+         IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
+         ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
+         !_is_announcement(sender_sin, target_sin)) {
+         ifnet_t *ifp_list;
+         u_int32_t count;
+         u_int32_t ifp_on;
+
+         result = ENOTSUP;
+
+         if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
+             for (ifp_on = 0; ifp_on < count; ifp_on++) {
+                 errno_t new_result;
+                 ifaddr_t source_hw = NULL;
+                 ifaddr_t source_ip = NULL;
+                 struct sockaddr_in source_ip_copy;
+                 struct ifnet *cur_ifp = ifp_list[ifp_on];
+
+                 /*
+                  * Only arp on interfaces marked for IPv4LL
+                  * ARPing.  This may mean that we don't ARP on
+                  * the interface the subnet route points to.
+                  */
+                 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
+                     continue;
+
+                 /* Find the source IP address */
+                 ifnet_lock_shared(cur_ifp);
+                 source_hw = cur_ifp->if_lladdr;
+                 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
+                     ifa_link) {
+                     IFA_LOCK(source_ip);
+                     if (source_ip->ifa_addr != NULL &&
+                         source_ip->ifa_addr->sa_family ==
+                         AF_INET) {
+                         /* Copy the source IP address */
+                         source_ip_copy =
+                             *(struct sockaddr_in *)
+                             (void *)source_ip->ifa_addr;
+                         IFA_UNLOCK(source_ip);
+                         break;
+                     }
+                     IFA_UNLOCK(source_ip);
+                 }
+
+                 /* No IP Source, don't arp */
+                 if (source_ip == NULL) {
+                     ifnet_lock_done(cur_ifp);
+                     continue;
+                 }
+
+                 /* Keep the link address alive once the lock is dropped */
+                 IFA_ADDREF(source_hw);
+                 ifnet_lock_done(cur_ifp);
+
+                 /* Send the ARP */
+                 new_result = dlil_send_arp_internal(cur_ifp,
+                     arpop, (struct sockaddr_dl *)(void *)
+                     source_hw->ifa_addr,
+                     (struct sockaddr *)&source_ip_copy, NULL,
+                     target_proto);
+
+                 IFA_REMREF(source_hw);
+                 /* Remember the first result that replaces ENOTSUP */
+                 if (result == ENOTSUP) {
+                     result = new_result;
+                 }
+             }
+             ifnet_list_free(ifp_list);
+         }
+     } else {
+         result = dlil_send_arp_internal(ifp, arpop, sender_hw,
+             sender_proto, target_hw, target_proto);
+     }
+
+     return (result);
+ }
+
+/*
+ * Caller must hold ifnet head lock.
+ */
+static int
+ifnet_lookup(struct ifnet *ifp)
+{
+ struct ifnet *_ifp;
+
+ lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
+ TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
+ if (_ifp == ifp)
+ break;
+ }
+ return (_ifp != NULL);
+}
+/*
+ * Caller has to pass a non-zero refio argument to get a
+ * IO reference count. This will prevent ifnet_detach from
+ * being called when there are outstanding io reference counts.
+ */
+int
+ifnet_is_attached(struct ifnet *ifp, int refio)
+{
+ int ret;
+
+ lck_mtx_lock_spin(&ifp->if_ref_lock);
+ if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
+ IFRF_ATTACHED))) {
+ if (refio > 0)
+ ifp->if_refio++;
+ }
+ lck_mtx_unlock(&ifp->if_ref_lock);
+
+ return (ret);
+}
+
+ /*
+  * Release an I/O reference obtained through ifnet_is_attached().
+  * Asserts that a reference is outstanding and that the ifnet is either
+  * attached or detaching.
+  */
+ void
+ ifnet_decr_iorefcnt(struct ifnet *ifp)
+ {
+     lck_mtx_lock_spin(&ifp->if_ref_lock);
+
+     VERIFY(ifp->if_refio > 0);
+     VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
+
+     /*
+      * if there are no more outstanding io references, wakeup the
+      * ifnet_detach thread if detaching flag is set.
+      */
+     if (--ifp->if_refio == 0 &&
+         (ifp->if_refflags & IFRF_DETACHING) != 0)
+         wakeup(&(ifp->if_refio));
+
+     lck_mtx_unlock(&ifp->if_ref_lock);
+ }
+
+ /*
+  * Record a trace entry for a reference hold (refhold != 0) or release
+  * (refhold == 0) on a debug-enabled dlil_ifnet.  Entries go into a
+  * fixed-size history indexed by a wrapping 16-bit counter.
+  *
+  * Panics if dl_if was not set up with DLIF_DEBUG.
+  */
+ static void
+ dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
+ {
+     struct dlil_ifnet_dbg *dbg = (struct dlil_ifnet_dbg *)dl_if;
+     ctrace_t *history;
+     u_int16_t *counter;
+     u_int32_t slot;
+
+     if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
+         panic("%s: dl_if %p has no debug structure", __func__, dl_if);
+         /* NOTREACHED */
+     }
+
+     /* Holds and releases keep separate counters and histories */
+     counter = refhold ? &dbg->dldbg_if_refhold_cnt :
+         &dbg->dldbg_if_refrele_cnt;
+     history = refhold ? dbg->dldbg_if_refhold : dbg->dldbg_if_refrele;
+
+     slot = atomic_add_16_ov(counter, 1) % IF_REF_TRACE_HIST_SIZE;
+     ctrace_record(&history[slot]);
+ }
+
+ /*
+  * Take a reference on the dlil_ifnet backing ifp.
+  *
+  * Returns EINVAL for a NULL ifp, 0 otherwise.  Panics if the reference
+  * count wraps around to zero.
+  */
+ errno_t
+ dlil_if_ref(struct ifnet *ifp)
+ {
+     struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+
+     if (dl_if == NULL)
+         return (EINVAL);
+
+     lck_mtx_lock_spin(&dl_if->dl_if_lock);
+
+     if (++dl_if->dl_if_refcnt == 0) {
+         panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
+         /* NOTREACHED */
+     }
+
+     /* Optional tracing hook; TRUE marks this as a hold */
+     if (dl_if->dl_if_trace != NULL)
+         (*dl_if->dl_if_trace)(dl_if, TRUE);
+
+     lck_mtx_unlock(&dl_if->dl_if_lock);
+
+     return (0);
+ }
+
+ /*
+  * Drop a reference on the dlil_ifnet backing ifp.
+  *
+  * Returns EINVAL for a NULL ifp, 0 otherwise.  Panics if the reference
+  * count is already zero.
+  */
+ errno_t
+ dlil_if_free(struct ifnet *ifp)
+ {
+     struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+
+     if (dl_if == NULL)
+         return (EINVAL);
+
+     lck_mtx_lock_spin(&dl_if->dl_if_lock);
+
+     if (dl_if->dl_if_refcnt == 0) {
+         panic("%s: negative refcnt for ifp=%p", __func__, ifp);
+         /* NOTREACHED */
+     }
+     --dl_if->dl_if_refcnt;
+
+     /* Optional tracing hook; FALSE marks this as a release */
+     if (dl_if->dl_if_trace != NULL)
+         (*dl_if->dl_if_trace)(dl_if, FALSE);
+
+     lck_mtx_unlock(&dl_if->dl_if_lock);
+
+     return (0);
+ }
+
+ /*
+  * Link a fully initialized if_proto onto its interface.
+  *
+  * Under the exclusive ifnet lock this rejects duplicates, lets the
+  * family module's if_add_proto callback refine the demux descriptors,
+  * appends the protocol to the tail of its hash chain and takes the
+  * "attached" reference.  A KEV_DL_PROTO_ATTACHED event is posted after
+  * the lock is dropped.
+  *
+  * Returns EEXIST if the protocol family is already attached, the
+  * if_add_proto error if that callback fails, and 0 on success.
+  */
+ static errno_t
+ dlil_attach_protocol_internal(struct if_proto *proto,
+     const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
+ {
+     struct kev_dl_proto_data ev_pr_data;
+     struct ifnet *ifp = proto->ifp;
+     int retval = 0;
+     u_int32_t bucket = proto_hash_value(proto->protocol_family);
+     struct if_proto *tail;
+     struct if_proto *existing;
+
+     /* callee holds a proto refcnt upon success */
+     ifnet_lock_exclusive(ifp);
+     existing = find_attached_proto(ifp, proto->protocol_family);
+     if (existing != NULL) {
+         ifnet_lock_done(ifp);
+         if_proto_free(existing);
+         return (EEXIST);
+     }
+
+     /*
+      * Call family module add_proto routine so it can refine the
+      * demux descriptors as it wishes.
+      */
+     retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
+         demux_count);
+     if (retval) {
+         ifnet_lock_done(ifp);
+         return (retval);
+     }
+
+     /*
+      * Insert the protocol in the hash, at the end of its chain
+      */
+     tail = SLIST_FIRST(&ifp->if_proto_hash[bucket]);
+     if (tail == NULL) {
+         SLIST_INSERT_HEAD(&ifp->if_proto_hash[bucket],
+             proto, next_hash);
+     } else {
+         while (SLIST_NEXT(tail, next_hash) != NULL)
+             tail = SLIST_NEXT(tail, next_hash);
+         SLIST_INSERT_AFTER(tail, proto, next_hash);
+     }
+
+     /* hold a proto refcnt for attach */
+     if_proto_ref(proto);
+
+     /*
+      * The reserved field carries the number of protocol still attached
+      * (subject to change)
+      */
+     ev_pr_data.proto_family = proto->protocol_family;
+     ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
+     ifnet_lock_done(ifp);
+
+     dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
+         (struct net_event_data *)&ev_pr_data,
+         sizeof (struct kev_dl_proto_data));
+     return (retval);
+ }
+
+ /*
+  * Attach a protocol to an interface using the v1 callback set.
+  *
+  * ifp            interface to attach to; must be on the global ifnet list
+  * protocol       protocol family; must be non-zero
+  * proto_details  v1 callbacks and demux descriptors; must be non-NULL
+  *
+  * Returns EINVAL for bad arguments, ENXIO if ifp is not on the global
+  * list, ENOMEM if the if_proto cannot be allocated, or whatever
+  * dlil_attach_protocol_internal() returns (0 on success).  The if_proto
+  * is freed on any failure.
+  */
+ errno_t
+ ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
+     const struct ifnet_attach_proto_param *proto_details)
+ {
+     int retval = 0;
+     struct if_proto *ifproto = NULL;
+
+     ifnet_head_lock_shared();
+     if (ifp == NULL || protocol == 0 || proto_details == NULL) {
+         retval = EINVAL;
+         goto end;
+     }
+     /* Check that the interface is in the global list */
+     if (!ifnet_lookup(ifp)) {
+         retval = ENXIO;
+         goto end;
+     }
+
+     ifproto = zalloc(dlif_proto_zone);
+     if (ifproto == NULL) {
+         retval = ENOMEM;
+         goto end;
+     }
+     bzero(ifproto, dlif_proto_size);
+
+     /* refcnt held above during lookup */
+     ifproto->ifp = ifp;
+     ifproto->protocol_family = protocol;
+     ifproto->proto_kpi = kProtoKPI_v1;
+     ifproto->kpi.v1.input = proto_details->input;
+     ifproto->kpi.v1.pre_output = proto_details->pre_output;
+     ifproto->kpi.v1.event = proto_details->event;
+     ifproto->kpi.v1.ioctl = proto_details->ioctl;
+     ifproto->kpi.v1.detached = proto_details->detached;
+     ifproto->kpi.v1.resolve_multi = proto_details->resolve;
+     ifproto->kpi.v1.send_arp = proto_details->send_arp;
+
+     retval = dlil_attach_protocol_internal(ifproto,
+         proto_details->demux_list, proto_details->demux_count);
+
+     /* Only claim success in the log when the attach really succeeded */
+     if (retval == 0 && dlil_verbose) {
+         printf("%s: attached v1 protocol %d\n", if_name(ifp),
+             protocol);
+     }
+
+ end:
+     /* EEXIST is not reported as a failure in the log */
+     if (retval != 0 && retval != EEXIST && ifp != NULL) {
+         DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
+             if_name(ifp), protocol, retval);
+     }
+     ifnet_head_done();
+     if (retval != 0 && ifproto != NULL)
+         zfree(dlif_proto_zone, ifproto);
+     return (retval);
+ }
+
+ /*
+  * Attach a protocol to an interface using the v2 callback set.
+  *
+  * ifp            interface to attach to; must be on the global ifnet list
+  * protocol       protocol family; must be non-zero
+  * proto_details  v2 callbacks and demux descriptors; must be non-NULL
+  *
+  * Returns EINVAL for bad arguments, ENXIO if ifp is not on the global
+  * list, ENOMEM if the if_proto cannot be allocated, or whatever
+  * dlil_attach_protocol_internal() returns (0 on success).  The if_proto
+  * is freed on any failure.
+  */
+ errno_t
+ ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
+     const struct ifnet_attach_proto_param_v2 *proto_details)
+ {
+     int retval = 0;
+     struct if_proto *ifproto = NULL;
+
+     ifnet_head_lock_shared();
+     if (ifp == NULL || protocol == 0 || proto_details == NULL) {
+         retval = EINVAL;
+         goto end;
+     }
+     /* Check that the interface is in the global list */
+     if (!ifnet_lookup(ifp)) {
+         retval = ENXIO;
+         goto end;
+     }
+
+     ifproto = zalloc(dlif_proto_zone);
+     if (ifproto == NULL) {
+         retval = ENOMEM;
+         goto end;
+     }
+     bzero(ifproto, sizeof(*ifproto));
+
+     /* refcnt held above during lookup */
+     ifproto->ifp = ifp;
+     ifproto->protocol_family = protocol;
+     ifproto->proto_kpi = kProtoKPI_v2;
+     ifproto->kpi.v2.input = proto_details->input;
+     ifproto->kpi.v2.pre_output = proto_details->pre_output;
+     ifproto->kpi.v2.event = proto_details->event;
+     ifproto->kpi.v2.ioctl = proto_details->ioctl;
+     ifproto->kpi.v2.detached = proto_details->detached;
+     ifproto->kpi.v2.resolve_multi = proto_details->resolve;
+     ifproto->kpi.v2.send_arp = proto_details->send_arp;
+
+     retval = dlil_attach_protocol_internal(ifproto,
+         proto_details->demux_list, proto_details->demux_count);
+
+     /* Only claim success in the log when the attach really succeeded */
+     if (retval == 0 && dlil_verbose) {
+         printf("%s: attached v2 protocol %d\n", if_name(ifp),
+             protocol);
+     }
+
+ end:
+     /* EEXIST is not reported as a failure in the log */
+     if (retval != 0 && retval != EEXIST && ifp != NULL) {
+         DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
+             if_name(ifp), protocol, retval);
+     }
+     ifnet_head_done();
+     if (retval != 0 && ifproto != NULL)
+         zfree(dlif_proto_zone, ifproto);
+     return (retval);
+ }
+
+ /*
+  * Detach a protocol family from an interface.
+  *
+  * The protocol is unlinked from the interface's hash, its callbacks
+  * (other than `detached') are redirected to the ifproto_media_* stubs
+  * and it is flagged as detached.  Two references are then dropped: the
+  * one taken at attach time and the one taken by the lookup here.
+  *
+  * Returns EINVAL for a NULL ifp or zero proto_family, ENXIO if the
+  * protocol is not attached, and 0 on success.
+  */
+ errno_t
+ ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
+ {
+     struct if_proto *ifproto;
+
+     if (ifp == NULL || proto_family == 0)
+         return (EINVAL);
+
+     ifnet_lock_exclusive(ifp);
+     /* callee holds a proto refcnt upon success */
+     ifproto = find_attached_proto(ifp, proto_family);
+     if (ifproto == NULL) {
+         ifnet_lock_done(ifp);
+         return (ENXIO);
+     }
+
+     /* call family module del_proto */
+     if (ifp->if_del_proto)
+         ifp->if_del_proto(ifp, ifproto->protocol_family);
+
+     SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
+         ifproto, if_proto, next_hash);
+
+     /* From here on any callback into the protocol hits a stub */
+     if (ifproto->proto_kpi == kProtoKPI_v1) {
+         ifproto->kpi.v1.input = ifproto_media_input_v1;
+         ifproto->kpi.v1.pre_output= ifproto_media_preout;
+         ifproto->kpi.v1.event = ifproto_media_event;
+         ifproto->kpi.v1.ioctl = ifproto_media_ioctl;
+         ifproto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
+         ifproto->kpi.v1.send_arp = ifproto_media_send_arp;
+     } else {
+         ifproto->kpi.v2.input = ifproto_media_input_v2;
+         ifproto->kpi.v2.pre_output = ifproto_media_preout;
+         ifproto->kpi.v2.event = ifproto_media_event;
+         ifproto->kpi.v2.ioctl = ifproto_media_ioctl;
+         ifproto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
+         ifproto->kpi.v2.send_arp = ifproto_media_send_arp;
+     }
+     ifproto->detached = 1;
+     ifnet_lock_done(ifp);
+
+     if (dlil_verbose) {
+         printf("%s: detached %s protocol %d\n", if_name(ifp),
+             (ifproto->proto_kpi == kProtoKPI_v1) ?
+             "v1" : "v2", proto_family);
+     }
+
+     /* release proto refcnt held during protocol attach */
+     if_proto_free(ifproto);
+
+     /*
+      * Release proto refcnt held during lookup; the rest of
+      * protocol detach steps will happen when the last proto
+      * reference is released.
+      */
+     if_proto_free(ifproto);
+
+     return (0);
+ }
+
+
+ /*
+  * Stub v1 input callback; ifnet_detach_protocol() installs it on a
+  * detached protocol.  Ignores its arguments and always fails with ENXIO.
+  */
+ static errno_t
+ ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
+     struct mbuf *packet, char *header)
+ {
+ #pragma unused(ifp, protocol, packet, header)
+     return ENXIO;
+ }
+
+ /*
+  * Stub v2 input callback; ifnet_detach_protocol() installs it on a
+  * detached protocol.  Ignores its arguments and always fails with ENXIO.
+  */
+ static errno_t
+ ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
+     struct mbuf *packet)
+ {
+ #pragma unused(ifp, protocol, packet)
+     return ENXIO;
+ }
+
+ /*
+  * Stub pre_output callback; ifnet_detach_protocol() installs it on a
+  * detached protocol.  Ignores its arguments and always fails with ENXIO.
+  */
+ static errno_t
+ ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
+     mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
+     char *link_layer_dest)
+ {
+ #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
+     return ENXIO;
+ }
+
+ /*
+  * Stub event callback; ifnet_detach_protocol() installs it on a
+  * detached protocol.  Ignores its arguments and does nothing.
+  */
+ static void
+ ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
+     const struct kev_msg *event)
+ {
+ #pragma unused(ifp, protocol, event)
+     /* intentionally empty */
+ }
+
+ /*
+  * Stub ioctl callback; ifnet_detach_protocol() installs it on a
+  * detached protocol.  Ignores its arguments and always fails with ENXIO.
+  */
+ static errno_t
+ ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
+     unsigned long command, void *argument)
+ {
+ #pragma unused(ifp, protocol, command, argument)
+     return ENXIO;
+ }
+
+ /*
+  * Stub resolve_multi callback; ifnet_detach_protocol() installs it on a
+  * detached protocol.  Ignores its arguments and always fails with ENXIO.
+  */
+ static errno_t
+ ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
+     struct sockaddr_dl *out_ll, size_t ll_len)
+ {
+ #pragma unused(ifp, proto_addr, out_ll, ll_len)
+     return ENXIO;
+ }
+
+ /*
+  * Stub send_arp callback; ifnet_detach_protocol() installs it on a
+  * detached protocol.  Ignores its arguments and always fails with ENXIO.
+  */
+ static errno_t
+ ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
+     const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
+     const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
+ {
+ #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
+     return ENXIO;
+ }
+
+extern int if_next_index(void);
+
+ /*
+  * Attach an interface to the networking stack.
+  *
+  * ifp      interface to attach; must not already be on ifnet_head
+  * ll_addr  optional link-layer address; when given, its length must
+  *          match ifp->if_addrlen (or sets it, if that is still zero)
+  *
+  * The interface is validated, given a protocol hash table, an interface
+  * index and a permanent link address, linked onto the global list, and
+  * its transmit queue plus input/start/poll threads are set up as its
+  * flags require.  It is marked IFRF_ATTACHED only at the very end, after
+  * which a KEV_DL_IF_ATTACHED event is posted.
+  *
+  * Returns:
+  *   EINVAL   ifp is NULL, or ll_addr's length conflicts with if_addrlen
+  *   EEXIST   ifp is already on the global interface list
+  *   ENODEV   the family add/del protocol callbacks are missing
+  *   ENOBUFS  the protocol hash, an interface index or the link address
+  *            could not be allocated
+  *   0        success
+  *
+  * Failures to set up the transmit queue or to create the worker threads
+  * panic.
+  */
+ errno_t
+ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
+ {
+     struct ifnet *tmp_if;
+     struct ifaddr *ifa;
+     struct if_data_internal if_data_saved;
+     struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+     struct dlil_threading_info *dl_inp;
+     u_int32_t sflags = 0;
+     int err;
+
+     if (ifp == NULL)
+         return (EINVAL);
+
+     /*
+      * Serialize ifnet attach using dlil_ifnet_lock, in order to
+      * prevent the interface from being configured while it is
+      * embryonic, as ifnet_head_lock is dropped and reacquired
+      * below prior to marking the ifnet with IFRF_ATTACHED.
+      */
+     dlil_if_lock();
+     ifnet_head_lock_exclusive();
+     /* Verify we aren't already on the list */
+     TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
+         if (tmp_if == ifp) {
+             ifnet_head_done();
+             dlil_if_unlock();
+             return (EEXIST);
+         }
+     }
+
+     lck_mtx_lock_spin(&ifp->if_ref_lock);
+     if (ifp->if_refflags & IFRF_ATTACHED) {
+         panic_plain("%s: flags mismatch (attached set) ifp=%p",
+             __func__, ifp);
+         /* NOTREACHED */
+     }
+     lck_mtx_unlock(&ifp->if_ref_lock);
+
+     ifnet_lock_exclusive(ifp);
+
+     /* Sanity check */
+     VERIFY(ifp->if_detaching_link.tqe_next == NULL);
+     VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
+
+     if (ll_addr != NULL) {
+         if (ifp->if_addrlen == 0) {
+             ifp->if_addrlen = ll_addr->sdl_alen;
+         } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
+             ifnet_lock_done(ifp);
+             ifnet_head_done();
+             dlil_if_unlock();
+             return (EINVAL);
+         }
+     }
+
+     /*
+      * Allow interfaces without protocol families to attach
+      * only if they have the necessary fields filled out.
+      */
+     if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
+         DLIL_PRINTF("%s: Attempt to attach interface without "
+             "family module - %d\n", __func__, ifp->if_family);
+         ifnet_lock_done(ifp);
+         ifnet_head_done();
+         dlil_if_unlock();
+         return (ENODEV);
+     }
+
+     /* Allocate protocol hash table */
+     VERIFY(ifp->if_proto_hash == NULL);
+     ifp->if_proto_hash = zalloc(dlif_phash_zone);
+     if (ifp->if_proto_hash == NULL) {
+         ifnet_lock_done(ifp);
+         ifnet_head_done();
+         dlil_if_unlock();
+         return (ENOBUFS);
+     }
+     bzero(ifp->if_proto_hash, dlif_phash_size);
+
+     lck_mtx_lock_spin(&ifp->if_flt_lock);
+     VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
+     TAILQ_INIT(&ifp->if_flt_head);
+     VERIFY(ifp->if_flt_busy == 0);
+     VERIFY(ifp->if_flt_waiters == 0);
+     lck_mtx_unlock(&ifp->if_flt_lock);
+
+     VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
+     TAILQ_INIT(&ifp->if_prefixhead);
+
+     /* A recycled ifnet keeps its multicast list across attaches */
+     if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
+         VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
+         LIST_INIT(&ifp->if_multiaddrs);
+     }
+
+     VERIFY(ifp->if_allhostsinm == NULL);
+     VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
+     TAILQ_INIT(&ifp->if_addrhead);
+
+     if (ifp->if_index == 0) {
+         int idx = if_next_index();
+
+         if (idx == -1) {
+             ifp->if_index = 0;
+             /*
+              * Give back the protocol hash allocated above;
+              * otherwise it would leak, and a later attach
+              * attempt would trip the if_proto_hash == NULL
+              * assertion.
+              */
+             zfree(dlif_phash_zone, ifp->if_proto_hash);
+             ifp->if_proto_hash = NULL;
+             ifnet_lock_done(ifp);
+             ifnet_head_done();
+             dlil_if_unlock();
+             return (ENOBUFS);
+         }
+         ifp->if_index = idx;
+     }
+     /* There should not be anything occupying this slot */
+     VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
+
+     /* allocate (if needed) and initialize a link address */
+     VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
+     ifa = dlil_alloc_lladdr(ifp, ll_addr);
+     if (ifa == NULL) {
+         /* Same cleanup as above: do not strand the protocol hash */
+         zfree(dlif_phash_zone, ifp->if_proto_hash);
+         ifp->if_proto_hash = NULL;
+         ifnet_lock_done(ifp);
+         ifnet_head_done();
+         dlil_if_unlock();
+         return (ENOBUFS);
+     }
+
+     VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
+     ifnet_addrs[ifp->if_index - 1] = ifa;
+
+     /* make this address the first on the list */
+     IFA_LOCK(ifa);
+     /* hold a reference for ifnet_addrs[] */
+     IFA_ADDREF_LOCKED(ifa);
+     /* if_attach_link_ifa() holds a reference for ifa_link */
+     if_attach_link_ifa(ifp, ifa);
+     IFA_UNLOCK(ifa);
+
+ #if CONFIG_MACF_NET
+     mac_ifnet_label_associate(ifp);
+ #endif
+
+     TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
+     ifindex2ifnet[ifp->if_index] = ifp;
+
+     /* Hold a reference to the underlying dlil_ifnet */
+     ifnet_reference(ifp);
+
+     /* Clear stats (save and restore other fields that we care) */
+     if_data_saved = ifp->if_data;
+     bzero(&ifp->if_data, sizeof (ifp->if_data));
+     ifp->if_data.ifi_type = if_data_saved.ifi_type;
+     ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
+     ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
+     ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
+     ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
+     ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
+     ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
+     ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
+     ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
+     ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
+     ifnet_touch_lastchange(ifp);
+
+     VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
+         ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);
+
+     /* By default, use SFB and enable flow advisory */
+     sflags = PKTSCHEDF_QALG_SFB;
+     if (if_flowadv)
+         sflags |= PKTSCHEDF_QALG_FLOWCTL;
+
+     if (if_delaybased_queue)
+         sflags |= PKTSCHEDF_QALG_DELAYBASED;
+
+     /* Initialize transmit queue(s) */
+     err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
+     if (err != 0) {
+         panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
+             "err=%d", __func__, ifp, err);
+         /* NOTREACHED */
+     }
+
+     /* Sanity checks on the input thread storage */
+     dl_inp = &dl_if->dl_if_inpstorage;
+     bzero(&dl_inp->stats, sizeof (dl_inp->stats));
+     VERIFY(dl_inp->input_waiting == 0);
+     VERIFY(dl_inp->wtot == 0);
+     VERIFY(dl_inp->ifp == NULL);
+     VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
+     VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
+     VERIFY(!dl_inp->net_affinity);
+     VERIFY(ifp->if_inp == NULL);
+     VERIFY(dl_inp->input_thr == THREAD_NULL);
+     VERIFY(dl_inp->wloop_thr == THREAD_NULL);
+     VERIFY(dl_inp->poll_thr == THREAD_NULL);
+     VERIFY(dl_inp->tag == 0);
+     VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
+     bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
+     bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
+     bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
+ #if IFNET_INPUT_SANITY_CHK
+     VERIFY(dl_inp->input_mbuf_cnt == 0);
+ #endif /* IFNET_INPUT_SANITY_CHK */
+
+     /*
+      * A specific DLIL input thread is created per Ethernet/cellular
+      * interface or for an interface which supports opportunistic
+      * input polling.  Pseudo interfaces or other types of interfaces
+      * use the main input thread instead.
+      */
+     if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
+         ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
+         ifp->if_inp = dl_inp;
+         err = dlil_create_input_thread(ifp, ifp->if_inp);
+         if (err != 0) {
+             panic_plain("%s: ifp=%p couldn't get an input thread; "
+                 "err=%d", __func__, ifp, err);
+             /* NOTREACHED */
+         }
+     }
+
+     /*
+      * If the driver supports the new transmit model, calculate flow hash
+      * and create a workloop starter thread to invoke the if_start callback
+      * where the packets may be dequeued and transmitted.
+      */
+     if (ifp->if_eflags & IFEF_TXSTART) {
+         ifp->if_flowhash = ifnet_calc_flowhash(ifp);
+         VERIFY(ifp->if_flowhash != 0);
+
+         VERIFY(ifp->if_start != NULL);
+         VERIFY(ifp->if_start_thread == THREAD_NULL);
+
+         ifnet_set_start_cycle(ifp, NULL);
+         ifp->if_start_active = 0;
+         ifp->if_start_req = 0;
+         ifp->if_start_flags = 0;
+         if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
+             &ifp->if_start_thread)) != KERN_SUCCESS) {
+             panic_plain("%s: ifp=%p couldn't get a start thread; "
+                 "err=%d", __func__, ifp, err);
+             /* NOTREACHED */
+         }
+         ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
+             (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
+     } else {
+         ifp->if_flowhash = 0;
+     }
+
+     /*
+      * If the driver supports the new receive model, create a poller
+      * thread to invoke if_input_poll callback where the packets may
+      * be dequeued from the driver and processed for reception.
+      */
+     if (ifp->if_eflags & IFEF_RXPOLL) {
+         VERIFY(ifp->if_input_poll != NULL);
+         VERIFY(ifp->if_input_ctl != NULL);
+         VERIFY(ifp->if_poll_thread == THREAD_NULL);
+
+         ifnet_set_poll_cycle(ifp, NULL);
+         ifp->if_poll_update = 0;
+         ifp->if_poll_active = 0;
+         ifp->if_poll_req = 0;
+         if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
+             &ifp->if_poll_thread)) != KERN_SUCCESS) {
+             panic_plain("%s: ifp=%p couldn't get a poll thread; "
+                 "err=%d", __func__, ifp, err);
+             /* NOTREACHED */
+         }
+         ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
+             (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
+     }
+
+     VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
+     VERIFY(ifp->if_desc.ifd_len == 0);
+     VERIFY(ifp->if_desc.ifd_desc != NULL);
+
+     /* Record attach PC stacktrace */
+     ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
+
+     /* Count link-layer memberships left over from a previous attach */
+     ifp->if_updatemcasts = 0;
+     if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
+         struct ifmultiaddr *ifma;
+         LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+             IFMA_LOCK(ifma);
+             if (ifma->ifma_addr->sa_family == AF_LINK ||
+                 ifma->ifma_addr->sa_family == AF_UNSPEC)
+                 ifp->if_updatemcasts++;
+             IFMA_UNLOCK(ifma);
+         }
+
+         printf("%s: attached with %d suspended link-layer multicast "
+             "membership(s)\n", if_name(ifp),
+             ifp->if_updatemcasts);
+     }
+
+     /* Clear logging parameters */
+     bzero(&ifp->if_log, sizeof (ifp->if_log));
+     ifp->if_fg_sendts = 0;
+
+     VERIFY(ifp->if_delegated.ifp == NULL);
+     VERIFY(ifp->if_delegated.type == 0);
+     VERIFY(ifp->if_delegated.family == 0);
+     VERIFY(ifp->if_delegated.subfamily == 0);
+     VERIFY(ifp->if_delegated.expensive == 0);
+
+     ifnet_lock_done(ifp);
+     ifnet_head_done();
+
+     lck_mtx_lock(&ifp->if_cached_route_lock);
+     /* Enable forwarding cached route */
+     ifp->if_fwd_cacheok = 1;
+     /* Clean up any existing cached routes */
+     ROUTE_RELEASE(&ifp->if_fwd_route);
+     bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
+     ROUTE_RELEASE(&ifp->if_src_route);
+     bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
+     ROUTE_RELEASE(&ifp->if_src_route6);
+     bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
+     lck_mtx_unlock(&ifp->if_cached_route_lock);
+
+     ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
+
+     /*
+      * Allocate and attach IGMPv3/MLDv2 interface specific variables
+      * and trees; do this before the ifnet is marked as attached.
+      * The ifnet keeps the reference to the info structures even after
+      * the ifnet is detached, since the network-layer records still
+      * refer to the info structures even after that.  This also
+      * makes it possible for them to still function after the ifnet
+      * is recycled or reattached.
+      */
+ #if INET
+     if (IGMP_IFINFO(ifp) == NULL) {
+         IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
+         VERIFY(IGMP_IFINFO(ifp) != NULL);
+     } else {
+         VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
+         igmp_domifreattach(IGMP_IFINFO(ifp));
+     }
+ #endif /* INET */
+ #if INET6
+     if (MLD_IFINFO(ifp) == NULL) {
+         MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
+         VERIFY(MLD_IFINFO(ifp) != NULL);
+     } else {
+         VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
+         mld_domifreattach(MLD_IFINFO(ifp));
+     }
+ #endif /* INET6 */
+
+     VERIFY(ifp->if_data_threshold == 0);
+
+     /*
+      * Finally, mark this ifnet as attached.
+      */
+     lck_mtx_lock(rnh_lock);
+     ifnet_lock_exclusive(ifp);
+     /* Initialize Link Quality Metric (loopback [lo0] is always good) */
+     ifp->if_lqm = (ifp == lo_ifp) ? IFNET_LQM_THRESH_GOOD :
+         IFNET_LQM_THRESH_UNKNOWN;
+     lck_mtx_lock_spin(&ifp->if_ref_lock);
+     ifp->if_refflags = IFRF_ATTACHED;
+     lck_mtx_unlock(&ifp->if_ref_lock);
+     if (net_rtref) {
+         /* boot-args override; enable idle notification */
+         (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
+             IFRF_IDLE_NOTIFY);
+     } else {
+         /* apply previous request(s) to set the idle flags, if any */
+         (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
+             ifp->if_idle_new_flags_mask);
+
+     }
+     ifnet_lock_done(ifp);
+     lck_mtx_unlock(rnh_lock);
+     dlil_if_unlock();
+
+ #if PF
+     /*
+      * Attach packet filter to this interface, if enabled.
+      */
+     pf_ifnet_hook(ifp, 1);
+ #endif /* PF */
+
+     dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
+
+     if (dlil_verbose) {
+         printf("%s: attached%s\n", if_name(ifp),
+             (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
+     }
+
+     return (0);
+ }
+
+/*
+ * Prepare the storage for the first/permanent link address, which must
+ * must have the same lifetime as the ifnet itself. Although the link
+ * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
+ * its location in memory must never change as it may still be referred
+ * to by some parts of the system afterwards (unfortunate implementation
+ * artifacts inherited from BSD.)
+ *
+ * Caller must hold ifnet lock as writer.
+ */
+static struct ifaddr *
+dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
+{
+ struct ifaddr *ifa, *oifa;
+ struct sockaddr_dl *asdl, *msdl;
+ char workbuf[IFNAMSIZ*2];
+ int namelen, masklen, socksize;
+ struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+
+ ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
+ VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
+
+ namelen = snprintf(workbuf, sizeof (workbuf), "%s",
+ if_name(ifp));
+ masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
+ socksize = masklen + ifp->if_addrlen;
+#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
+ if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
+ socksize = sizeof(struct sockaddr_dl);
+ socksize = ROUNDUP(socksize);
+#undef ROUNDUP
+
+ ifa = ifp->if_lladdr;
+ if (socksize > DLIL_SDLMAXLEN ||
+ (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
+ /*
+ * Rare, but in the event that the link address requires
+ * more storage space than DLIL_SDLMAXLEN, allocate the
+ * largest possible storages for address and mask, such
+ * that we can reuse the same space when if_addrlen grows.
+ * This same space will be used when if_addrlen shrinks.
+ */
+ if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
+ int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
+ ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
+ if (ifa == NULL)
+ return (NULL);
+ ifa_lock_init(ifa);
+ /* Don't set IFD_ALLOC, as this is permanent */
+ ifa->ifa_debug = IFD_LINK;
+ }
+ IFA_LOCK(ifa);
+ /* address and mask sockaddr_dl locations */
+ asdl = (struct sockaddr_dl *)(ifa + 1);
+ bzero(asdl, SOCK_MAXADDRLEN);
+ msdl = (struct sockaddr_dl *)(void *)
+ ((char *)asdl + SOCK_MAXADDRLEN);
+ bzero(msdl, SOCK_MAXADDRLEN);
+ } else {
+ VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
+ /*
+ * Use the storage areas for address and mask within the
+ * dlil_ifnet structure. This is the most common case.
+ */
+ if (ifa == NULL) {
+ ifa = &dl_if->dl_if_lladdr.ifa;
+ ifa_lock_init(ifa);
+ /* Don't set IFD_ALLOC, as this is permanent */
+ ifa->ifa_debug = IFD_LINK;
+ }
+ IFA_LOCK(ifa);
+ /* address and mask sockaddr_dl locations */
+ asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
+ bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
+ msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
+ bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
+ }
+
+ /* hold a permanent reference for the ifnet itself */
+ IFA_ADDREF_LOCKED(ifa);
+ oifa = ifp->if_lladdr;
+ ifp->if_lladdr = ifa;
+
+ VERIFY(ifa->ifa_debug == IFD_LINK);
+ ifa->ifa_ifp = ifp;
+ ifa->ifa_rtrequest = link_rtrequest;
+ ifa->ifa_addr = (struct sockaddr *)asdl;
+ asdl->sdl_len = socksize;
+ asdl->sdl_family = AF_LINK;
+ bcopy(workbuf, asdl->sdl_data, namelen);
+ asdl->sdl_nlen = namelen;
+ asdl->sdl_index = ifp->if_index;
+ asdl->sdl_type = ifp->if_type;
+ if (ll_addr != NULL) {
+ asdl->sdl_alen = ll_addr->sdl_alen;
+ bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
+ } else {
+ asdl->sdl_alen = 0;
+ }
+ ifa->ifa_netmask = (struct sockaddr*)msdl;
+ msdl->sdl_len = masklen;
+ while (namelen != 0)
+ msdl->sdl_data[--namelen] = 0xff;
+ IFA_UNLOCK(ifa);
+
+ if (oifa != NULL)
+ IFA_REMREF(oifa);
+
+ return (ifa);
+}
+
+/*
+ * Hand ifp to each network-layer family compiled into this build
+ * (IPv4 first, then IPv6) so that it can purge its addresses.
+ */
+static void
+if_purgeaddrs(struct ifnet *ifp)
+{
+#if INET
+	in_purgeaddrs(ifp);		/* IPv4 */
+#endif /* INET */
+#if INET6
+	in6_purgeaddrs(ifp);		/* IPv6 */
+#endif /* INET6 */
+}
+
+/*
+ * First stage of detaching an interface.
+ *
+ * Flips the interface from ATTACHED to DETACHING, removes it from
+ * ifnet_head and ifindex2ifnet[], resets per-interface state and then
+ * queues it for the detacher thread, which finishes the job.
+ *
+ * Returns 0 on success, EINVAL if ifp is NULL or is not attached, and
+ * ENXIO if a detach is already in progress.
+ */
+errno_t
+ifnet_detach(ifnet_t ifp)
+{
+	struct ifnet *delegated_ifp;
+	errno_t err = 0;
+
+	if (ifp == NULL) {
+		return (EINVAL);
+	}
+
+	lck_mtx_lock(rnh_lock);
+	ifnet_head_lock_exclusive();
+	ifnet_lock_exclusive(ifp);
+
+	/*
+	 * Check to see if this interface has previously triggered
+	 * aggressive protocol draining; if so, decrement the global
+	 * refcnt and clear PR_AGGDRAIN on the route domain if
+	 * there are no more of such an interface around.
+	 */
+	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
+
+	/* Validate and update the attach state under if_ref_lock */
+	lck_mtx_lock_spin(&ifp->if_ref_lock);
+	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
+		err = EINVAL;
+	} else if (ifp->if_refflags & IFRF_DETACHING) {
+		/* Interface has already been detached */
+		err = ENXIO;
+	} else {
+		/* Indicate this interface is being detached */
+		ifp->if_refflags &= ~IFRF_ATTACHED;
+		ifp->if_refflags |= IFRF_DETACHING;
+	}
+	lck_mtx_unlock(&ifp->if_ref_lock);
+
+	if (err != 0) {
+		/* Nothing was changed; unwind the locks and bail */
+		ifnet_lock_done(ifp);
+		ifnet_head_done();
+		lck_mtx_unlock(rnh_lock);
+		return (err);
+	}
+
+	if (dlil_verbose) {
+		printf("%s: detaching\n", if_name(ifp));
+	}
+
+	/*
+	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
+	 * no longer be visible during lookups from this point.
+	 */
+	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
+	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
+	ifp->if_link.tqe_next = NULL;
+	ifp->if_link.tqe_prev = NULL;
+	ifindex2ifnet[ifp->if_index] = NULL;
+
+	/* Record detach PC stacktrace */
+	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
+
+	/* Clear logging parameters */
+	bzero(&ifp->if_log, sizeof (ifp->if_log));
+
+	/*
+	 * Clear delegated interface info; remember the delegate so that
+	 * its reference can be dropped once the locks are released.
+	 */
+	delegated_ifp = ifp->if_delegated.ifp;
+	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
+
+	ifnet_lock_done(ifp);
+	ifnet_head_done();
+	lck_mtx_unlock(rnh_lock);
+
+	/* Release reference held on the delegated interface */
+	if (delegated_ifp != NULL) {
+		ifnet_release(delegated_ifp);
+	}
+
+	/* Reset Link Quality Metric (unless loopback [lo0]) */
+	if (ifp != lo_ifp) {
+		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF);
+	}
+
+	/* Reset TCP local statistics */
+	if (ifp->if_tcp_stat != NULL) {
+		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
+	}
+
+	/* Reset UDP local statistics */
+	if (ifp->if_udp_stat != NULL) {
+		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
+	}
+
+	/* Let BPF know we're detaching */
+	bpfdetach(ifp);
+
+	/* Mark the interface as DOWN */
+	if_down(ifp);
+
+	/* Disable forwarding cached route */
+	lck_mtx_lock(&ifp->if_cached_route_lock);
+	ifp->if_fwd_cacheok = 0;
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+
+	ifp->if_data_threshold = 0;
+
+	/*
+	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
+	 * references to the info structures and leave them attached to
+	 * this ifnet.
+	 */
+#if INET
+	igmp_domifdetach(ifp);
+#endif /* INET */
+#if INET6
+	mld_domifdetach(ifp);
+#endif /* INET6 */
+
+	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
+
+	/* Let worker thread take care of the rest, to avoid reentrancy */
+	dlil_if_lock();
+	ifnet_detaching_enqueue(ifp);
+	dlil_if_unlock();
+
+	return (0);
+}
+
+/*
+ * Append ifp to the list of interfaces awaiting final detach and wake
+ * up whoever sleeps on ifnet_delayed_run.  The dlil_ifnet lock must be
+ * held by the caller.
+ */
+static void
+ifnet_detaching_enqueue(struct ifnet *ifp)
+{
+	dlil_if_lock_assert();
+
+	ifnet_detaching_cnt++;
+	/* The counter must not have wrapped around to zero */
+	VERIFY(ifnet_detaching_cnt != 0);
+
+	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
+	wakeup((caddr_t)&ifnet_delayed_run);
+}
+
+/*
+ * Take the first interface off the pending-detach list, or return NULL
+ * when the list is empty.  The dlil_ifnet lock must be held by the
+ * caller.
+ */
+static struct ifnet *
+ifnet_detaching_dequeue(void)
+{
+	struct ifnet *head;
+
+	dlil_if_lock_assert();
+
+	head = TAILQ_FIRST(&ifnet_detaching_head);
+	/* A zero count is only legal when the list is empty */
+	VERIFY(ifnet_detaching_cnt != 0 || head == NULL);
+	if (head == NULL) {
+		return (NULL);
+	}
+
+	VERIFY(ifnet_detaching_cnt != 0);
+	--ifnet_detaching_cnt;
+	TAILQ_REMOVE(&ifnet_detaching_head, head, if_detaching_link);
+	/* Leave no stale linkage behind; ifnet_detach_final checks this */
+	head->if_detaching_link.tqe_next = NULL;
+	head->if_detaching_link.tqe_prev = NULL;
+
+	return (head);
+}
+
+/*
+ * Body of the detacher thread, also passed to msleep0() as its own
+ * continuation.  Entered with the dlil_ifnet lock held; loops forever,
+ * pulling interfaces off the pending-detach list and running
+ * ifnet_detach_final() on each with the lock dropped.
+ */
+static int
+ifnet_detacher_thread_cont(int err)
+{
+#pragma unused(err)
+	struct ifnet *ifp;
+
+	while (1) {
+		dlil_if_lock_assert();
+
+		/* Nothing queued: sleep until ifnet_delayed_run is signalled */
+		while (ifnet_detaching_cnt == 0) {
+			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
+			    (PZERO - 1), "ifnet_detacher_cont", 0,
+			    ifnet_detacher_thread_cont);
+			/* NOTREACHED */
+		}
+
+		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
+
+		/* Take care of detaching ifnet */
+		ifp = ifnet_detaching_dequeue();
+		if (ifp == NULL) {
+			continue;
+		}
+
+		/* The final detach runs without the dlil_ifnet lock */
+		dlil_if_unlock();
+		ifnet_detach_final(ifp);
+		dlil_if_lock();
+	}
+	/* NOTREACHED */
+	return (0);
+}
+
+/*
+ * Entry point of the detacher thread: take the dlil_ifnet lock and go
+ * to sleep on ifnet_delayed_run, naming ifnet_detacher_thread_cont() as
+ * the continuation.
+ */
+static void
+ifnet_detacher_thread_func(void *v, wait_result_t w)
+{
+#pragma unused(v, w)
+	dlil_if_lock();
+	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
+	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
+
+	/*
+	 * Control is not expected to come back here: PCATCH was not
+	 * set, so msleep0() should not have returned.  Treat a return
+	 * as a fatal inconsistency.
+	 */
+	dlil_if_unlock();
+	VERIFY(0);
+}
+
+/*
+ * Second and last stage of detaching an interface; invoked by the
+ * detacher thread for an ifnet that ifnet_detach() marked as DETACHING.
+ *
+ * Waits for outstanding IO references, tears down the send queue,
+ * filters, addresses and protocols, stops the helper threads, replaces
+ * the driver callbacks with local stubs, calls the driver's original
+ * free callback, clears IFRF_DETACHING and finally drops the reference
+ * taken at attach time.
+ */
+static void
+ifnet_detach_final(struct ifnet *ifp)
+{
+	struct ifnet_filter *filter, *filter_next;
+	struct ifnet_filter_head flt_list;
+	struct dlil_threading_info *inp;
+	struct ifaddr *ifa;
+	ifnet_detached_func driver_free;
+	int slot;
+
+	lck_mtx_lock(&ifp->if_ref_lock);
+	if (!(ifp->if_refflags & IFRF_DETACHING)) {
+		panic("%s: flags mismatch (detaching not set) ifp=%p",
+		    __func__, ifp);
+		/* NOTREACHED */
+	}
+
+	/*
+	 * Wait until the existing IO references get released
+	 * before we proceed with ifnet_detach.  This is not a
+	 * common case, so block without using a continuation.
+	 */
+	while (ifp->if_refio > 0) {
+		printf("%s: Waiting for IO references on %s interface "
+		    "to be released\n", __func__, if_name(ifp));
+		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
+		    (PZERO - 1), "ifnet_ioref_wait", NULL);
+	}
+	lck_mtx_unlock(&ifp->if_ref_lock);
+
+	/* Drain and destroy send queue */
+	ifclassq_teardown(ifp);
+
+	/*
+	 * Detach interface filters: move the whole list aside under the
+	 * filter lock, then detach each entry with the lock dropped.
+	 */
+	lck_mtx_lock(&ifp->if_flt_lock);
+	if_flt_monitor_enter(ifp);
+
+	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+	flt_list = ifp->if_flt_head;
+	TAILQ_INIT(&ifp->if_flt_head);
+
+	filter = TAILQ_FIRST(&flt_list);
+	while (filter != NULL) {
+		/* Fetch the successor first; the filter goes away below */
+		filter_next = TAILQ_NEXT(filter, filt_next);
+		lck_mtx_unlock(&ifp->if_flt_lock);
+
+		dlil_detach_filter_internal(filter, 1);
+		lck_mtx_lock(&ifp->if_flt_lock);
+
+		filter = filter_next;
+	}
+	if_flt_monitor_leave(ifp);
+	lck_mtx_unlock(&ifp->if_flt_lock);
+
+	/* Tell upper layers to drop their network addresses */
+	if_purgeaddrs(ifp);
+
+	ifnet_lock_exclusive(ifp);
+
+	/* Unplumb all protocols */
+	for (slot = 0; slot < PROTO_HASH_SLOTS; slot++) {
+		struct if_proto *proto;
+
+		/*
+		 * Keep unplumbing the head of the bucket until it is
+		 * empty; the ifnet lock is dropped around each call.
+		 */
+		while ((proto = SLIST_FIRST(&ifp->if_proto_hash[slot])) !=
+		    NULL) {
+			protocol_family_t family = proto->protocol_family;
+
+			ifnet_lock_done(ifp);
+			proto_unplumb(family, ifp);
+			ifnet_lock_exclusive(ifp);
+		}
+		/* There should not be any protocols left */
+		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[slot]));
+	}
+	zfree(dlif_phash_zone, ifp->if_proto_hash);
+	ifp->if_proto_hash = NULL;
+
+	/* Detach (permanent) link address from if_addrhead */
+	ifa = TAILQ_FIRST(&ifp->if_addrhead);
+	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
+	IFA_LOCK(ifa);
+	if_detach_link_ifa(ifp, ifa);
+	IFA_UNLOCK(ifa);
+
+	/* Remove (permanent) link address from ifnet_addrs[] */
+	IFA_REMREF(ifa);
+	ifnet_addrs[ifp->if_index - 1] = NULL;
+
+	/* This interface should not be on {ifnet_head,detaching} */
+	VERIFY(ifp->if_link.tqe_next == NULL);
+	VERIFY(ifp->if_link.tqe_prev == NULL);
+	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
+	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
+
+	/* Prefix list should be empty by now */
+	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
+
+	/* The slot should have been emptied */
+	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
+
+	/* There should not be any addresses left */
+	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
+
+	/*
+	 * Signal the starter thread to terminate itself.
+	 */
+	if (ifp->if_start_thread != THREAD_NULL) {
+		lck_mtx_lock_spin(&ifp->if_start_lock);
+		ifp->if_start_flags = 0;
+		ifp->if_start_thread = THREAD_NULL;
+		wakeup_one((caddr_t)&ifp->if_start_thread);
+		lck_mtx_unlock(&ifp->if_start_lock);
+	}
+
+	/*
+	 * Signal the poller thread to terminate itself.
+	 */
+	if (ifp->if_poll_thread != THREAD_NULL) {
+		lck_mtx_lock_spin(&ifp->if_poll_lock);
+		ifp->if_poll_thread = THREAD_NULL;
+		wakeup_one((caddr_t)&ifp->if_poll_thread);
+		lck_mtx_unlock(&ifp->if_poll_lock);
+	}
+
+	/*
+	 * If thread affinity was set for the workloop thread, we will need
+	 * to tear down the affinity and release the extra reference count
+	 * taken at attach time.  Does not apply to lo0 or other interfaces
+	 * without dedicated input threads.
+	 */
+	inp = ifp->if_inp;
+	if (inp != NULL) {
+		VERIFY(inp != dlil_main_input_thread);
+
+		if (inp->net_affinity) {
+			struct thread *input_tp, *wloop_tp, *poll_tp;
+
+			/* Snapshot and reset the thread pointers */
+			lck_mtx_lock_spin(&inp->input_lck);
+			wloop_tp = inp->wloop_thr;
+			inp->wloop_thr = THREAD_NULL;
+			poll_tp = inp->poll_thr;
+			inp->poll_thr = THREAD_NULL;
+			input_tp = inp->input_thr; /* don't nullify now */
+			inp->tag = 0;
+			inp->net_affinity = FALSE;
+			lck_mtx_unlock(&inp->input_lck);
+
+			/* Tear down poll thread affinity */
+			if (poll_tp != NULL) {
+				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
+				(void) dlil_affinity_set(poll_tp,
+				    THREAD_AFFINITY_TAG_NULL);
+				thread_deallocate(poll_tp);
+			}
+
+			/* Tear down workloop thread affinity */
+			if (wloop_tp != NULL) {
+				(void) dlil_affinity_set(wloop_tp,
+				    THREAD_AFFINITY_TAG_NULL);
+				thread_deallocate(wloop_tp);
+			}
+
+			/* Tear down DLIL input thread affinity */
+			(void) dlil_affinity_set(input_tp,
+			    THREAD_AFFINITY_TAG_NULL);
+			thread_deallocate(input_tp);
+		}
+
+		/* disassociate ifp DLIL input thread */
+		ifp->if_inp = NULL;
+
+		/*
+		 * Flag the input thread for termination; it only needs
+		 * a wakeup when it is not currently running.
+		 */
+		lck_mtx_lock_spin(&inp->input_lck);
+		inp->input_waiting |= DLIL_INPUT_TERMINATE;
+		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
+			wakeup_one((caddr_t)&inp->input_waiting);
+		}
+		lck_mtx_unlock(&inp->input_lck);
+	}
+
+	/*
+	 * The driver might unload, so point these to ourselves.  The
+	 * driver's free callback is saved first so it can still be
+	 * invoked further below.
+	 */
+	driver_free = ifp->if_free;
+	ifp->if_output = ifp_if_output;
+	ifp->if_pre_enqueue = ifp_if_output;
+	ifp->if_start = ifp_if_start;
+	ifp->if_output_ctl = ifp_if_ctl;
+	ifp->if_input_poll = ifp_if_input_poll;
+	ifp->if_input_ctl = ifp_if_ctl;
+	ifp->if_ioctl = ifp_if_ioctl;
+	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
+	ifp->if_free = ifp_if_free;
+	ifp->if_demux = ifp_if_demux;
+	ifp->if_event = ifp_if_event;
+	ifp->if_framer_legacy = ifp_if_framer;
+	ifp->if_framer = ifp_if_framer_extended;
+	ifp->if_add_proto = ifp_if_add_proto;
+	ifp->if_del_proto = ifp_if_del_proto;
+	ifp->if_check_multi = ifp_if_check_multi;
+
+	/* wipe out interface description */
+	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
+	ifp->if_desc.ifd_len = 0;
+	VERIFY(ifp->if_desc.ifd_desc != NULL);
+	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
+
+	/* there shouldn't be any delegation by now */
+	VERIFY(ifp->if_delegated.ifp == NULL);
+	VERIFY(ifp->if_delegated.type == 0);
+	VERIFY(ifp->if_delegated.family == 0);
+	VERIFY(ifp->if_delegated.subfamily == 0);
+	VERIFY(ifp->if_delegated.expensive == 0);
+
+	ifnet_lock_done(ifp);
+
+#if PF
+	/*
+	 * Detach this interface from packet filter, if enabled.
+	 */
+	pf_ifnet_hook(ifp, 0);
+#endif /* PF */
+
+	/* Filter list should be empty */
+	lck_mtx_lock_spin(&ifp->if_flt_lock);
+	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
+	VERIFY(ifp->if_flt_busy == 0);
+	VERIFY(ifp->if_flt_waiters == 0);
+	lck_mtx_unlock(&ifp->if_flt_lock);
+
+	/* Last chance to drain send queue */
+	if_qflush(ifp, 0);
+
+	/* Last chance to cleanup any cached route */
+	lck_mtx_lock(&ifp->if_cached_route_lock);
+	VERIFY(!ifp->if_fwd_cacheok);
+	ROUTE_RELEASE(&ifp->if_fwd_route);
+	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
+	ROUTE_RELEASE(&ifp->if_src_route);
+	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
+	ROUTE_RELEASE(&ifp->if_src_route6);
+	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+
+	VERIFY(ifp->if_data_threshold == 0);
+
+	ifnet_llreach_ifdetach(ifp);
+
+	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
+
+	/* Invoke the free callback saved before the stubs were installed */
+	if (driver_free != NULL) {
+		driver_free(ifp);
+	}
+
+	/*
+	 * Finally, mark this ifnet as detached.
+	 */
+	lck_mtx_lock_spin(&ifp->if_ref_lock);
+	if (!(ifp->if_refflags & IFRF_DETACHING)) {
+		panic("%s: flags mismatch (detaching not set) ifp=%p",
+		    __func__, ifp);
+		/* NOTREACHED */
+	}
+	ifp->if_refflags &= ~IFRF_DETACHING;
+	lck_mtx_unlock(&ifp->if_ref_lock);
+
+	if (dlil_verbose) {
+		printf("%s: detached\n", if_name(ifp));
+	}
+
+	/* Release reference held during ifnet attach */
+	ifnet_release(ifp);
+}
+
+/*
+ * Stand-in for if_output and if_pre_enqueue, installed by
+ * ifnet_detach_final(): frees the mbuf and reports success.
+ */
+static errno_t
+ifp_if_output(struct ifnet *ifp, struct mbuf *m)
+{
+#pragma unused(ifp)
+	/* Nothing can be transmitted anymore; just discard the packet */
+	m_freem(m);
+
+	return (0);
+}
+
+/*
+ * Stand-in for if_start, installed by ifnet_detach_final(): rather than
+ * starting transmission it calls ifnet_purge() on the interface.
+ */
+static void
+ifp_if_start(struct ifnet *ifp)
+{
+	ifnet_purge(ifp);
+}
+
+/*
+ * Stand-in for if_input_poll, installed by ifnet_detach_final(): always
+ * reports that no packets are available.  Each output argument is
+ * optional and is only written when non-NULL.
+ */
+static void
+ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
+    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
+{
+#pragma unused(ifp, flags, max_cnt)
+	/* Empty packet chain */
+	if (m_head != NULL) {
+		*m_head = NULL;
+	}
+	if (m_tail != NULL) {
+		*m_tail = NULL;
+	}
+
+	/* Zero packets, zero bytes */
+	if (cnt != NULL) {
+		*cnt = 0;
+	}
+	if (len != NULL) {
+		*len = 0;
+	}
+}
+
+/*
+ * Stand-in for if_output_ctl and if_input_ctl, installed by
+ * ifnet_detach_final(): every control command is rejected with
+ * EOPNOTSUPP.
+ */
+static errno_t
+ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
+{
+#pragma unused(ifp, cmd, arglen, arg)
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Stand-in for if_demux, installed by ifnet_detach_final(): frees the
+ * packet and returns EJUSTRETURN.
+ */
+static errno_t
+ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
+{
+#pragma unused(ifp, fh, pf)
+	/* No protocol will ever claim this packet; discard it here */
+	m_freem(m);
+
+	return (EJUSTRETURN);
+}
+
+/*
+ * Stand-in for if_add_proto, installed by ifnet_detach_final(): always
+ * fails with EINVAL.
+ */
+static errno_t
+ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
+    const struct ifnet_demux_desc *da, u_int32_t dc)
+{
+#pragma unused(ifp, pf, da, dc)
+	return (EINVAL);
+}
+
+/*
+ * Stand-in for if_del_proto, installed by ifnet_detach_final(): always
+ * fails with EINVAL.
+ */
+static errno_t
+ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
+{
+#pragma unused(ifp, pf)
+	return (EINVAL);
+}
+
+/*
+ * Stand-in for if_check_multi, installed by ifnet_detach_final(): always
+ * answers EOPNOTSUPP.
+ */
+static errno_t
+ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
+{
+#pragma unused(ifp, sa)
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Stand-in for if_framer_legacy, installed by ifnet_detach_final().
+ * Forwards to ifp_if_framer_extended() without asking for the pre/post
+ * lengths, and returns whatever that function returns.
+ *
+ * All parameters are passed along, so none of them is marked with
+ * "#pragma unused" here.
+ */
+static errno_t
+ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
+    const struct sockaddr *sa, const char *ll, const char *t)
+{
+	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
+}
+
+/*
+ * Stand-in for if_framer, installed by ifnet_detach_final(): frees the
+ * packet, clears the caller's mbuf pointer, reports zero for the
+ * optional pre/post values and returns EJUSTRETURN.
+ */
+static errno_t
+ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
+    const struct sockaddr *sa, const char *ll, const char *t,
+    u_int32_t *pre, u_int32_t *post)
+{
+#pragma unused(ifp, sa, ll, t)
+	/* Discard the packet and make sure the caller cannot reuse it */
+	m_freem(*m);
+	*m = NULL;
+
+	/* pre and post are optional outputs */
+	if (pre != NULL) {
+		*pre = 0;
+	}
+	if (post != NULL) {
+		*post = 0;
+	}
+
+	return (EJUSTRETURN);
+}
+
+/*
+ * Stand-in for if_ioctl, installed by ifnet_detach_final(): every ioctl
+ * is rejected with EOPNOTSUPP.
+ */
+errno_t
+ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
+{
+#pragma unused(ifp, cmd, arg)
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Stand-in for if_set_bpf_tap, installed by ifnet_detach_final():
+ * ignores the request and reports success.
+ */
+static errno_t
+ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
+{
+#pragma unused(ifp, tm, f)
+	/* XXX not sure what to do here */
+	return (0);
+}
+
+/*
+ * Stand-in for if_free, installed by ifnet_detach_final(); deliberately
+ * does nothing.
+ */
+static void
+ifp_if_free(struct ifnet *ifp)
+{
+#pragma unused(ifp)
+}
+
+/*
+ * Stand-in for if_event, installed by ifnet_detach_final(); the event
+ * is silently ignored.
+ */
+static void
+ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
+{
+#pragma unused(ifp, e)
+}
+
+/*
+ * Obtain a dlil_ifnet for the given family and unique id.
+ *
+ * An existing entry of the same family whose unique id matches and
+ * which is not in use is recycled (marked DLIF_INUSE|DLIF_REUSE).
+ * Otherwise a new one is allocated from dlif_zone, initialized and
+ * appended to dlil_ifnet_head.
+ *
+ * On success, returns 0 and stores the interface in *ifp.  Returns
+ * EBUSY if a matching entry with a non-empty unique id is already in
+ * use, and ENOMEM if memory could not be allocated; *ifp is left
+ * untouched in both of those cases.
+ */
+__private_extern__
+int dlil_if_acquire(u_int32_t family, const void *uniqueid,
+    size_t uniqueid_len, struct ifnet **ifp)
+{
+	struct ifnet *ifp1 = NULL;
+	struct dlil_ifnet *dlifp1 = NULL;
+	void *buf, *base, **pbuf;
+	int ret = 0;
+
+	dlil_if_lock();
+	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
+		ifp1 = (struct ifnet *)dlifp1;
+
+		if (ifp1->if_family != family) {
+			continue;
+		}
+
+		lck_mtx_lock(&dlifp1->dl_if_lock);
+		/* same uniqueid and same len or no unique id specified */
+		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
+		    !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
+			/* check for matching interface in use */
+			if (dlifp1->dl_if_flags & DLIF_INUSE) {
+				if (uniqueid_len) {
+					ret = EBUSY;
+					lck_mtx_unlock(&dlifp1->dl_if_lock);
+					goto end;
+				}
+				/* no unique id: keep looking for a free one */
+			} else {
+				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
+				lck_mtx_unlock(&dlifp1->dl_if_lock);
+				*ifp = ifp1;
+				goto end;
+			}
+		}
+		lck_mtx_unlock(&dlifp1->dl_if_lock);
+	}
+
+	/* no interface found, allocate a new one */
+	buf = zalloc(dlif_zone);
+	if (buf == NULL) {
+		ret = ENOMEM;
+		goto end;
+	}
+	bzero(buf, dlif_bufsize);
+
+	/* Get the 64-bit aligned base address for this object */
+	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
+	    sizeof (u_int64_t));
+	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
+
+	/*
+	 * Wind back a pointer size from the aligned base and
+	 * save the original address so we can free it later.
+	 */
+	pbuf = (void **)((intptr_t)base - sizeof (void *));
+	*pbuf = buf;
+	dlifp1 = base;
+
+	if (uniqueid_len) {
+		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
+		    M_NKE, M_WAITOK);
+		if (dlifp1->dl_if_uniqueid == NULL) {
+			/*
+			 * The zone element must be released using the
+			 * address zalloc() returned (buf), not the
+			 * aligned base derived from it.  Forget the
+			 * dangling pointer so the check at "end" does
+			 * not inspect a freed object.
+			 */
+			zfree(dlif_zone, buf);
+			dlifp1 = NULL;
+			ret = ENOMEM;
+			goto end;
+		}
+		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
+		dlifp1->dl_if_uniqueid_len = uniqueid_len;
+	}
+
+	ifp1 = (struct ifnet *)dlifp1;
+	dlifp1->dl_if_flags = DLIF_INUSE;
+	if (ifnet_debug) {
+		dlifp1->dl_if_flags |= DLIF_DEBUG;
+		dlifp1->dl_if_trace = dlil_if_trace;
+	}
+	/* Name strings live in storage embedded in the dlil_ifnet */
+	ifp1->if_name = dlifp1->dl_if_namestorage;
+	ifp1->if_xname = dlifp1->dl_if_xnamestorage;
+
+	/* initialize interface description */
+	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
+	ifp1->if_desc.ifd_len = 0;
+	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
+
+#if CONFIG_MACF_NET
+	mac_ifnet_label_init(ifp1);
+#endif
+
+	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
+		DLIL_PRINTF("%s: failed to allocate if local stats, "
+		    "error: %d\n", __func__, ret);
+		/* This probably shouldn't be fatal */
+		ret = 0;
+	}
+
+	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
+	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
+	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
+	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
+	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
+	    ifnet_lock_attr);
+	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
+#if INET6
+	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group, ifnet_lock_attr);
+	ifp1->if_inet6data = NULL;
+#endif
+
+	/* for send data paths */
+	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
+	    ifnet_lock_attr);
+	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
+	    ifnet_lock_attr);
+	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
+	    ifnet_lock_attr);
+
+	/* for receive data paths */
+	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
+	    ifnet_lock_attr);
+
+	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
+
+	*ifp = ifp1;
+
+end:
+	dlil_if_unlock();
+
+	/* Any dlil_ifnet we ended up with must be 64-bit aligned */
+	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
+	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
+
+	return (ret);
+}
+
+/*
+ * Mark the dlil_ifnet behind ifp as no longer in use.  Clears
+ * DLIF_INUSE, copies the current interface name into the embedded name
+ * storage and points if_name at it, and resets the external name to
+ * "<name>?".
+ */
+__private_extern__ void
+dlil_if_release(ifnet_t ifp)
+{
+	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+
+	ifnet_lock_exclusive(ifp);
+
+	lck_mtx_lock(&dl_if->dl_if_lock);
+	dl_if->dl_if_flags &= ~DLIF_INUSE;
+
+	/* Keep a private copy of the name and make if_name refer to it */
+	strlcpy(dl_if->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
+	ifp->if_name = dl_if->dl_if_namestorage;
+
+	/* Reset external name (name + unit) */
+	ifp->if_xname = dl_if->dl_if_xnamestorage;
+	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
+	    "%s?", ifp->if_name);
+	lck_mtx_unlock(&dl_if->dl_if_lock);
+
+#if CONFIG_MACF_NET
+	/*
+	 * We can either recycle the MAC label here or in dlil_if_acquire().
+	 * It seems logical to do it here but this means that anything that
+	 * still has a handle on ifp will now see it as unlabeled.
+	 * Since the interface is "dead" that may be OK.  Revisit later.
+	 */
+	mac_ifnet_label_recycle(ifp);
+#endif
+
+	ifnet_lock_done(ifp);
+}
+
+/* Acquire dlil_ifnet_lock. */
+__private_extern__ void
+dlil_if_lock(void)
+{
+	lck_mtx_lock(&dlil_ifnet_lock);
+}
+
+/* Release dlil_ifnet_lock. */
+__private_extern__ void
+dlil_if_unlock(void)
+{
+	lck_mtx_unlock(&dlil_ifnet_lock);
+}
+
+/* Assert that dlil_ifnet_lock is owned (LCK_MTX_ASSERT_OWNED). */
+__private_extern__ void
+dlil_if_lock_assert(void)
+{
+	lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
+}
+
+/*
+ * Unplumb the protocols that need an explicit unplumb from ifp: PF_INET
+ * and, when built with INET6, PF_INET6.  Results of the individual
+ * unplumb calls are ignored.
+ */
+__private_extern__ void
+dlil_proto_unplumb_all(struct ifnet *ifp)
+{
+	/*
+	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
+	 * each bucket contains exactly one entry; PF_VLAN does not need an
+	 * explicit unplumb.
+	 *
+	 * if_proto_hash[3] is for other protocols; we expect anything
+	 * in this bucket to respond to the DETACHING event (which would
+	 * have happened by now) and do the unplumb then.
+	 */
+	(void) proto_unplumb(PF_INET, ifp);
+#if INET6
+	(void) proto_unplumb(PF_INET6, ifp);
+#endif /* INET6 */
+}
+
+/*
+ * Copy the interface's cached source route (if_src_route) out into dst
+ * via route_copyout(), holding if_cached_route_lock for the duration.
+ */
+static void
+ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
+{
+	/* Taken as a spin lock first, then converted */
+	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
+
+	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
+
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+}
+
+/*
+ * Hand src back to the interface's cached source route (if_src_route)
+ * via route_copyin() while caching is still allowed; once
+ * if_fwd_cacheok has been cleared the route is released instead.
+ */
+static void
+ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
+{
+	/* Taken as a spin lock first, then converted */
+	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
+
+	if (!ifp->if_fwd_cacheok) {
+		/* Caching is disabled; do not store the route */
+		ROUTE_RELEASE(src);
+	} else {
+		route_copyin(src, &ifp->if_src_route, sizeof (*src));
+	}
+
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+}
+
+#if INET6
+/*
+ * IPv6 counterpart of ifp_src_route_copyout(): copy the cached
+ * if_src_route6 out into dst via route_copyout(), holding
+ * if_cached_route_lock for the duration.
+ */
+static void
+ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
+{
+	/* Taken as a spin lock first, then converted */
+	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
+
+	/* route_copyout() takes struct route; size is that of route_in6 */
+	route_copyout((struct route *)dst,
+	    (struct route *)&ifp->if_src_route6, sizeof (*dst));
+
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+}
+
+/*
+ * IPv6 counterpart of ifp_src_route_copyin(): hand src back to the
+ * cached if_src_route6 via route_copyin() while caching is still
+ * allowed; once if_fwd_cacheok has been cleared the route is released
+ * instead.
+ */
+static void
+ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
+{
+	/* Taken as a spin lock first, then converted */
+	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
+
+	if (!ifp->if_fwd_cacheok) {
+		/* Caching is disabled; do not store the route */
+		ROUTE_RELEASE(src);
+	} else {
+		/* route_copyin() takes struct route; size is route_in6's */
+		route_copyin((struct route *)src,
+		    (struct route *)&ifp->if_src_route6, sizeof (*src));
+	}
+
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+}
+#endif /* INET6 */
+
+struct rtentry *
+ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
+{
+ struct route src_rt;
+ struct sockaddr_in *dst;
+
+ dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
+
+ ifp_src_route_copyout(ifp, &src_rt);
+
+ if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
+ ROUTE_RELEASE(&src_rt);
+ if (dst->sin_family != AF_INET) {
+ bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
+ dst->sin_len = sizeof (src_rt.ro_dst);
+ dst->sin_family = AF_INET;
+ }
+ dst->sin_addr = src_ip;
+
+ if (src_rt.ro_rt == NULL) {
+ src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
+ 0, 0, ifp->if_index);
+
+ if (src_rt.ro_rt != NULL) {
+ /* retain a ref, copyin consumes one */
+ struct rtentry *rte = src_rt.ro_rt;
+ RT_ADDREF(rte);
+ ifp_src_route_copyin(ifp, &src_rt);
+ src_rt.ro_rt = rte;
+ }
+ }