git.saurik.com Git - apple/xnu.git / commitdiff
xnu-6153.41.3 (tags: macos-10151, v6153.41.3)
author    Apple <opensource@apple.com>
          Fri, 10 Apr 2020 19:49:29 +0000
committer Apple <opensource@apple.com>
          Fri, 10 Apr 2020 19:49:29 +0000
170 files changed:
bsd/dev/dtrace/fbt_blacklist.c
bsd/dev/i386/sysctl.c
bsd/kern/bsd_init.c
bsd/kern/kdebug.c
bsd/kern/kern_control.c
bsd/kern/kern_event.c
bsd/kern/kern_exec.c
bsd/kern/kern_fork.c
bsd/kern/kern_lockf.c
bsd/kern/kern_memorystatus.c
bsd/kern/kern_synch.c
bsd/kern/trace_codes
bsd/kern/uipc_socket.c
bsd/kern/uipc_syscalls.c
bsd/miscfs/specfs/spec_vnops.c
bsd/net/if_ipsec.c
bsd/net/if_ports_used.c
bsd/net/if_ports_used.h
bsd/net/kpi_interface.c
bsd/net/kpi_interface.h
bsd/net/ndrv.c
bsd/net/necp.c
bsd/net/necp_client.c
bsd/net/net_kev.h
bsd/net/pf_if.c
bsd/netinet/in.h
bsd/netinet/in_pcb.h
bsd/netinet/in_pcblist.c
bsd/netinet/in_tclass.c
bsd/netinet/ip_output.c
bsd/netinet/mptcp.c
bsd/netinet/mptcp_subr.c
bsd/netinet/mptcp_usrreq.c
bsd/netinet/mptcp_var.h
bsd/netinet/raw_ip.c
bsd/netinet/tcp_timer.c
bsd/netinet/tcp_timer.h
bsd/netinet/tcp_var.h
bsd/netinet/udp_usrreq.c
bsd/netinet6/ah_input.c
bsd/netinet6/esp_chachapoly.c
bsd/netinet6/esp_core.c
bsd/netinet6/esp_input.c
bsd/netinet6/esp_rijndael.c
bsd/netinet6/ipsec.c
bsd/netinet6/ipsec.h
bsd/netinet6/udp6_usrreq.c
bsd/netkey/key.c
bsd/netkey/key.h
bsd/nfs/nfs_subs.c
bsd/nfs/nfs_syscalls.c
bsd/nfs/nfs_vnops.c
bsd/pthread/pthread_workqueue.c
bsd/pthread/workqueue_internal.h
bsd/security/audit/audit.c
bsd/security/audit/audit.h
bsd/sys/event.h
bsd/sys/eventvar.h
bsd/sys/kdebug.h
bsd/sys/kern_memorystatus.h
bsd/sys/lockf.h
bsd/sys/socketvar.h
bsd/sys/user.h
bsd/sys/vnode.h
bsd/tests/ctrr_test_sysctl.c [deleted file]
bsd/vfs/kpi_vfs.c
bsd/vfs/vfs_lookup.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_syscalls.c
iokit/IOKit/pwr_mgt/IOPM.h
iokit/IOKit/pwr_mgt/IOPMPrivate.h
iokit/Kernel/IOInterruptController.cpp
iokit/Kernel/IOKitDebug.cpp
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IOService.cpp
iokit/Kernel/IOUserServer.cpp
libkern/os/reason_private.h
libsyscall/wrappers/persona.c
osfmk/arm/cswitch.s
osfmk/arm/genassym.c
osfmk/arm/machine_routines_asm.s
osfmk/arm/pcb.c
osfmk/arm/pmap.c
osfmk/arm/pmap.h
osfmk/arm/status.c
osfmk/arm/thread.h
osfmk/arm64/cswitch.s
osfmk/arm64/genassym.c
osfmk/arm64/locore.s
osfmk/arm64/loose_ends.c
osfmk/arm64/machine_routines_asm.s
osfmk/arm64/pcb.c
osfmk/arm64/platform_tests.c
osfmk/arm64/proc_reg.h
osfmk/arm64/status.c
osfmk/corpses/corpse.c
osfmk/i386/cpu_data.h
osfmk/i386/cpu_topology.c
osfmk/i386/cpuid.c
osfmk/i386/cpuid.h
osfmk/i386/fpu.c
osfmk/i386/fpu.h
osfmk/i386/i386_init.c
osfmk/i386/proc_reg.h
osfmk/i386/trap.c
osfmk/i386/trap_native.c
osfmk/i386/ucode.c
osfmk/ipc/ipc_mqueue.c
osfmk/ipc/ipc_mqueue.h
osfmk/ipc/ipc_notify.c
osfmk/ipc/ipc_port.c
osfmk/ipc/ipc_port.h
osfmk/ipc/ipc_pset.c
osfmk/ipc/mach_msg.c
osfmk/ipc/mach_port.c
osfmk/kern/backtrace.c
osfmk/kern/debug.h
osfmk/kern/host.c
osfmk/kern/ipc_tt.c
osfmk/kern/kalloc.c
osfmk/kern/kpc_thread.c
osfmk/kern/ledger.c
osfmk/kern/ledger.h
osfmk/kern/mach_node.c
osfmk/kern/sched_prim.c
osfmk/kern/sched_prim.h
osfmk/kern/startup.c
osfmk/kern/task.c
osfmk/kern/task.h
osfmk/kern/thread.c
osfmk/kern/thread.h
osfmk/kern/timer.h
osfmk/kern/turnstile.c
osfmk/kern/waitq.c
osfmk/kern/waitq.h
osfmk/kperf/action.c
osfmk/kperf/action.h
osfmk/kperf/ast.h
osfmk/kperf/callstack.c
osfmk/kperf/callstack.h
osfmk/kperf/kperf.c
osfmk/kperf/kperf.h
osfmk/kperf/kperf_kpc.c
osfmk/kperf/pet.c
osfmk/kperf/sample.h
osfmk/kperf/thread_samplers.c
osfmk/kperf/thread_samplers.h
osfmk/mach/arm/thread_status.h
osfmk/mach/shared_region.h
osfmk/mach/sync_policy.h
osfmk/man/index.html [deleted file]
osfmk/vm/vm_compressor.c
osfmk/vm/vm_fault.c
osfmk/vm/vm_map.c
osfmk/vm/vm_object.h
osfmk/vm/vm_pageout.h
osfmk/x86_64/kpc_x86.c
tests/Makefile
tests/no32exec_35914211.c
tests/no32exec_35914211_helper.c
tests/no32exec_35914211_helper_binprefs.c [deleted file]
tools/lldbmacros/core/cvalue.py
tools/lldbmacros/ipc.py
tools/lldbmacros/memory.py
tools/lldbmacros/misc.py
tools/lldbmacros/process.py
tools/lldbmacros/waitq.py
tools/lldbmacros/xnu.py
tools/tests/zero-to-n/zero-to-n.c
tools/trace/ktruss.lua [new file with mode: 0755]

diff --git a/bsd/dev/dtrace/fbt_blacklist.c b/bsd/dev/dtrace/fbt_blacklist.c
index f8f34ae337c94dfea3eac7e5525fe51973f0f272..8b7d371cf3f5857db3941fff8eb4997854c30495 100644
@@ -202,6 +202,7 @@ const char * fbt_blacklist[] =
        CLOSURE(prf)
        CLOSURE(proc_is64bit)
        CLOSURE(proc_selfname)
+       CRITICAL(rbtrace_bt)
        CRITICAL(register_cpu_setup_func)
        CRITICAL(ret64_iret)
        CRITICAL(ret_to_user)
@@ -227,6 +228,7 @@ const char * fbt_blacklist[] =
        ARM_ONLY(timer_state_event)
        CRITICAL(tmrCvt)
        CRITICAL(trap_from_kernel)
+       CRITICAL(traptrace_)
        CRITICAL(tsc_)
        CRITICAL(uart_putc)
        CRITICAL(unlock_debugger)
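
The trailing underscore on entries such as traptrace_ and tsc_ marks them as name prefixes: one blacklist entry covers every function whose name begins with it, keeping whole families of trap-tracing and timestamp routines out of FBT's reach. A minimal sketch of prefix-style matching over such a table (a hypothetical helper, not the actual xnu lookup code):

    #include <stdbool.h>
    #include <string.h>

    /*
     * Hypothetical prefix-aware blacklist check: an entry excludes a probe
     * when the probe's name starts with it, so "traptrace_" covers
     * traptrace_start, traptrace_instr, and so on.
     */
    static bool
    blacklist_excludes(const char *const table[], size_t count, const char *name)
    {
            for (size_t i = 0; i < count; i++) {
                    if (strncmp(name, table[i], strlen(table[i])) == 0) {
                            return true;
                    }
            }
            return false;
    }
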
diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c
index 2300e0b7f67e2f88c25bda03257d451bb3155778..567c5817e636c6da6808805218f22f892fbfd1c3 100644
@@ -1051,4 +1051,10 @@ SYSCTL_PROC(_machdep_misc, OID_AUTO, spin_forever,
     0, 0,
     spin_in_the_kernel, "I", "Spin forever");
 
+
+extern int traptrace_enabled;
+SYSCTL_INT(_machdep_misc, OID_AUTO, traptrace_enabled,
+    CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &traptrace_enabled, 0, "Enable/disable trap trace");
+
 #endif /* DEVELOPMENT || DEBUG */
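
On DEVELOPMENT and DEBUG kernels the new OID surfaces to userspace as machdep.misc.traptrace_enabled. A small sketch of reading and toggling it with the standard sysctlbyname(3) interface (assumes a kernel built with the knob compiled in; writing typically requires root):

    #include <stdio.h>
    #include <sys/sysctl.h>

    int
    main(void)
    {
            int enabled = 0;
            size_t len = sizeof(enabled);

            /* Read the current value; this fails on RELEASE kernels,
             * where the OID does not exist. */
            if (sysctlbyname("machdep.misc.traptrace_enabled",
                &enabled, &len, NULL, 0) != 0) {
                    perror("sysctlbyname");
                    return 1;
            }
            printf("traptrace_enabled = %d\n", enabled);

            /* CTLFLAG_RW makes the OID writable. */
            int newval = !enabled;
            return sysctlbyname("machdep.misc.traptrace_enabled",
                       NULL, NULL, &newval, sizeof(newval)) ? 1 : 0;
    }
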
diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c
index 3e2052fb03016c41249ca11603dbf12e321af472..bc5b709e0f185cdf5e3038557d0b6e61c90bb260 100644
 #include <net/restricted_in_port.h> /* for restricted_in_port_init() */
 #include <kern/assert.h>                /* for assert() */
 #include <sys/kern_overrides.h>         /* for init_system_override() */
+#include <sys/lockf.h>                  /* for lf_init() */
 
 #include <net/init.h>
 
@@ -315,6 +316,8 @@ __private_extern__ int bootarg_vnode_cache_defeat = 0;
 __private_extern__ int bootarg_no_vnode_jetsam = 0;
 #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
 
+__private_extern__ int bootarg_no_vnode_drain = 0;
+
 /*
  * Prevent kernel-based ASLR from being used, for testing.
  */
@@ -760,6 +763,10 @@ bsd_init(void)
        bsd_init_kprintf("calling vfsinit\n");
        vfsinit();
 
+       /* Initialize file locks. */
+       bsd_init_kprintf("calling lf_init\n");
+       lf_init();
+
 #if CONFIG_PROC_UUID_POLICY
        /* Initial proc_uuid_policy subsystem */
        bsd_init_kprintf("calling proc_uuid_policy_init()\n");
@@ -1331,6 +1338,9 @@ parse_bsd_args(void)
        }
 #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
 
+       if (PE_parse_boot_argn("-no_vnode_drain", namep, sizeof(namep))) {
+               bootarg_no_vnode_drain = 1;
+       }
 
 #if CONFIG_EMBEDDED
        /*
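
-no_vnode_drain follows the usual flag-style boot-arg convention: only the argument's presence matters, so the scratch buffer handed to PE_parse_boot_argn is ignored. A short sketch of the two common shapes of this KPI, using made-up argument names:

    #include <stdint.h>
    #include <pexpert/pexpert.h>

    static int      bootarg_example_flag;   /* illustrative, not a real arg */
    static uint32_t bootarg_example_value;  /* illustrative, not a real arg */

    static void
    parse_example_boot_args(void)
    {
            char namep[16];

            /* Flag style: presence alone enables the behavior, exactly as
             * -no_vnode_drain is handled above. */
            if (PE_parse_boot_argn("-example_flag", namep, sizeof(namep))) {
                    bootarg_example_flag = 1;
            }

            /* Value style: "example_value=32" parses the number into the
             * caller's buffer and returns TRUE. */
            (void) PE_parse_boot_argn("example_value", &bootarg_example_value,
                sizeof(bootarg_example_value));
    }
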
diff --git a/bsd/kern/kdebug.c b/bsd/kern/kdebug.c
index 0110e71142597e053ab92fe47d10556455a49b9f..eb78ca89a87fe4f0b93df590caf4a2b3d7c3c0ef 100644
@@ -84,8 +84,6 @@
 /*
  * IOP(s)
  *
- * https://coreoswiki.apple.com/wiki/pages/U6z3i0q9/Consistent_Logging_Implementers_Guide.html
- *
  * IOP(s) are auxiliary cores that want to participate in kdebug event logging.
  * They are registered dynamically. Each is assigned a cpu_id at registration.
  *
diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c
index dede2e6e85d521e62f27f945fc06286bb2f615c0..e41d1f103d8528c6e6fe6a870a0bb5bde632ca56 100644
@@ -93,6 +93,7 @@ struct ctl_cb {
        void                    *userdata;
        struct sockaddr_ctl     sac;
        u_int32_t               usecount;
+       u_int32_t               kcb_usecount;
 };
 
 #ifndef ROUNDUP64
@@ -351,6 +352,27 @@ ctl_sofreelastref(struct socket *so)
        return 0;
 }
 
+/*
+ * Use this function to serialize calls into the kctl subsystem
+ */
+static void
+ctl_kcb_increment_use_count(struct ctl_cb *kcb, lck_mtx_t *mutex_held)
+{
+       LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
+       while (kcb->kcb_usecount > 0) {
+               msleep(&kcb->kcb_usecount, mutex_held, PSOCK | PCATCH, "kcb_usecount", NULL);
+       }
+       kcb->kcb_usecount++;
+}
+
+static void
+clt_kcb_decrement_use_count(struct ctl_cb *kcb)
+{
+       assert(kcb->kcb_usecount != 0);
+       kcb->kcb_usecount--;
+       wakeup_one((caddr_t)&kcb->kcb_usecount);
+}
+
 static int
 ctl_detach(struct socket *so)
 {
@@ -360,6 +382,9 @@ ctl_detach(struct socket *so)
                return 0;
        }
 
+       lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+       ctl_kcb_increment_use_count(kcb, mtx_held);
+
        if (kcb->kctl != NULL && kcb->kctl->bind != NULL &&
            kcb->userdata != NULL && !(so->so_state & SS_ISCONNECTED)) {
                // The unit was bound, but not connected
@@ -374,6 +399,7 @@ ctl_detach(struct socket *so)
 
        soisdisconnected(so);
        so->so_flags |= SOF_PCBCLEARING;
+       clt_kcb_decrement_use_count(kcb);
        return 0;
 }
 
@@ -522,9 +548,12 @@ ctl_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
                panic("ctl_bind so_pcb null\n");
        }
 
+       lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+       ctl_kcb_increment_use_count(kcb, mtx_held);
+
        error = ctl_setup_kctl(so, nam, p);
        if (error) {
-               return error;
+               goto out;
        }
 
        if (kcb->kctl == NULL) {
@@ -532,13 +561,16 @@ ctl_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
        }
 
        if (kcb->kctl->bind == NULL) {
-               return EINVAL;
+               error = EINVAL;
+               goto out;
        }
 
        socket_unlock(so, 0);
        error = (*kcb->kctl->bind)(kcb->kctl->kctlref, &kcb->sac, &kcb->userdata);
        socket_lock(so, 0);
 
+out:
+       clt_kcb_decrement_use_count(kcb);
        return error;
 }
 
@@ -552,9 +584,12 @@ ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
                panic("ctl_connect so_pcb null\n");
        }
 
+       lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+       ctl_kcb_increment_use_count(kcb, mtx_held);
+
        error = ctl_setup_kctl(so, nam, p);
        if (error) {
-               return error;
+               goto out;
        }
 
        if (kcb->kctl == NULL) {
@@ -596,6 +631,8 @@ end:
                kctlstat.kcs_conn_fail++;
                lck_mtx_unlock(ctl_mtx);
        }
+out:
+       clt_kcb_decrement_use_count(kcb);
        return error;
 }
 
@@ -605,6 +642,8 @@ ctl_disconnect(struct socket *so)
        struct ctl_cb   *kcb = (struct ctl_cb *)so->so_pcb;
 
        if ((kcb = (struct ctl_cb *)so->so_pcb)) {
+               lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+               ctl_kcb_increment_use_count(kcb, mtx_held);
                struct kctl             *kctl = kcb->kctl;
 
                if (kctl && kctl->disconnect) {
@@ -628,6 +667,7 @@ ctl_disconnect(struct socket *so)
                kctlstat.kcs_gencnt++;
                lck_mtx_unlock(ctl_mtx);
                socket_lock(so, 0);
+               clt_kcb_decrement_use_count(kcb);
        }
        return 0;
 }
@@ -694,11 +734,20 @@ ctl_sbrcv_trim(struct socket *so)
 static int
 ctl_usr_rcvd(struct socket *so, int flags)
 {
+       int                     error = 0;
        struct ctl_cb           *kcb = (struct ctl_cb *)so->so_pcb;
        struct kctl                     *kctl;
 
+       if (kcb == NULL) {
+               return ENOTCONN;
+       }
+
+       lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+       ctl_kcb_increment_use_count(kcb, mtx_held);
+
        if ((kctl = kcb->kctl) == NULL) {
-               return EINVAL;
+               error = EINVAL;
+               goto out;
        }
 
        if (kctl->rcvd) {
@@ -709,7 +758,9 @@ ctl_usr_rcvd(struct socket *so, int flags)
 
        ctl_sbrcv_trim(so);
 
-       return 0;
+out:
+       clt_kcb_decrement_use_count(kcb);
+       return error;
 }
 
 static int
@@ -730,6 +781,9 @@ ctl_send(struct socket *so, int flags, struct mbuf *m,
                error = ENOTCONN;
        }
 
+       lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+       ctl_kcb_increment_use_count(kcb, mtx_held);
+
        if (error == 0 && (kctl = kcb->kctl) == NULL) {
                error = EINVAL;
        }
@@ -749,6 +803,8 @@ ctl_send(struct socket *so, int flags, struct mbuf *m,
        if (error != 0) {
                OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_fail);
        }
+       clt_kcb_decrement_use_count(kcb);
+
        return error;
 }
 
@@ -769,6 +825,9 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m,
                error = ENOTCONN;
        }
 
+       lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+       ctl_kcb_increment_use_count(kcb, mtx_held);
+
        if (error == 0 && (kctl = kcb->kctl) == NULL) {
                error = EINVAL;
        }
@@ -808,6 +867,8 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m,
        if (error != 0) {
                OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_list_fail);
        }
+       clt_kcb_decrement_use_count(kcb);
+
        return error;
 }
 
@@ -1234,16 +1295,21 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt)
                return EINVAL;
        }
 
+       lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+       ctl_kcb_increment_use_count(kcb, mtx_held);
+
        switch (sopt->sopt_dir) {
        case SOPT_SET:
                if (kctl->setopt == NULL) {
-                       return ENOTSUP;
+                       error = ENOTSUP;
+                       goto out;
                }
                if (sopt->sopt_valsize != 0) {
                        MALLOC(data, void *, sopt->sopt_valsize, M_TEMP,
                            M_WAITOK | M_ZERO);
                        if (data == NULL) {
-                               return ENOMEM;
+                               error = ENOMEM;
+                               goto out;
                        }
                        error = sooptcopyin(sopt, data,
                            sopt->sopt_valsize, sopt->sopt_valsize);
@@ -1263,14 +1329,16 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt)
 
        case SOPT_GET:
                if (kctl->getopt == NULL) {
-                       return ENOTSUP;
+                       error = ENOTSUP;
+                       goto out;
                }
 
                if (sopt->sopt_valsize && sopt->sopt_val) {
                        MALLOC(data, void *, sopt->sopt_valsize, M_TEMP,
                            M_WAITOK | M_ZERO);
                        if (data == NULL) {
-                               return ENOMEM;
+                               error = ENOMEM;
+                               goto out;
                        }
                        /*
                         * 4108337 - copy user data in case the
@@ -1306,6 +1374,9 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt)
                }
                break;
        }
+
+out:
+       clt_kcb_decrement_use_count(kcb);
        return error;
 }
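
The new kcb_usecount turns each control block into a single-occupancy gate: a caller sleeps on the counter's address while another call is in flight, claims the gate by incrementing, and the paired decrement wakes exactly one waiter via wakeup_one(). A hedged userspace analogue of the same pattern, using a mutex and condition variable in place of msleep/wakeup_one:

    #include <assert.h>
    #include <pthread.h>

    /* Userspace analogue of ctl_kcb_increment_use_count() /
     * clt_kcb_decrement_use_count(): at most one call runs against the
     * control block at a time; later callers queue on the condition. */
    struct kcb_gate {
            pthread_mutex_t lock;   /* stands in for the socket mutex */
            pthread_cond_t  wake;   /* stands in for wakeup_one()     */
            unsigned        usecount;
    };

    #define KCB_GATE_INIT \
            { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 }

    static void
    gate_enter(struct kcb_gate *g)
    {
            pthread_mutex_lock(&g->lock);
            while (g->usecount > 0) {       /* like msleep() on the counter */
                    pthread_cond_wait(&g->wake, &g->lock);
            }
            g->usecount++;                  /* we own the gate now */
            pthread_mutex_unlock(&g->lock);
    }

    static void
    gate_exit(struct kcb_gate *g)
    {
            pthread_mutex_lock(&g->lock);
            assert(g->usecount != 0);
            g->usecount--;
            pthread_cond_signal(&g->wake);  /* wake a single waiter */
            pthread_mutex_unlock(&g->lock);
    }

One difference worth noting: the kernel version performs the wait under the already-held socket lock, whereas this sketch manages its own mutex around the counter.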
 
diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c
index 725f96d1e7c38e8cce37d0255258b34204973c7c..8f4d2207b71a4c4fecf35725c2ef6a3927c836f8 100644
@@ -1123,52 +1123,52 @@ filt_procevent(struct knote *kn, long hint)
 
        /*
         * The kernel has a wrapper in place that returns the same data
-        * as is collected here, in kn_hook64.  Any changes to how
+        * as is collected here, in kn_hook32.  Any changes to how
         * NOTE_EXITSTATUS and NOTE_EXIT_DETAIL are collected
         * should also be reflected in the proc_pidnoteexit() wrapper.
         */
        if (event == NOTE_EXIT) {
-               kn->kn_hook64 = 0;
+               kn->kn_hook32 = 0;
                if ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) {
                        kn->kn_fflags |= NOTE_EXITSTATUS;
-                       kn->kn_hook64 |= (hint & NOTE_PDATAMASK);
+                       kn->kn_hook32 |= (hint & NOTE_PDATAMASK);
                }
                if ((kn->kn_sfflags & NOTE_EXIT_DETAIL) != 0) {
                        kn->kn_fflags |= NOTE_EXIT_DETAIL;
                        if ((kn->kn_proc->p_lflag &
                            P_LTERM_DECRYPTFAIL) != 0) {
-                               kn->kn_hook64 |= NOTE_EXIT_DECRYPTFAIL;
+                               kn->kn_hook32 |= NOTE_EXIT_DECRYPTFAIL;
                        }
                        if ((kn->kn_proc->p_lflag &
                            P_LTERM_JETSAM) != 0) {
-                               kn->kn_hook64 |= NOTE_EXIT_MEMORY;
+                               kn->kn_hook32 |= NOTE_EXIT_MEMORY;
                                switch (kn->kn_proc->p_lflag & P_JETSAM_MASK) {
                                case P_JETSAM_VMPAGESHORTAGE:
-                                       kn->kn_hook64 |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE;
+                                       kn->kn_hook32 |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE;
                                        break;
                                case P_JETSAM_VMTHRASHING:
-                                       kn->kn_hook64 |= NOTE_EXIT_MEMORY_VMTHRASHING;
+                                       kn->kn_hook32 |= NOTE_EXIT_MEMORY_VMTHRASHING;
                                        break;
                                case P_JETSAM_FCTHRASHING:
-                                       kn->kn_hook64 |= NOTE_EXIT_MEMORY_FCTHRASHING;
+                                       kn->kn_hook32 |= NOTE_EXIT_MEMORY_FCTHRASHING;
                                        break;
                                case P_JETSAM_VNODE:
-                                       kn->kn_hook64 |= NOTE_EXIT_MEMORY_VNODE;
+                                       kn->kn_hook32 |= NOTE_EXIT_MEMORY_VNODE;
                                        break;
                                case P_JETSAM_HIWAT:
-                                       kn->kn_hook64 |= NOTE_EXIT_MEMORY_HIWAT;
+                                       kn->kn_hook32 |= NOTE_EXIT_MEMORY_HIWAT;
                                        break;
                                case P_JETSAM_PID:
-                                       kn->kn_hook64 |= NOTE_EXIT_MEMORY_PID;
+                                       kn->kn_hook32 |= NOTE_EXIT_MEMORY_PID;
                                        break;
                                case P_JETSAM_IDLEEXIT:
-                                       kn->kn_hook64 |= NOTE_EXIT_MEMORY_IDLE;
+                                       kn->kn_hook32 |= NOTE_EXIT_MEMORY_IDLE;
                                        break;
                                }
                        }
                        if ((kn->kn_proc->p_csflags &
                            CS_KILLED) != 0) {
-                               kn->kn_hook64 |= NOTE_EXIT_CSERROR;
+                               kn->kn_hook32 |= NOTE_EXIT_CSERROR;
                        }
                }
        }
@@ -1208,8 +1208,8 @@ filt_procprocess(struct knote *kn, struct kevent_qos_s *kev)
 
        proc_klist_lock();
        if (kn->kn_fflags) {
-               knote_fill_kevent(kn, kev, kn->kn_hook64);
-               kn->kn_hook64 = 0;
+               knote_fill_kevent(kn, kev, kn->kn_hook32);
+               kn->kn_hook32 = 0;
                res = 1;
        }
        proc_klist_unlock();
@@ -3700,14 +3700,14 @@ kevent_register(struct kqueue *kq, struct kevent_qos_s *kev,
        }
 
        if (kq->kq_state & KQ_WORKLOOP) {
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER),
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER),
                    ((struct kqworkloop *)kq)->kqwl_dynamicid,
                    kev->udata, kev->flags, kev->filter);
        } else if (kq->kq_state & KQ_WORKQ) {
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER),
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER),
                    0, kev->udata, kev->flags, kev->filter);
        } else {
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_REGISTER),
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_REGISTER),
                    VM_KERNEL_UNSLIDE_OR_PERM(kq),
                    kev->udata, kev->flags, kev->filter);
        }
@@ -3995,16 +3995,16 @@ knote_process(struct knote *kn, kevent_ctx_t kectx,
        assert(!(kn->kn_status & (KN_DISABLED | KN_SUPPRESSED | KN_DROPPING)));
 
        if (kq->kq_state & KQ_WORKLOOP) {
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS),
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS),
                    ((struct kqworkloop *)kq)->kqwl_dynamicid,
                    kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
                    kn->kn_filtid);
        } else if (kq->kq_state & KQ_WORKQ) {
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS),
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS),
                    0, kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
                    kn->kn_filtid);
        } else {
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS),
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS),
                    VM_KERNEL_UNSLIDE_OR_PERM(kq), kn->kn_udata,
                    kn->kn_status | (kn->kn_id << 32), kn->kn_filtid);
        }
@@ -4125,7 +4125,7 @@ knote_process(struct knote *kn, kevent_ctx_t kectx,
        }
 
        if (kev.flags & EV_VANISHED) {
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_VANISHED),
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KNOTE_VANISHED),
                    kev.ident, kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
                    kn->kn_filtid);
        }
@@ -4225,13 +4225,13 @@ kqworkq_begin_processing(struct kqworkq *kqwq, workq_threadreq_t kqr,
 {
        int rc = 0;
 
-       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_START,
+       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_START,
            0, kqr->tr_kq_qos_index);
 
        rc = kqworkq_acknowledge_events(kqwq, kqr, kevent_flags,
            KQWQAE_BEGIN_PROCESSING);
 
-       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END,
+       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END,
            thread_tid(kqr_thread(kqr)), kqr->tr_kq_wakeup);
 
        return rc;
@@ -4274,7 +4274,7 @@ kqworkloop_begin_processing(struct kqworkloop *kqwl, unsigned int kevent_flags)
 
        kqlock_held(kq);
 
-       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_START,
+       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_START,
            kqwl->kqwl_dynamicid, 0, 0);
 
        /* nobody else should still be processing */
@@ -4352,7 +4352,7 @@ kqworkloop_begin_processing(struct kqworkloop *kqwl, unsigned int kevent_flags)
        }
 
 done:
-       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_END,
+       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_END,
            kqwl->kqwl_dynamicid, 0, 0);
 
        return rc;
@@ -4375,13 +4375,13 @@ kqfile_begin_processing(struct kqfile *kq)
        kqlock_held(kq);
 
        assert((kq->kqf_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0);
-       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_START,
+       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_START,
            VM_KERNEL_UNSLIDE_OR_PERM(kq), 0);
 
        /* wait to become the exclusive processing thread */
        for (;;) {
                if (kq->kqf_state & KQ_DRAIN) {
-                       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
+                       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
                            VM_KERNEL_UNSLIDE_OR_PERM(kq), 2);
                        return EBADF;
                }
@@ -4410,7 +4410,7 @@ kqfile_begin_processing(struct kqfile *kq)
 
        /* anything left to process? */
        if (TAILQ_EMPTY(&kq->kqf_queue)) {
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
                    VM_KERNEL_UNSLIDE_OR_PERM(kq), 1);
                return -1;
        }
@@ -4418,7 +4418,7 @@ kqfile_begin_processing(struct kqfile *kq)
        /* convert to processing mode */
        kq->kqf_state |= KQ_PROCESSING;
 
-       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
+       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
            VM_KERNEL_UNSLIDE_OR_PERM(kq));
 
        return 0;
@@ -4476,7 +4476,7 @@ kqworkloop_end_processing(struct kqworkloop *kqwl, int flags, int kevent_flags)
 
        kqlock_held(kq);
 
-       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_START,
+       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_START,
            kqwl->kqwl_dynamicid, 0, 0);
 
        if (flags & KQ_PROCESSING) {
@@ -4533,7 +4533,7 @@ kqworkloop_end_processing(struct kqworkloop *kqwl, int flags, int kevent_flags)
                kqworkloop_unbind_delayed_override_drop(thread);
        }
 
-       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_END,
+       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_END,
            kqwl->kqwl_dynamicid, 0, 0);
 
        return rc;
@@ -4557,7 +4557,7 @@ kqfile_end_processing(struct kqfile *kq)
 
        assert((kq->kqf_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0);
 
-       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END),
+       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END),
            VM_KERNEL_UNSLIDE_OR_PERM(kq), 0);
 
        /*
@@ -4663,6 +4663,7 @@ kqueue_workloop_ctl_internal(proc_t p, uintptr_t cmd, uint64_t __unused options,
                trp.trp_value = kqwl->kqwl_params;
                if (trp.trp_flags && !(trp.trp_flags & TRP_RELEASED)) {
                        trp.trp_flags |= TRP_RELEASED;
+                       kqwl->kqwl_params = trp.trp_value;
                        kqworkloop_release_live(kqwl);
                } else {
                        error = EINVAL;
@@ -5045,14 +5046,14 @@ kqueue_threadreq_initiate(struct kqueue *kq, workq_threadreq_t kqr,
                __assert_only struct kqworkloop *kqwl = (struct kqworkloop *)kq;
 
                assert(kqwl->kqwl_owner == THREAD_NULL);
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST),
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST),
                    kqwl->kqwl_dynamicid, 0, qos, kqr->tr_kq_wakeup);
                ts = kqwl->kqwl_turnstile;
                /* Add a thread request reference on the kqueue. */
                kqworkloop_retain(kqwl);
        } else {
                assert(kq->kq_state & KQ_WORKQ);
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST),
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST),
                    -1, 0, qos, kqr->tr_kq_wakeup);
        }
 
@@ -5192,7 +5193,7 @@ kqueue_threadreq_bind(struct proc *p, workq_threadreq_t kqr, thread_t thread,
                        turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
                }
 
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_BIND), kqu.kqwl->kqwl_dynamicid,
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_BIND), kqu.kqwl->kqwl_dynamicid,
                    thread_tid(thread), kqr->tr_kq_qos_index,
                    (kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup);
 
@@ -5203,7 +5204,7 @@ kqueue_threadreq_bind(struct proc *p, workq_threadreq_t kqr, thread_t thread,
        } else {
                assert(kqr->tr_kq_override_index == 0);
 
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND), -1,
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_BIND), -1,
                    thread_tid(thread), kqr->tr_kq_qos_index,
                    (kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup);
        }
@@ -5432,7 +5433,7 @@ recompute:
        if (kqwl_owner) {
 #if 0
                /* JMM - need new trace hooks for owner overrides */
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST),
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST),
                    kqwl->kqwl_dynamicid, thread_tid(kqwl_owner), kqr->tr_kq_qos_index,
                    (kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup);
 #endif
@@ -5503,7 +5504,7 @@ recompute:
        }
 
        if (qos_changed) {
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST), kqwl->kqwl_dynamicid,
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST), kqwl->kqwl_dynamicid,
                    thread_tid(servicer), kqr->tr_kq_qos_index,
                    (kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup);
        }
@@ -5696,7 +5697,7 @@ kqworkloop_unbind_locked(struct kqworkloop *kqwl, thread_t thread,
        struct uthread *ut = get_bsdthread_info(thread);
        workq_threadreq_t kqr = &kqwl->kqwl_request;
 
-       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND), kqwl->kqwl_dynamicid,
+       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND), kqwl->kqwl_dynamicid,
            thread_tid(thread), 0, 0);
 
        kqlock_held(kqwl);
@@ -5789,7 +5790,7 @@ kqworkq_unbind_locked(struct kqworkq *kqwq,
        struct uthread *ut = get_bsdthread_info(thread);
        kq_index_t old_override = kqr->tr_kq_override_index;
 
-       KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND), -1,
+       KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND), -1,
            thread_tid(kqr_thread(kqr)), kqr->tr_kq_qos_index, 0);
 
        kqlock_held(kqwq);
@@ -6625,7 +6626,7 @@ static inline void
 knote_mark_active(struct knote *kn)
 {
        if ((kn->kn_status & KN_ACTIVE) == 0) {
-               KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE),
+               KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE),
                    kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
                    kn->kn_filtid);
        }
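
kn_hook32 stages the NOTE_EXIT detail bits that a kevent consumer later receives in the delivered event's data field (the rename just right-sizes the field; these values all fit in 32 bits). For reference, the public-API consumer side, watching a child with EVFILT_PROC (error handling elided):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/event.h>
    #include <sys/wait.h>

    int
    main(void)
    {
            pid_t pid = fork();
            if (pid == 0) {
                    _exit(7);       /* child exits with status 7 */
            }

            int kq = kqueue();
            struct kevent kev;

            /* NOTE_EXITSTATUS asks for the wait(2)-style status -- the
             * data staged in kn_hook32 above -- in the delivered event. */
            EV_SET(&kev, pid, EVFILT_PROC, EV_ADD | EV_ONESHOT,
                NOTE_EXIT | NOTE_EXITSTATUS, 0, NULL);
            kevent(kq, &kev, 1, NULL, 0, NULL);

            if (kevent(kq, NULL, 0, &kev, 1, NULL) == 1) {
                    printf("pid %lu exited, status 0x%llx (code %d)\n",
                        (unsigned long)kev.ident, (unsigned long long)kev.data,
                        WEXITSTATUS((int)kev.data));
            }
            return 0;
    }
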
diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c
index 03bcf7896c58579ee416cc8a483ce1beac28c0bb..eb333d4b1a33df93d2b5515180b3f1e60f7155db 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <security/mac_mach_internal.h>
 #endif
 
+#if CONFIG_AUDIT
+#include <bsm/audit_kevents.h>
+#endif
+
 #if CONFIG_ARCADE
 #include <kern/arcade.h>
 #endif
@@ -1627,6 +1631,7 @@ encapsulated_binary:
                         */
                        if (imgp->ip_scriptvp) {
                                vnode_put(imgp->ip_scriptvp);
+                               imgp->ip_scriptvp = NULLVP;
                        }
                        if (vnode_getwithref(imgp->ip_vp) == 0) {
                                imgp->ip_scriptvp = imgp->ip_vp;
@@ -2013,6 +2018,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags)
        proc_t p = vfs_context_proc(imgp->ip_vfs_context);
        _posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
        int ival[2];            /* dummy retval for system calls */
+#if CONFIG_AUDIT
+       struct uthread *uthread = get_bsdthread_info(current_thread());
+#endif
 
        for (action = 0; action < px_sfap->psfa_act_count; action++) {
                _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
@@ -2049,6 +2057,8 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags)
                        mode = ((mode & ~p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
                        VATTR_SET(vap, va_mode, mode & ACCESSPERMS);
 
+                       AUDIT_SUBCALL_ENTER(OPEN, p, uthread);
+
                        NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
                            CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
                            imgp->ip_vfs_context);
@@ -2062,6 +2072,8 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags)
 
                        FREE(bufp, M_TEMP);
 
+                       AUDIT_SUBCALL_EXIT(uthread, error);
+
                        /*
                         * If there's an error, or we get the right fd by
                         * accident, then drop out here.  This is easier than
@@ -2087,7 +2099,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags)
                         * can ignore that, since if we didn't get the
                         * fd we wanted, the error will stop us.
                         */
+                       AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
                        error = dup2(p, &dup2a, ival);
+                       AUDIT_SUBCALL_EXIT(uthread, error);
                        if (error) {
                                break;
                        }
@@ -2097,7 +2111,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags)
                         */
                        ca.fd = origfd;
 
+                       AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
                        error = close_nocancel(p, &ca, ival);
+                       AUDIT_SUBCALL_EXIT(uthread, error);
                }
                break;
 
@@ -2113,7 +2129,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags)
                         * can ignore that, since if we didn't get the
                         * fd we wanted, the error will stop us.
                         */
+                       AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
                        error = dup2(p, &dup2a, ival);
+                       AUDIT_SUBCALL_EXIT(uthread, error);
                }
                break;
 
@@ -2149,12 +2167,16 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags)
 
                        dup2a.from = ca.fd = ival[0];
                        dup2a.to = psfa->psfaa_dup2args.psfad_newfiledes;
+                       AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
                        error = dup2(p, &dup2a, ival);
+                       AUDIT_SUBCALL_EXIT(uthread, error);
                        if (error) {
                                break;
                        }
 
+                       AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
                        error = close_nocancel(p, &ca, ival);
+                       AUDIT_SUBCALL_EXIT(uthread, error);
                }
                break;
 
@@ -2163,7 +2185,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags)
 
                        ca.fd = psfa->psfaa_filedes;
 
+                       AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
                        error = close_nocancel(p, &ca, ival);
+                       AUDIT_SUBCALL_EXIT(uthread, error);
                }
                break;
 
@@ -2203,11 +2227,13 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags)
                         */
                        struct nameidata nd;
 
+                       AUDIT_SUBCALL_ENTER(CHDIR, p, uthread);
                        NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
                            CAST_USER_ADDR_T(psfa->psfaa_chdirargs.psfac_path),
                            imgp->ip_vfs_context);
 
                        error = chdir_internal(p, imgp->ip_vfs_context, &nd, 0);
+                       AUDIT_SUBCALL_EXIT(uthread, error);
                }
                break;
 
@@ -2216,7 +2242,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags)
 
                        fchdira.fd = psfa->psfaa_filedes;
 
+                       AUDIT_SUBCALL_ENTER(FCHDIR, p, uthread);
                        error = fchdir(p, &fchdira, ival);
+                       AUDIT_SUBCALL_EXIT(uthread, error);
                }
                break;
 
@@ -2562,6 +2590,20 @@ proc_legacy_footprint_entitled(proc_t p, task_t task, const char *caller)
                break;
        }
 }
+
+static inline void
+proc_ios13extended_footprint_entitled(proc_t p, task_t task, const char *caller)
+{
+#pragma unused(p, caller)
+       boolean_t ios13extended_footprint_entitled;
+
+       /* the entitlement grants a footprint limit increase */
+       ios13extended_footprint_entitled = IOTaskHasEntitlement(task,
+           "com.apple.developer.memory.ios13extended_footprint");
+       if (ios13extended_footprint_entitled) {
+               task_set_ios13extended_footprint_limit(task);
+       }
+}
 #endif /* __arm64__ */
 
 /*
@@ -3133,15 +3175,41 @@ do_fork1:
                 * The POSIX_SPAWN_CLOEXEC_DEFAULT flag
                 * is handled in exec_handle_file_actions().
                 */
-               if ((error = exec_handle_file_actions(imgp,
-                   imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0) {
+#if CONFIG_AUDIT
+               /*
+                * The file actions auditing can overwrite the upath of
+                * AUE_POSIX_SPAWN audit record.  Save the audit record.
+                */
+               struct kaudit_record *save_uu_ar = uthread->uu_ar;
+               uthread->uu_ar = NULL;
+#endif
+               error = exec_handle_file_actions(imgp,
+                   imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0);
+#if CONFIG_AUDIT
+               /* Restore the AUE_POSIX_SPAWN audit record. */
+               uthread->uu_ar = save_uu_ar;
+#endif
+               if (error != 0) {
                        goto bad;
                }
        }
 
        /* Has spawn port actions? */
        if (imgp->ip_px_spa != NULL) {
-               if ((error = exec_handle_port_actions(imgp, &port_actions)) != 0) {
+#if CONFIG_AUDIT
+               /*
+                * Do the same for the port actions as we did for the file
+                * actions.  Save the AUE_POSIX_SPAWN audit record.
+                */
+               struct kaudit_record *save_uu_ar = uthread->uu_ar;
+               uthread->uu_ar = NULL;
+#endif
+               error = exec_handle_port_actions(imgp, &port_actions);
+#if CONFIG_AUDIT
+               /* Restore the AUE_POSIX_SPAWN audit record. */
+               uthread->uu_ar = save_uu_ar;
+#endif
+               if (error != 0) {
                        goto bad;
                }
        }
@@ -3536,6 +3604,7 @@ bad:
 
 #if __arm64__
                proc_legacy_footprint_entitled(p, new_task, __FUNCTION__);
+               proc_ios13extended_footprint_entitled(p, new_task, __FUNCTION__);
 #endif /* __arm64__ */
        }
 
@@ -4207,6 +4276,7 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
 
 #if __arm64__
                proc_legacy_footprint_entitled(p, new_task, __FUNCTION__);
+               proc_ios13extended_footprint_entitled(p, new_task, __FUNCTION__);
 #endif /* __arm64__ */
 
                /* Sever any extant thread affinity */
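
Each file action now enters and exits its own audit subcall, so a spawn with actions audits like the equivalent sequence of open/dup2/close/chdir syscalls without clobbering the enclosing AUE_POSIX_SPAWN record. For reference, the userspace side that drives the PSFA_OPEN and PSFA_DUP2 paths, via the public posix_spawn file-actions API:

    #include <fcntl.h>
    #include <spawn.h>
    #include <sys/wait.h>

    extern char **environ;

    int
    main(void)
    {
            posix_spawn_file_actions_t fa;
            posix_spawn_file_actions_init(&fa);

            /* PSFA_OPEN path: audited as an OPEN subcall above */
            posix_spawn_file_actions_addopen(&fa, 1, "/tmp/out.log",
                O_WRONLY | O_CREAT | O_TRUNC, 0644);
            /* PSFA_DUP2 path: audited as a DUP2 subcall */
            posix_spawn_file_actions_adddup2(&fa, 1, 2);

            pid_t pid;
            char *argv[] = { "echo", "hello", NULL };
            int err = posix_spawn(&pid, "/bin/echo", &fa, NULL, argv, environ);
            posix_spawn_file_actions_destroy(&fa);
            if (err == 0) {
                    waitpid(pid, NULL, 0);
            }
            return err;
    }
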
diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c
index 4b0f0e9a4bb9cab9cb4a7dba27203bef28a426eb..c25c85ad265bfa4ad5a60137d3b2d2d09c6b8f89 100644
@@ -1321,12 +1321,13 @@ retry:
        if (child_proc->p_textvp != NULLVP) {
                /* bump references to the text vnode */
                /* Need to hold iocount across the ref call */
-               if (vnode_getwithref(child_proc->p_textvp) == 0) {
+               if ((error = vnode_getwithref(child_proc->p_textvp)) == 0) {
                        error = vnode_ref(child_proc->p_textvp);
                        vnode_put(child_proc->p_textvp);
-                       if (error != 0) {
-                               child_proc->p_textvp = NULLVP;
-                       }
+               }
+
+               if (error != 0) {
+                       child_proc->p_textvp = NULLVP;
                }
        }
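
The fork fix tightens a common VFS idiom: vnode_getwithref() takes a transient iocount (guaranteeing the vnode cannot be recycled), vnode_ref() then takes the long-term usecount, and vnode_put() drops the iocount. Previously a failed vnode_getwithref() left a stale p_textvp behind. The pattern in isolation, as a sketch using the same KPIs:

    #include <sys/vnode.h>

    /*
     * Take a persistent usecount on vp, holding a transient iocount across
     * vnode_ref(), and report failure from either step so the caller can
     * clear its pointer -- mirroring the corrected p_textvp handling above.
     */
    static int
    take_persistent_ref(vnode_t vp)
    {
            int error;

            if ((error = vnode_getwithref(vp)) == 0) {
                    error = vnode_ref(vp);  /* long-term reference */
                    vnode_put(vp);          /* drop the iocount */
            }
            return error;
    }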
 
diff --git a/bsd/kern/kern_lockf.c b/bsd/kern/kern_lockf.c
index 21edbc5d97dc9631b1328e0d26de7c61fcd3aab9..d67a8f84bb982aad3fb861bf1cb0c5c84263e890 100644
@@ -153,6 +153,16 @@ static void      lf_boost_blocking_proc(struct lockf *, struct lockf *);
 static void      lf_adjust_assertion(struct lockf *block);
 #endif /* IMPORTANCE_INHERITANCE */
 
+static lck_mtx_t lf_dead_lock;
+static lck_grp_t *lf_dead_lock_grp;
+
+void
+lf_init(void)
+{
+       lf_dead_lock_grp = lck_grp_alloc_init("lf_dead_lock", LCK_GRP_ATTR_NULL);
+       lck_mtx_init(&lf_dead_lock, lf_dead_lock_grp, LCK_ATTR_NULL);
+}
+
 /*
  * lf_advlock
  *
@@ -498,7 +508,7 @@ lf_setlock(struct lockf *lock, struct timespec *timeout)
        struct lockf *block;
        struct lockf **head = lock->lf_head;
        struct lockf **prev, *overlap, *ltmp;
-       static char lockstr[] = "lockf";
+       static const char lockstr[] = "lockf";
        int priority, needtolink, error;
        struct vnode *vp = lock->lf_vnode;
        overlap_t ovcase;
@@ -550,22 +560,43 @@ scan:
                 */
                if ((lock->lf_flags & F_POSIX) &&
                    (block->lf_flags & F_POSIX)) {
-                       struct proc *wproc;
-                       struct uthread *ut;
+                       lck_mtx_lock(&lf_dead_lock);
 
-                       /* The block is waiting on something */
-                       wproc = block->lf_owner;
+                       /* The blocked process is waiting on something */
+                       struct proc *wproc = block->lf_owner;
                        proc_lock(wproc);
+
                        LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p owned by pid %d\n", lock, proc_pid(wproc));
+
+                       struct uthread *ut;
                        TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) {
                                /*
-                                * If the thread is asleep (uu_wchan != 0)
-                                * in this code (uu_wmesg == lockstr)
-                                * check to see if the lock is blocked behind
+                                * If the thread is (a) asleep (uu_wchan != 0)
+                                * and (b) in this code (uu_wmesg == lockstr)
+                                * then check to see if the lock is blocked behind
                                 * someone blocked behind us.
+                                *
+                                * Note: (i) vp->v_lock is held, preventing other
+                                * threads from mutating the blocking list for our vnode.
+                                * and (ii) the proc_lock is held i.e. the thread list
+                                * is stable.
+                                *
+                                * HOWEVER some thread in wproc might be sleeping on a lockf
+                                * structure for a different vnode, and be woken at any
+                                * time. Thus the waitblock list could mutate while
+                                * it's being inspected by this thread, and what
+                                * ut->uu_wchan was just pointing at could even be freed.
+                                *
+                                * Nevertheless this is safe here because of lf_dead_lock; if
+                                * any thread blocked with uu_wmesg == lockstr wakes (see below)
+                                * it will try to acquire lf_dead_lock which is already held
+                                * here. Holding that lock prevents the lockf structure being
+                                * pointed at by ut->uu_wchan from going away. Thus the vnode
+                                * involved can be found and locked, and the corresponding
+                                * blocking chain can then be examined safely.
                                 */
-                               if ((ut->uu_wchan != NULL) && (ut->uu_wmesg == lockstr)) {
-                                       struct lockf *waitblock = (struct lockf *)ut->uu_wchan;
+                               const struct lockf *waitblock = (const void *)ut->uu_wchan;
+                               if ((waitblock != NULL) && (ut->uu_wmesg == lockstr)) {
                                        LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, waitblock, waitblock->lf_vnode);
 
                                        vnode_t othervp = NULL;
@@ -585,11 +616,13 @@ scan:
                                                         * v_lock) retry the scan.
                                                         */
                                                        proc_unlock(wproc);
+                                                       lck_mtx_unlock(&lf_dead_lock);
                                                        static struct timespec ts = {
                                                                .tv_sec = 0,
-                                                               .tv_nsec = 10 * NSEC_PER_MSEC,
+                                                               .tv_nsec = 2 * NSEC_PER_MSEC,
                                                        };
-                                                       (void) msleep(lock, &vp->v_lock, priority, lockstr, &ts);
+                                                       static const char pausestr[] = "lockf:pause";
+                                                       (void) msleep(lock, &vp->v_lock, priority, pausestr, &ts);
                                                        LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p contention for vp %p => restart\n", lock, othervp);
                                                        goto scan;
                                                }
@@ -604,15 +637,15 @@ scan:
                                         * we successfully acquired the
                                         * proc_lock).
                                         */
-                                       waitblock = waitblock->lf_next;
-                                       if (waitblock == NULL) {
+                                       const struct lockf *nextblock = waitblock->lf_next;
+                                       if (nextblock == NULL) {
                                                if (othervp) {
                                                        lck_mtx_unlock(&othervp->v_lock);
                                                }
-                                               LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p with no lf_next\n", lock);
+                                               LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p with waitblock %p and no lf_next; othervp %p\n", lock, waitblock, othervp);
                                                continue;
                                        }
-                                       LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, waitblock, waitblock->lf_vnode);
+                                       LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, nextblock, nextblock->lf_vnode);
 
                                        /*
                                         * Make sure it's an advisory range
@@ -620,7 +653,7 @@ scan:
                                         * if we mix lock types, it's our own
                                         * fault.
                                         */
-                                       if ((waitblock->lf_flags & F_POSIX) == 0) {
+                                       if ((nextblock->lf_flags & F_POSIX) == 0) {
                                                if (othervp) {
                                                        lck_mtx_unlock(&othervp->v_lock);
                                                }
@@ -633,7 +666,7 @@ scan:
                                         * getting the requested lock, then we
                                         * would deadlock, so error out.
                                         */
-                                       struct proc *bproc = waitblock->lf_owner;
+                                       struct proc *bproc = nextblock->lf_owner;
                                        const boolean_t deadlocked = bproc == lock->lf_owner;
 
                                        if (othervp) {
@@ -643,6 +676,7 @@ scan:
                                        if (deadlocked) {
                                                LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is me, so EDEADLK\n", lock);
                                                proc_unlock(wproc);
+                                               lck_mtx_unlock(&lf_dead_lock);
                                                FREE(lock, M_LOCKF);
                                                return EDEADLK;
                                        }
@@ -650,6 +684,7 @@ scan:
                                LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p bottom of thread loop\n", lock);
                        }
                        proc_unlock(wproc);
+                       lck_mtx_unlock(&lf_dead_lock);
                }
 
                /*
@@ -709,7 +744,19 @@ scan:
 #endif /* LOCKF_DEBUGGING */
                DTRACE_FSINFO(advlock__wait, vnode_t, vp);
 
-               error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);
+               if (lock->lf_flags & F_POSIX) {
+                       error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);
+                       /*
+                        * Ensure that 'lock' doesn't get mutated or freed if a
+                        * wakeup occurs while hunting for deadlocks (and holding
+                        * lf_dead_lock - see above)
+                        */
+                       lck_mtx_lock(&lf_dead_lock);
+                       lck_mtx_unlock(&lf_dead_lock);
+               } else {
+                       static const char lockstr_np[] = "lockf:np";
+                       error = msleep(lock, &vp->v_lock, priority, lockstr_np, timeout);
+               }
 
                if (error == 0 && (lock->lf_flags & F_ABORT) != 0) {
                        error = EBADF;
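
The lock-then-immediately-unlock of lf_dead_lock after msleep() is a barrier handshake rather than critical-section locking: a woken POSIX-lock sleeper cannot run past it (and mutate or free its lockf record) while a deadlock scan still holds lf_dead_lock. The same idiom in a hedged userspace sketch:

    #include <pthread.h>

    static pthread_mutex_t dead_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Scanner: hold the barrier while inspecting records that sleeping
     * threads own (cf. the deadlock scan above). */
    void
    scan_side(void)
    {
            pthread_mutex_lock(&dead_lock);
            /* ... walk other threads' wait-channel state safely ... */
            pthread_mutex_unlock(&dead_lock);
    }

    /* Sleeper: after waking, pass through the barrier before touching the
     * record the scanner may still be reading.  Acquire-and-release is the
     * whole point -- it cannot complete while a scan is in progress. */
    void
    sleeper_side(void)
    {
            /* ... woken from the equivalent of msleep() ... */
            pthread_mutex_lock(&dead_lock);
            pthread_mutex_unlock(&dead_lock);
            /* ... now safe to mutate or free the record ... */
    }
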
diff --git a/bsd/kern/kern_memorystatus.c b/bsd/kern/kern_memorystatus.c
index 15512dd418c87b9182dda9118f13bf5078d68a24..afc9271dc01e625bec98b6aee0817067eb95cd86 100644
@@ -631,6 +631,83 @@ memorystatus_act_on_legacy_footprint_entitlement(proc_t p, boolean_t footprint_i
        proc_list_unlock();
 }
 
+void
+memorystatus_act_on_ios13extended_footprint_entitlement(proc_t p)
+{
+       int memlimit_mb_active = 0, memlimit_mb_inactive = 0;
+       boolean_t memlimit_active_is_fatal = FALSE, memlimit_inactive_is_fatal = FALSE, use_active_limit = FALSE;
+
+       if (max_mem < 1500ULL * 1024 * 1024 ||
+           max_mem > 2ULL * 1024 * 1024 * 1024) {
+               /* ios13extended_footprint is only for 2GB devices */
+               return;
+       }
+
+       proc_list_lock();
+
+       if (p->p_memstat_memlimit_active > 0) {
+               memlimit_mb_active = p->p_memstat_memlimit_active;
+       } else if (p->p_memstat_memlimit_active == -1) {
+               memlimit_mb_active = max_task_footprint_mb;
+       } else {
+               /*
+                * Nothing to do for '0' which is
+                * a special value only used internally
+                * to test 'no limits'.
+                */
+               proc_list_unlock();
+               return;
+       }
+
+       if (p->p_memstat_memlimit_inactive > 0) {
+               memlimit_mb_inactive = p->p_memstat_memlimit_inactive;
+       } else if (p->p_memstat_memlimit_inactive == -1) {
+               memlimit_mb_inactive = max_task_footprint_mb;
+       } else {
+               /*
+                * Nothing to do for '0' which is
+                * a special value only used internally
+                * to test 'no limits'.
+                */
+               proc_list_unlock();
+               return;
+       }
+
+       /* limit to "almost 2GB" */
+       int ios13extended_footprint_mb = 1800;
+       if (memlimit_mb_active > ios13extended_footprint_mb) {
+               /* do not lower the current limit */
+               proc_list_unlock();
+               return;
+       }
+       memlimit_mb_active = ios13extended_footprint_mb;
+       memlimit_mb_inactive = ios13extended_footprint_mb;
+
+       memlimit_active_is_fatal = (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL);
+       memlimit_inactive_is_fatal = (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL);
+
+       SET_ACTIVE_LIMITS_LOCKED(p, memlimit_mb_active, memlimit_active_is_fatal);
+       SET_INACTIVE_LIMITS_LOCKED(p, memlimit_mb_inactive, memlimit_inactive_is_fatal);
+
+       if (proc_jetsam_state_is_active_locked(p) == TRUE) {
+               use_active_limit = TRUE;
+               CACHE_ACTIVE_LIMITS_LOCKED(p, memlimit_active_is_fatal);
+       } else {
+               CACHE_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive_is_fatal);
+       }
+
+       if (memorystatus_highwater_enabled) {
+               task_set_phys_footprint_limit_internal(p->task,
+                   (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1,
+                   NULL,                                    /*return old value */
+                   use_active_limit,                                    /*active limit?*/
+                   (use_active_limit ? memlimit_active_is_fatal : memlimit_inactive_is_fatal));
+       }
+
+       proc_list_unlock();
+}
+
 #endif /* CONFIG_MEMORYSTATUS */
 #endif /* __arm64__ */
 
@@ -4482,11 +4559,12 @@ set_vm_map_fork_pidwatch(task_t task, uint64_t x)
  *     then the vm_map_fork is allowed.
  *
  *     And if a process's memory footprint calculates less
- *     than or equal to half of the system-wide task limit,
+ *     than or equal to a quarter of the system-wide task limit,
  *     then the vm_map_fork is allowed.  This calculation
  *     is based on the assumption that a process can
  *     munch memory up to the system-wide task limit.
  */
+extern boolean_t corpse_threshold_system_limit;
 boolean_t
 memorystatus_allowed_vm_map_fork(task_t task)
 {
@@ -4505,10 +4583,16 @@ memorystatus_allowed_vm_map_fork(task_t task)
        footprint_in_bytes = get_task_phys_footprint(task);
 
        /*
-        * Maximum is 1/4 of the system-wide task limit.
+        * Maximum is 1/4 of the system-wide task limit by default.
         */
        max_allowed_bytes = ((uint64_t)max_task_footprint_mb * 1024 * 1024) >> 2;
 
+#if DEBUG || DEVELOPMENT
+       if (corpse_threshold_system_limit) {
+               max_allowed_bytes = (uint64_t)max_task_footprint_mb * (1UL << 20);
+       }
+#endif /* DEBUG || DEVELOPMENT */
+
        if (footprint_in_bytes > max_allowed_bytes) {
                printf("memorystatus disallowed vm_map_fork %lld  %lld\n", footprint_in_bytes, max_allowed_bytes);
                set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED);
@@ -6256,7 +6340,7 @@ memorystatus_update_levels_locked(boolean_t critical_only)
        }
 
 #if VM_PRESSURE_EVENTS
-       memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta;
+       memorystatus_available_pages_pressure = pressure_threshold_percentage * (atop_64(max_mem) / 100);
 #endif
 }
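
For scale, take the rewritten pressure computation on a hypothetical 2 GB device with 16 KB pages and a pressure threshold of 15% (illustrative numbers; the percentages are tunables): atop_64(max_mem) = 2 GB / 16 KB = 131072 pages, 131072 / 100 = 1310, and 15 * 1310 = 19650 pages, roughly 307 MB. Computing directly from max_mem anchors the threshold to total memory instead of compounding the integer truncation in the old (pressure_threshold_percentage / delta_percentage) * memorystatus_delta form.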
 
diff --git a/bsd/kern/kern_synch.c b/bsd/kern/kern_synch.c
index d8afd780c650351a6b9df994ef040d5d9e776aec..019952e736158c4cb0cc8062ec321448cdb6cac0 100644
@@ -258,7 +258,6 @@ block:
                if ((thread_continue_t)continuation != THREAD_CONTINUE_NULL) {
                        ut->uu_continuation = continuation;
                        ut->uu_pri  = pri;
-                       ut->uu_timo = abstime? 1: 0;
                        ut->uu_mtx  = mtx;
                        (void) thread_block(_sleep_continue);
                        /* NOTREACHED */
diff --git a/bsd/kern/trace_codes b/bsd/kern/trace_codes
index 2da5b2b91065f4f472b907bfad2b21c64afd668c..fa3e01f846ece8ed7ec1bb3cb031241dcc37798f 100644
 0x132000c      RealFaultAddressPurgeable
 0x1320010      RealFaultAddressExternal
 0x1320014      RealFaultAddressSharedCache
+0x1320018      vm_fast_fault
+0x132001c      vm_slow_fault
+0x1320020      vm_map_lookup_object
 0x1400000      MACH_SCHED
 0x1400004      MACH_STKATTACH
 0x1400008      MACH_STKHANDOFF
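
The new trace codes follow the kdebug debugid layout: an 8-bit class, an 8-bit subclass, and a 14-bit code shifted left two bits to leave room for the DBG_FUNC_START/END qualifier — the composition KDBG_EVENTID() performs in <sys/kdebug.h>. A quick check that 0x1320018 really decodes to class 0x01 (DBG_MACH), subclass 0x32, code 6:

    #include <stdio.h>

    /* Same layout as KDBG_EVENTID() in <sys/kdebug.h>:
     * class(8) | subclass(8) | code(14) | function qualifier(2) */
    #define EVENTID(class, subclass, code) \
            (((unsigned)(class) << 24) | ((unsigned)(subclass) << 16) | \
             ((unsigned)(code) << 2))

    int
    main(void)
    {
            /* prints 0x1320018 -- vm_fast_fault from the table above */
            printf("0x%x\n", EVENTID(0x01, 0x32, 6));
            return 0;
    }
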
diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c
index dc2bd511cc9d8db0d9e33ce5440059328d8667cb..aa07cc477c0d2913981c1a2ab496d0fb71235dca 100644
@@ -826,12 +826,11 @@ socreate_internal(int dom, struct socket **aso, int type, int proto,
 
        switch (dom) {
        /*
-        * Don't mark Unix domain, system or multipath sockets as
+        * Don't mark Unix domain or system sockets as
         * eligible for defunct by default.
         */
        case PF_LOCAL:
        case PF_SYSTEM:
-       case PF_MULTIPATH:
                so->so_flags |= SOF_NODEFUNCT;
                break;
        default:
diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c
index b903e4a18932055b92d7c1d49ebb0627b08ae3d6..ca5e7dd6e8e17768124c2da4c6d986ff2819c48e 100644
@@ -1390,6 +1390,11 @@ sendto_nocancel(struct proc *p,
        KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
        AUDIT_ARG(fd, uap->s);
 
+       if (uap->flags & MSG_SKIPCFIL) {
+               error = EPERM;
+               goto done;
+       }
+
        auio = uio_create(1, 0,
            (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
            UIO_WRITE);
@@ -1459,6 +1464,12 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
 
        KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
        AUDIT_ARG(fd, uap->s);
+
+       if (uap->flags & MSG_SKIPCFIL) {
+               error = EPERM;
+               goto done;
+       }
+
        if (IS_64BIT_PROCESS(p)) {
                msghdrp = (caddr_t)&msg64;
                size_of_msghdr = sizeof(msg64);
@@ -1572,6 +1583,11 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
 
        KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
 
+       if (uap->flags & MSG_SKIPCFIL) {
+               error = EPERM;
+               goto out;
+       }
+
        error = file_socket(uap->s, &so);
        if (error) {
                goto out;
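
All three send paths now reject MSG_SKIPCFIL up front, so unprivileged callers can no longer ask to bypass content filters. A hedged userspace sketch of the observable behavior (the flag is private; its value below is an assumption for illustration):

#include <sys/socket.h>
#include <errno.h>
#include <stdio.h>

#ifndef MSG_SKIPCFIL
#define MSG_SKIPCFIL 0x40000000 /* assumed value of the private flag */
#endif

static void
probe_skipcfil(int s, const void *buf, size_t len)
{
	/* After this change, the kernel fails the call before any I/O. */
	if (send(s, buf, len, MSG_SKIPCFIL) == -1 && errno == EPERM)
		printf("MSG_SKIPCFIL now rejected with EPERM\n");
}
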
index 042363b4158605a58ad4c45b162395c8e60ccccc..3008946345d27b5b33d11a136b26c3d42c693bb8 100644 (file)
@@ -2915,7 +2915,7 @@ spec_knote_select_and_link(struct knote *kn)
        /*
         * This function may be called many times to link or re-link the
         * underlying vnode to the kqueue.  If we've already linked the two,
-        * we will have a valid kn_hook64 which ties us to the underlying
+        * we will have a valid kn_hook_waitqid which ties us to the underlying
         * device's waitq via the waitq's prepost table object. However,
         * devices can abort any select action by calling selthreadclear().
         * This is OK because the table object will be invalidated by the
@@ -2985,13 +2985,15 @@ spec_knote_select_and_link(struct knote *kn)
                 * the table object's ID to us.  It will also set the
                 * waitq_prepost_id field within the waitq structure.
                 *
-                * We can just overwrite kn_hook64 because it's simply a
+                * We can just overwrite kn_hook_waitqid because it's simply a
                 * table ID used to grab a reference when needed.
                 *
                 * We have a reference on the vnode, so we know that the
                 * device won't go away while we get this ID.
+                *
+                * Note: on 32-bit platforms this field is only 32 bits wide.
                 */
-               kn->kn_hook64 = waitq_get_prepost_id(wq);
+               kn->kn_hook_waitqid = (typeof(kn->kn_hook_waitqid))waitq_get_prepost_id(wq);
        } else if (selres == 0) {
                /*
                 * The device indicated that there's no data to read, but didn't call
@@ -3069,7 +3071,7 @@ filt_specattach(struct knote *kn, __unused struct kevent_qos_s *kev)
        }
 
        kn->kn_filtid = EVFILTID_SPEC;
-       kn->kn_hook64 = 0;
+       kn->kn_hook_waitqid = 0;
 
        knote_markstayactive(kn);
        return spec_knote_select_and_link(kn);
@@ -3084,7 +3086,7 @@ filt_specdetach(struct knote *kn)
         * This is potentially tricky: the device's selinfo waitq that was
         * tricked into being part of this knote's waitq set may not be a part
         * of any other set, and the device itself may have revoked the memory
-        * in which the waitq was held. We use the knote's kn_hook64 field
+        * in which the waitq was held. We use the knote's kn_hook_waitqid field
         * to keep the ID of the waitq's prepost table object. This
         * object keeps a pointer back to the waitq, and gives us a safe way
         * to decouple the dereferencing of driver allocated memory: if the
@@ -3092,9 +3094,9 @@ filt_specdetach(struct knote *kn)
         * object will be invalidated. The waitq details are handled in the
         * waitq API invoked here.
         */
-       if (kn->kn_hook64) {
-               waitq_unlink_by_prepost_id(kn->kn_hook64, &(knote_get_kq(kn)->kq_wqs));
-               kn->kn_hook64 = 0;
+       if (kn->kn_hook_waitqid) {
+               waitq_unlink_by_prepost_id(kn->kn_hook_waitqid, &(knote_get_kq(kn)->kq_wqs));
+               kn->kn_hook_waitqid = 0;
        }
 }
 
index e967cad2cd73c54f5021a5fc66cfb72c80dec4a5..407f3243e9fa7dcce7e9ddbd6c04bb1da28b249a 100644 (file)
@@ -2554,6 +2554,9 @@ ipsec_ctl_connect(kern_ctl_ref kctlref,
        }
 
        struct ipsec_pcb *pcb = *unitinfo;
+       if (pcb == NULL) {
+               return EINVAL;
+       }
 
        lck_mtx_lock(&ipsec_lock);
 
@@ -2995,8 +2998,11 @@ ipsec_ctl_setopt(__unused kern_ctl_ref  kctlref,
     void                                   *data,
     size_t                                 len)
 {
-       struct ipsec_pcb                        *pcb = unitinfo;
        errno_t                                 result = 0;
+       struct ipsec_pcb                        *pcb = unitinfo;
+       if (pcb == NULL) {
+               return EINVAL;
+       }
 
        /* check for privileges for privileged options */
        switch (opt) {
@@ -3364,8 +3370,11 @@ ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
     void *data,
     size_t *len)
 {
-       struct ipsec_pcb *pcb = unitinfo;
        errno_t result = 0;
+       struct ipsec_pcb *pcb = unitinfo;
+       if (pcb == NULL) {
+               return EINVAL;
+       }
 
        switch (opt) {
        case IPSEC_OPT_FLAGS: {
index 378d834fccce819a8cddeddee724df7eb584a9f9..29b05770f0cf830a734d068573af9324eb5d654c 100644 (file)
@@ -47,6 +47,8 @@
 #include <net/if_ports_used.h>
 
 #include <netinet/in_pcb.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_fsm.h>
 
 
 #include <stdbool.h>
@@ -662,7 +664,12 @@ if_ports_used_add_inpcb(const uint32_t ifindex, const struct inpcb *inp)
        npi.npi_timestamp.tv_usec = wakeuiid_last_check.tv_usec;
 
        if (SOCK_PROTO(so) == IPPROTO_TCP) {
+               struct tcpcb *tp = intotcpcb(inp);
+
                npi.npi_flags |= NPIF_TCP;
+               if (tp != NULL && tp->t_state == TCPS_LISTEN) {
+                       npi.npi_flags |= NPIF_LISTEN;
+               }
        } else if (SOCK_PROTO(so) == IPPROTO_UDP) {
                npi.npi_flags |= NPIF_UDP;
        } else {
@@ -675,7 +682,15 @@ if_ports_used_add_inpcb(const uint32_t ifindex, const struct inpcb *inp)
        npi.npi_local_port = inp->inp_lport;
        npi.npi_foreign_port = inp->inp_fport;
 
-       if (inp->inp_vflag & INP_IPV4) {
+       /*
+        * Take into account IPv4 addresses mapped onto IPv6
+        */
+       if ((inp->inp_vflag & INP_IPV6) != 0 && (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
+           (inp->inp_vflag & (INP_IPV6 | INP_IPV4)) == (INP_IPV6 | INP_IPV4)) {
+               npi.npi_flags |= NPIF_IPV6 | NPIF_IPV4;
+               memcpy(&npi.npi_local_addr_in6,
+                   &inp->in6p_laddr, sizeof(struct in6_addr));
+       } else if (inp->inp_vflag & INP_IPV4) {
                npi.npi_flags |= NPIF_IPV4;
                npi.npi_local_addr_in = inp->inp_laddr;
                npi.npi_foreign_addr_in = inp->inp_faddr;
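
The new first branch recognizes v4-mapped sockets: an IPv6 PCB without V6ONLY that also carries the IPv4 vflag is reported under both address families. The predicate in isolation (flag names match the diff; the values are assumptions for illustration):

#include <stdbool.h>
#include <stdint.h>

#define INP_IPV4         0x1    /* assumed values, for illustration */
#define INP_IPV6         0x2
#define IN6P_IPV6_V6ONLY 0x8000

static bool
is_v4_mapped(uint32_t vflag, uint32_t flags6)
{
	return (vflag & INP_IPV6) != 0 &&
	    (flags6 & IN6P_IPV6_V6ONLY) == 0 &&
	    (vflag & (INP_IPV6 | INP_IPV4)) == (INP_IPV6 | INP_IPV4);
}
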
index 5fcbc480a837eb6b003d4d850f0cf84239cae415..ce782a21c760c3970e60299438aa8c37304cda6c 100644 (file)
@@ -68,13 +68,14 @@ union in_addr_4_6 {
        struct in6_addr _in_a_6;
 };
 
-#define NPIF_IPV4       0x00000001
-#define NPIF_IPV6       0x00000002
-#define NPIF_TCP        0x00000004
-#define NPIF_UDP        0x00000008
-#define NPIF_DELEGATED  0x00000010
-#define NPIF_SOCKET     0x00000020
-#define NPIF_CHANNEL    0x00000040
+#define NPIF_IPV4       0x0001
+#define NPIF_IPV6       0x0002
+#define NPIF_TCP        0x0004
+#define NPIF_UDP        0x0008
+#define NPIF_DELEGATED  0x0010
+#define NPIF_SOCKET     0x0020
+#define NPIF_CHANNEL    0x0040
+#define NPIF_LISTEN     0x0080
 
 struct net_port_info {
        uint16_t                npi_if_index;
index 4e849c3e55fab7dd51e7a7382b3efa7a224c9d20..2d5c6454b8f064f37c2e7381f2e78ea0e2fd2bd7 100644 (file)
@@ -2826,7 +2826,6 @@ ifnet_get_local_ports_extended(ifnet_t ifp, protocol_family_t protocol,
     u_int32_t flags, u_int8_t *bitfield)
 {
        u_int32_t ifindex;
-       u_int32_t inp_flags = 0;
 
        if (bitfield == NULL) {
                return EINVAL;
@@ -2847,26 +2846,15 @@ ifnet_get_local_ports_extended(ifnet_t ifp, protocol_family_t protocol,
        if_ports_used_update_wakeuuid(ifp);
 
 
-       inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) ?
-           INPCB_GET_PORTS_USED_WILDCARDOK : 0);
-       inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) ?
-           INPCB_GET_PORTS_USED_NOWAKEUPOK : 0);
-       inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) ?
-           INPCB_GET_PORTS_USED_RECVANYIFONLY : 0);
-       inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) ?
-           INPCB_GET_PORTS_USED_EXTBGIDLEONLY : 0);
-       inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) ?
-           INPCB_GET_PORTS_USED_ACTIVEONLY : 0);
-
        ifindex = (ifp != NULL) ? ifp->if_index : 0;
 
        if (!(flags & IFNET_GET_LOCAL_PORTS_TCPONLY)) {
-               udp_get_ports_used(ifindex, protocol, inp_flags,
+               udp_get_ports_used(ifindex, protocol, flags,
                    bitfield);
        }
 
        if (!(flags & IFNET_GET_LOCAL_PORTS_UDPONLY)) {
-               tcp_get_ports_used(ifindex, protocol, inp_flags,
+               tcp_get_ports_used(ifindex, protocol, flags,
                    bitfield);
        }
 
index f131c57462b8b8ec66c7031b7f68df9cedd5d063..da7f7579dc745e2aa3ba8f9b3cd3dbb1c72b3341 100644 (file)
@@ -3193,6 +3193,7 @@ extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield);
 #define IFNET_GET_LOCAL_PORTS_RECVANYIFONLY     0x10
 #define IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY     0x20
 #define IFNET_GET_LOCAL_PORTS_ACTIVEONLY        0x40
+#define IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK     0x80
 /*
  *       @function ifnet_get_local_ports_extended
  *       @discussion Returns a bitfield indicating which local ports of the
@@ -3230,6 +3231,9 @@ extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield);
  *               IFNET_GET_LOCAL_PORTS_ACTIVEONLY: When bit is set, the
  *               port is in the list only if the socket is not in a final TCP
  *               state or the connection is not idle in a final TCP state
+ *               IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK: When bit is set, the
+ *               port is in the list for all TCP states except CLOSED
+ *               and TIME_WAIT
  *       @param bitfield A pointer to 8192 bytes.
  *       @result Returns 0 on success.
  */
index 7e13e26384dca4bb2af20aee282aae5a7c847f40..aeec3bb461640148a9d16ee3ea4522452fcac660 100644 (file)
 
 static unsigned int ndrv_multi_max_count = NDRV_DMUX_MAX_DESCR;
 SYSCTL_UINT(_net, OID_AUTO, ndrv_multi_max_count, CTLFLAG_RW | CTLFLAG_LOCKED,
-        &ndrv_multi_max_count, 0, "Number of allowed multicast addresses per NRDV socket");
+    &ndrv_multi_max_count, 0, "Number of allowed multicast addresses per NDRV socket");
+
+/*
+ * The locking strategy relies on the PF_NDRV domain mutex, which protects both
+ * the PCB list "ndrvl" and the sockets themselves.
+ */
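
A userland analogue of that rule, with a pthread mutex standing in for the domain mutex (hypothetical types; a sketch of the pattern, not the kernel code):

#include <pthread.h>
#include <sys/queue.h>

struct ncb { TAILQ_ENTRY(ncb) nd_next; };

static TAILQ_HEAD(, ncb) ncb_list = TAILQ_HEAD_INITIALIZER(ncb_list);
static pthread_mutex_t ncb_mtx = PTHREAD_MUTEX_INITIALIZER;

static void
ncb_insert(struct ncb *np)
{
	pthread_mutex_lock(&ncb_mtx);   /* one mutex guards list + entries */
	TAILQ_INSERT_TAIL(&ncb_list, np, nd_next);
	pthread_mutex_unlock(&ncb_mtx);
}
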
 
 static int ndrv_do_detach(struct ndrv_cb *);
 static int ndrv_do_disconnect(struct ndrv_cb *);
@@ -97,12 +102,12 @@ static void ndrv_dominit(struct domain *);
 
 u_int32_t  ndrv_sendspace = NDRVSNDQ;
 u_int32_t  ndrv_recvspace = NDRVRCVQ;
-TAILQ_HEAD(, ndrv_cb)  ndrvl = TAILQ_HEAD_INITIALIZER(ndrvl);
+TAILQ_HEAD(, ndrv_cb)   ndrvl = TAILQ_HEAD_INITIALIZER(ndrvl);
 
 static struct domain *ndrvdomain = NULL;
 extern struct domain ndrvdomain_s;
 
-#define NDRV_PROTODEMUX_COUNT  10
+#define NDRV_PROTODEMUX_COUNT   10
 
 /*
  * Verify these values match.
@@ -127,9 +132,9 @@ extern struct domain ndrvdomain_s;
 static int
 ndrv_output(struct mbuf *m, struct socket *so)
 {
-    struct ndrv_cb *np = sotondrvcb(so);
+       struct ndrv_cb *np = sotondrvcb(so);
        struct ifnet *ifp = np->nd_if;
-    int        result = 0;
+       int result = 0;
 
 #if NDRV_DEBUG
        printf("NDRV output: %x, %x, %x\n", m, so, np);
@@ -138,67 +143,77 @@ ndrv_output(struct mbuf *m, struct socket *so)
        /*
         * No header is a format error
         */
-       if ((m->m_flags&M_PKTHDR) == 0)
-               return(EINVAL);
+       if ((m->m_flags & M_PKTHDR) == 0) {
+               return EINVAL;
+       }
 
        /* Unlock before calling ifnet_output */
        socket_unlock(so, 0);
 
        /*
-     * Call DLIL if we can. DLIL is much safer than calling the
-     * ifp directly.
-     */
+        * Call DLIL if we can. DLIL is much safer than calling the
+        * ifp directly.
+        */
        result = ifnet_output_raw(ifp, np->nd_proto_family, m);
 
        socket_lock(so, 0);
 
-       return (result);
+       return result;
 }
 
 /* Our input routine called from DLIL */
 static errno_t
 ndrv_input(
-       ifnet_t                         ifp,
-       protocol_family_t       proto_family,
-       mbuf_t                          m,
-       char                            *frame_header)
+       ifnet_t                         ifp,
+       protocol_family_t       proto_family,
+       mbuf_t                          m,
+       char                            *frame_header)
 {
        struct socket *so;
        struct sockaddr_dl ndrvsrc;
        struct ndrv_cb *np;
        int error = 0;
 
-    ndrvsrc.sdl_len = sizeof (struct sockaddr_dl);
-    ndrvsrc.sdl_family = AF_NDRV;
-    ndrvsrc.sdl_index = 0;
+       ndrvsrc.sdl_len = sizeof(struct sockaddr_dl);
+       ndrvsrc.sdl_family = AF_NDRV;
+       ndrvsrc.sdl_index = 0;
 
-    /* move packet from if queue to socket */
+       /* move packet from if queue to socket */
        /* Should be media-independent */
-    ndrvsrc.sdl_type = IFT_ETHER;
-    ndrvsrc.sdl_nlen = 0;
-    ndrvsrc.sdl_alen = 6;
-    ndrvsrc.sdl_slen = 0;
-    bcopy(frame_header, &ndrvsrc.sdl_data, 6);
+       ndrvsrc.sdl_type = IFT_ETHER;
+       ndrvsrc.sdl_nlen = 0;
+       ndrvsrc.sdl_alen = 6;
+       ndrvsrc.sdl_slen = 0;
+       bcopy(frame_header, &ndrvsrc.sdl_data, 6);
+
+       /* prepend the frame header */
+       m = m_prepend(m, ifnet_hdrlen(ifp), M_NOWAIT);
+       if (m == NULL) {
+               return EJUSTRETURN;
+       }
+       bcopy(frame_header, m->m_data, ifnet_hdrlen(ifp));
+
+       /*
+        * We need to take the domain mutex before the list RW lock
+        */
+       LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
+       lck_mtx_lock(ndrvdomain->dom_mtx);
 
        np = ndrv_find_inbound(ifp, proto_family);
-       if (np == NULL)
-       {
-               return(ENOENT);
+       if (np == NULL) {
+               lck_mtx_unlock(ndrvdomain->dom_mtx);
+               return ENOENT;
        }
+
        so = np->nd_socket;
-    /* prepend the frame header */
-    m = m_prepend(m, ifnet_hdrlen(ifp), M_NOWAIT);
-    if (m == NULL)
-        return EJUSTRETURN;
-    bcopy(frame_header, m->m_data, ifnet_hdrlen(ifp));
 
-       LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
-       lck_mtx_lock(ndrvdomain->dom_mtx);
        if (sbappendaddr(&(so->so_rcv), (struct sockaddr *)&ndrvsrc,
-                                        m, (struct mbuf *)0, &error) != 0) {
+           m, NULL, &error) != 0) {
                sorwakeup(so);
        }
+
        lck_mtx_unlock(ndrvdomain->dom_mtx);
+
        return 0; /* radar 4030377 - always return 0 */
 }
 
@@ -208,24 +223,27 @@ ndrv_input(
 static int
 ndrv_attach(struct socket *so, int proto, __unused struct proc *p)
 {
-    int error;
+       int error;
        struct ndrv_cb *np = sotondrvcb(so);
 
-       if ((so->so_state & SS_PRIV) == 0)
-               return(EPERM);
+       if ((so->so_state & SS_PRIV) == 0) {
+               return EPERM;
+       }
 
 #if NDRV_DEBUG
        printf("NDRV attach: %x, %x, %x\n", so, proto, np);
 #endif
 
-        if ((error = soreserve(so, ndrv_sendspace, ndrv_recvspace)))
-                return(error);
+       if ((error = soreserve(so, ndrv_sendspace, ndrv_recvspace))) {
+               return error;
+       }
 
        MALLOC(np, struct ndrv_cb *, sizeof(*np), M_PCB, M_WAITOK);
-       if (np == NULL)
-               return (ENOMEM);
-    so->so_pcb = (caddr_t)np;
-    bzero(np, sizeof(*np));
+       if (np == NULL) {
+               return ENOMEM;
+       }
+       so->so_pcb = (caddr_t)np;
+       bzero(np, sizeof(*np));
 #if NDRV_DEBUG
        printf("NDRV attach: %x, %x, %x\n", so, proto, np);
 #endif
@@ -234,12 +252,22 @@ ndrv_attach(struct socket *so, int proto, __unused struct proc *p)
        np->nd_socket = so;
        np->nd_proto.sp_family = SOCK_DOM(so);
        np->nd_proto.sp_protocol = proto;
-    np->nd_if = NULL;
-    np->nd_proto_family = 0;
-    np->nd_family = 0;
-    np->nd_unit = 0;
-    TAILQ_INSERT_TAIL(&ndrvl, np, nd_next);
-       return(0);
+       np->nd_if = NULL;
+       np->nd_proto_family = 0;
+       np->nd_family = 0;
+       np->nd_unit = 0;
+
+       /*
+        * Use the domain mutex to protect the list
+        */
+       LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
+       lck_mtx_lock(ndrvdomain->dom_mtx);
+
+       TAILQ_INSERT_TAIL(&ndrvl, np, nd_next);
+
+       lck_mtx_unlock(ndrvdomain->dom_mtx);
+
+       return 0;
 }
 
 /*
@@ -252,8 +280,9 @@ ndrv_detach(struct socket *so)
 {
        struct ndrv_cb *np = sotondrvcb(so);
 
-       if (np == 0)
+       if (np == 0) {
                return EINVAL;
+       }
        return ndrv_do_detach(np);
 }
 
@@ -272,17 +301,20 @@ ndrv_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 {
        struct ndrv_cb *np = sotondrvcb(so);
 
-       if (np == 0)
+       if (np == 0) {
                return EINVAL;
+       }
 
-       if (np->nd_faddr)
+       if (np->nd_faddr) {
                return EISCONN;
+       }
 
        /* Allocate memory to store the remote address */
        MALLOC(np->nd_faddr, struct sockaddr_ndrv*,
-                nam->sa_len, M_IFADDR, M_WAITOK);
-       if (np->nd_faddr == NULL)
+           nam->sa_len, M_IFADDR, M_WAITOK);
+       if (np->nd_faddr == NULL) {
                return ENOMEM;
+       }
 
        bcopy((caddr_t) nam, (caddr_t) np->nd_faddr, nam->sa_len);
        soisconnected(so);
@@ -291,12 +323,12 @@ ndrv_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 
 static void
 ndrv_event(struct ifnet *ifp, __unused protocol_family_t protocol,
-                  const struct kev_msg *event)
+    const struct kev_msg *event)
 {
        if (event->vendor_code == KEV_VENDOR_APPLE &&
-               event->kev_class == KEV_NETWORK_CLASS &&
-               event->kev_subclass == KEV_DL_SUBCLASS &&
-               event->event_code == KEV_DL_IF_DETACHING) {
+           event->kev_class == KEV_NETWORK_CLASS &&
+           event->kev_subclass == KEV_DL_SUBCLASS &&
+           event->event_code == KEV_DL_IF_DETACHING) {
                LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
                lck_mtx_lock(ndrvdomain->dom_mtx);
                ndrv_handle_ifp_detach(ifnet_family(ifp), ifnet_unit(ifp));
@@ -314,30 +346,34 @@ static int name_cmp(struct ifnet *, char *);
 static int
 ndrv_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 {
-    struct sockaddr_ndrv *sa = (struct sockaddr_ndrv *) nam;
+       struct sockaddr_ndrv *sa = (struct sockaddr_ndrv *) nam;
        char *dname;
        struct ndrv_cb *np;
        struct ifnet *ifp;
-    int        result;
+       int result;
 
-       if TAILQ_EMPTY(&ifnet_head)
-               return(EADDRNOTAVAIL); /* Quick sanity check */
+       if (TAILQ_EMPTY(&ifnet_head)) {
+               return EADDRNOTAVAIL;        /* Quick sanity check */
+       }
        np = sotondrvcb(so);
-       if (np == 0)
+       if (np == 0) {
                return EINVAL;
+       }
 
-       if (np->nd_laddr)
-               return EINVAL;                  /* XXX */
-
+       if (np->nd_laddr) {
+               return EINVAL;                  /* XXX */
+       }
        /* I think we just latch onto a copy here; the caller frees */
        np->nd_laddr = _MALLOC(sizeof(struct sockaddr_ndrv), M_IFADDR, M_WAITOK);
-       if (np->nd_laddr == NULL)
-               return(ENOMEM);
+       if (np->nd_laddr == NULL) {
+               return ENOMEM;
+       }
        bcopy((caddr_t) sa, (caddr_t) np->nd_laddr, sizeof(struct sockaddr_ndrv));
        dname = (char *) sa->snd_name;
        np->nd_laddr->snd_len = sizeof(struct sockaddr_ndrv);
-       if (*dname == '\0')
-               return(EINVAL);
+       if (*dname == '\0') {
+               return EINVAL;
+       }
 #if NDRV_DEBUG
        printf("NDRV bind: %x, %x, %s\n", so, np, dname);
 #endif
@@ -347,19 +383,20 @@ ndrv_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
         */
        ifnet_head_lock_shared();
        TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
-               if (name_cmp(ifp, dname) == 0)
+               if (name_cmp(ifp, dname) == 0) {
                        break;
+               }
        }
        ifnet_head_done();
 
-       if (ifp == NULL)
-               return(EADDRNOTAVAIL);
+       if (ifp == NULL) {
+               return EADDRNOTAVAIL;
+       }
 
        // PPP doesn't support PF_NDRV.
-       if (ifnet_family(ifp) != APPLE_IF_FAM_PPP)
-       {
+       if (ifnet_family(ifp) != APPLE_IF_FAM_PPP) {
                /* NDRV on this interface */
-               struct ifnet_attach_proto_param ndrv_proto;
+               struct ifnet_attach_proto_param ndrv_proto;
                result = 0;
                bzero(&ndrv_proto, sizeof(ndrv_proto));
                ndrv_proto.event = ndrv_event;
@@ -372,16 +409,15 @@ ndrv_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
                        return result;
                }
                np->nd_proto_family = PF_NDRV;
-       }
-       else {
+       } else {
                np->nd_proto_family = 0;
        }
 
        np->nd_if = ifp;
-    np->nd_family = ifnet_family(ifp);
-    np->nd_unit = ifnet_unit(ifp);
+       np->nd_family = ifnet_family(ifp);
+       np->nd_unit = ifnet_unit(ifp);
 
-       return(0);
+       return 0;
 }
 
 static int
@@ -389,11 +425,13 @@ ndrv_disconnect(struct socket *so)
 {
        struct ndrv_cb *np = sotondrvcb(so);
 
-       if (np == 0)
+       if (np == 0) {
                return EINVAL;
+       }
 
-       if (np->nd_faddr == 0)
+       if (np->nd_faddr == 0) {
                return ENOTCONN;
+       }
 
        ndrv_do_disconnect(np);
        return 0;
@@ -417,13 +455,14 @@ ndrv_shutdown(struct socket *so)
  */
 static int
 ndrv_send(struct socket *so, __unused int flags, struct mbuf *m,
-         __unused struct sockaddr *addr, struct mbuf *control,
-         __unused struct proc *p)
+    __unused struct sockaddr *addr, struct mbuf *control,
+    __unused struct proc *p)
 {
        int error;
 
-       if (control)
+       if (control) {
                return EOPNOTSUPP;
+       }
 
        error = ndrv_output(m, so);
        m = NULL;
@@ -436,8 +475,9 @@ ndrv_abort(struct socket *so)
 {
        struct ndrv_cb *np = sotondrvcb(so);
 
-       if (np == 0)
+       if (np == 0) {
                return EINVAL;
+       }
 
        ndrv_do_disconnect(np);
        return 0;
@@ -449,18 +489,21 @@ ndrv_sockaddr(struct socket *so, struct sockaddr **nam)
        struct ndrv_cb *np = sotondrvcb(so);
        int len;
 
-       if (np == 0)
+       if (np == 0) {
                return EINVAL;
+       }
 
-       if (np->nd_laddr == 0)
+       if (np->nd_laddr == 0) {
                return EINVAL;
+       }
 
        len = np->nd_laddr->snd_len;
        MALLOC(*nam, struct sockaddr *, len, M_SONAME, M_WAITOK);
-       if (*nam == NULL)
+       if (*nam == NULL) {
                return ENOMEM;
+       }
        bcopy((caddr_t)np->nd_laddr, *nam,
-             (unsigned)len);
+           (unsigned)len);
        return 0;
 }
 
@@ -471,18 +514,21 @@ ndrv_peeraddr(struct socket *so, struct sockaddr **nam)
        struct ndrv_cb *np = sotondrvcb(so);
        int len;
 
-       if (np == 0)
+       if (np == 0) {
                return EINVAL;
+       }
 
-       if (np->nd_faddr == 0)
+       if (np->nd_faddr == 0) {
                return ENOTCONN;
+       }
 
        len = np->nd_faddr->snd_len;
        MALLOC(*nam, struct sockaddr *, len, M_SONAME, M_WAITOK);
-       if (*nam == NULL)
+       if (*nam == NULL) {
                return ENOMEM;
+       }
        bcopy((caddr_t)np->nd_faddr, *nam,
-             (unsigned)len);
+           (unsigned)len);
        return 0;
 }
 
@@ -492,58 +538,59 @@ ndrv_peeraddr(struct socket *so, struct sockaddr **nam)
 static int
 ndrv_ctloutput(struct socket *so, struct sockopt *sopt)
 {
-    struct ndrv_cb *np = sotondrvcb(so);
+       struct ndrv_cb *np = sotondrvcb(so);
        int error = 0;
 
-    switch(sopt->sopt_name)
-    {
-        case NDRV_DELDMXSPEC: /* Delete current spec */
-            /* Verify no parameter was passed */
-            if (sopt->sopt_val != 0 || sopt->sopt_valsize != 0) {
-                /*
-                 * We don't support deleting a specific demux, it's
-                 * all or nothing.
-                 */
-                return EINVAL;
-            }
-            error = ndrv_delspec(np);
-            break;
-        case NDRV_SETDMXSPEC: /* Set protocol spec */
-            error = ndrv_setspec(np, sopt);
-            break;
-        case NDRV_ADDMULTICAST:
-            error = ndrv_do_add_multicast(np, sopt);
-            break;
-        case NDRV_DELMULTICAST:
-            error = ndrv_do_remove_multicast(np, sopt);
-            break;
-        default:
-            error = ENOTSUP;
-    }
+       switch (sopt->sopt_name) {
+       case NDRV_DELDMXSPEC: /* Delete current spec */
+               /* Verify no parameter was passed */
+               if (sopt->sopt_val != 0 || sopt->sopt_valsize != 0) {
+                       /*
+                        * We don't support deleting a specific demux, it's
+                        * all or nothing.
+                        */
+                       return EINVAL;
+               }
+               error = ndrv_delspec(np);
+               break;
+       case NDRV_SETDMXSPEC: /* Set protocol spec */
+               error = ndrv_setspec(np, sopt);
+               break;
+       case NDRV_ADDMULTICAST:
+               error = ndrv_do_add_multicast(np, sopt);
+               break;
+       case NDRV_DELMULTICAST:
+               error = ndrv_do_remove_multicast(np, sopt);
+               break;
+       default:
+               error = ENOTSUP;
+       }
 #ifdef NDRV_DEBUG
        log(LOG_WARNING, "NDRV CTLOUT: %x returns %d\n", sopt->sopt_name,
            error);
 #endif
-       return(error);
+       return error;
 }
 
 static int
 ndrv_do_detach(struct ndrv_cb *np)
 {
-    struct ndrv_cb*    cur_np = NULL;
-    struct socket *so = np->nd_socket;
-    int error = 0;
-    struct ifnet * ifp;
+       struct ndrv_cb*     cur_np = NULL;
+       struct socket *so = np->nd_socket;
+       int error = 0;
+       struct ifnet * ifp;
 
 #if NDRV_DEBUG
        printf("NDRV detach: %x, %x\n", so, np);
 #endif
-    ndrv_remove_all_multicast(np);
+       ndrv_remove_all_multicast(np);
+
+       /* Remove from the linked list of control blocks */
+       LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED);
+       TAILQ_REMOVE(&ndrvl, np, nd_next);
 
-    ifp = np->nd_if;
-    /* Remove from the linked list of control blocks */
-    TAILQ_REMOVE(&ndrvl, np, nd_next);
-    if (ifp != NULL) {
+       ifp = np->nd_if;
+       if (ifp != NULL) {
                u_int32_t proto_family = np->nd_proto_family;
 
                if (proto_family != PF_NDRV && proto_family != 0) {
@@ -553,9 +600,10 @@ ndrv_do_detach(struct ndrv_cb *np)
                }
 
                /* Check if this is the last socket attached to this interface */
+               LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED);
                TAILQ_FOREACH(cur_np, &ndrvl, nd_next) {
                        if (cur_np->nd_family == np->nd_family &&
-                               cur_np->nd_unit == np->nd_unit) {
+                           cur_np->nd_unit == np->nd_unit) {
                                break;
                        }
                }
@@ -567,7 +615,7 @@ ndrv_do_detach(struct ndrv_cb *np)
                        socket_lock(so, 0);
                }
        }
-       if (np->nd_laddr != NULL) {
+       if (np->nd_laddr != NULL) {
                FREE(np->nd_laddr, M_IFADDR);
                np->nd_laddr = NULL;
        }
@@ -585,9 +633,8 @@ ndrv_do_disconnect(struct ndrv_cb *np)
 #if NDRV_DEBUG
        printf("NDRV disconnect: %x\n", np);
 #endif
-       if (np->nd_faddr)
-       {
-        FREE(np->nd_faddr, M_IFADDR);
+       if (np->nd_faddr) {
+               FREE(np->nd_faddr, M_IFADDR);
                np->nd_faddr = 0;
        }
        /*
@@ -595,34 +642,39 @@ ndrv_do_disconnect(struct ndrv_cb *np)
         * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
         * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
         */
-       if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
+       if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
                ndrv_do_detach(np);
+       }
        soisdisconnected(so);
-       return(0);
+       return 0;
 }
 
 /* Hackery - return a string version of a decimal number */
 static void
 sprint_d(u_int n, char *buf, int buflen)
-{      char dbuf[IFNAMSIZ];
-       char *cp = dbuf+IFNAMSIZ-1;
+{
+       char dbuf[IFNAMSIZ];
+       char *cp = dbuf + IFNAMSIZ - 1;
 
-        *cp = 0;
-        do {   buflen--;
+       *cp = 0;
+       do {
+               buflen--;
                cp--;
-                *cp = "0123456789"[n % 10];
-                n /= 10;
-        } while (n != 0 && buflen > 0);
-       strlcpy(buf, cp, IFNAMSIZ-buflen);
-        return;
+               *cp = "0123456789"[n % 10];
+               n /= 10;
+       } while (n != 0 && buflen > 0);
+       strlcpy(buf, cp, IFNAMSIZ - buflen);
+       return;
 }
 
 /*
  * Try to compare a device name (q) with one of the funky ifnet
  *  device names (ifp).
  */
-static int name_cmp(struct ifnet *ifp, char *q)
-{      char *r;
+static int
+name_cmp(struct ifnet *ifp, char *q)
+{
+       char *r;
        int len;
        char buf[IFNAMSIZ];
 
@@ -630,11 +682,11 @@ static int name_cmp(struct ifnet *ifp, char *q)
        len = strlen(ifnet_name(ifp));
        strlcpy(r, ifnet_name(ifp), IFNAMSIZ);
        r += len;
-       sprint_d(ifnet_unit(ifp), r, IFNAMSIZ-(r-buf));
+       sprint_d(ifnet_unit(ifp), r, IFNAMSIZ - (r - buf));
 #if NDRV_DEBUG
        printf("Comparing %s, %s\n", buf, q);
 #endif
-       return(strncmp(buf, q, IFNAMSIZ));
+       return strncmp(buf, q, IFNAMSIZ);
 }
 
 #if 0
@@ -645,15 +697,16 @@ static int name_cmp(struct ifnet *ifp, char *q)
 void
 ndrv_flushq(struct ifqueue *q)
 {
-    struct mbuf *m;
-       for (;;)
-       {
+       struct mbuf *m;
+       for (;;) {
                IF_DEQUEUE(q, m);
-               if (m == NULL)
+               if (m == NULL) {
                        break;
+               }
                IF_DROP(q);
-               if (m)
+               if (m) {
                        m_freem(m);
+               }
        }
 }
 #endif
@@ -661,45 +714,50 @@ ndrv_flushq(struct ifqueue *q)
 int
 ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt)
 {
-       struct ifnet_attach_proto_param proto_param;
-       struct ndrv_protocol_desc       ndrvSpec;
-       struct ndrv_demux_desc*         ndrvDemux = NULL;
-       int                                                     error = 0;
-       struct socket *                         so = np->nd_socket;
-       user_addr_t                                     user_addr;
+       struct ifnet_attach_proto_param proto_param;
+       struct ndrv_protocol_desc       ndrvSpec;
+       struct ndrv_demux_desc*         ndrvDemux = NULL;
+       int                                                     error = 0;
+       struct socket *                         so = np->nd_socket;
+       user_addr_t                                     user_addr;
 
        /* Sanity checking */
-       if (np->nd_proto_family != PF_NDRV)
+       if (np->nd_proto_family != PF_NDRV) {
                return EBUSY;
-       if (np->nd_if == NULL)
+       }
+       if (np->nd_if == NULL) {
                return EINVAL;
+       }
 
        /* Copy the ndrvSpec */
        if (proc_is64bit(sopt->sopt_p)) {
-               struct ndrv_protocol_desc64     ndrvSpec64;
+               struct ndrv_protocol_desc64     ndrvSpec64;
 
-               if (sopt->sopt_valsize != sizeof(ndrvSpec64))
+               if (sopt->sopt_valsize != sizeof(ndrvSpec64)) {
                        return EINVAL;
+               }
 
                error = sooptcopyin(sopt, &ndrvSpec64, sizeof(ndrvSpec64), sizeof(ndrvSpec64));
-               if (error != 0)
+               if (error != 0) {
                        return error;
+               }
 
                ndrvSpec.version         = ndrvSpec64.version;
                ndrvSpec.protocol_family = ndrvSpec64.protocol_family;
                ndrvSpec.demux_count     = ndrvSpec64.demux_count;
 
                user_addr = ndrvSpec64.demux_list;
-       }
-       else {
-               struct ndrv_protocol_desc32     ndrvSpec32;
+       } else {
+               struct ndrv_protocol_desc32     ndrvSpec32;
 
-               if (sopt->sopt_valsize != sizeof(ndrvSpec32))
+               if (sopt->sopt_valsize != sizeof(ndrvSpec32)) {
                        return EINVAL;
+               }
 
                error = sooptcopyin(sopt, &ndrvSpec32, sizeof(ndrvSpec32), sizeof(ndrvSpec32));
-               if (error != 0)
+               if (error != 0) {
                        return error;
+               }
 
                ndrvSpec.version         = ndrvSpec32.version;
                ndrvSpec.protocol_family = ndrvSpec32.protocol_family;
@@ -709,72 +767,74 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt)
        }
 
        /* Verify the parameter */
-       if (ndrvSpec.version > NDRV_PROTOCOL_DESC_VERS)
+       if (ndrvSpec.version > NDRV_PROTOCOL_DESC_VERS) {
                return ENOTSUP; // version is too new!
-       else if (ndrvSpec.version < 1)
+       } else if (ndrvSpec.version < 1) {
                return EINVAL; // version is not valid
-       else if (ndrvSpec.demux_count > NDRV_PROTODEMUX_COUNT || ndrvSpec.demux_count == 0)
+       } else if (ndrvSpec.demux_count > NDRV_PROTODEMUX_COUNT || ndrvSpec.demux_count == 0) {
                return EINVAL; // demux_count is not valid
-
+       }
        bzero(&proto_param, sizeof(proto_param));
        proto_param.demux_count = ndrvSpec.demux_count;
 
        /* Allocate storage for demux array */
        MALLOC(ndrvDemux, struct ndrv_demux_desc*, proto_param.demux_count *
-                  sizeof(struct ndrv_demux_desc), M_TEMP, M_WAITOK);
-       if (ndrvDemux == NULL)
+           sizeof(struct ndrv_demux_desc), M_TEMP, M_WAITOK);
+       if (ndrvDemux == NULL) {
                return ENOMEM;
+       }
 
        /* Allocate enough ifnet_demux_descs */
        MALLOC(proto_param.demux_array, struct ifnet_demux_desc*,
-                  sizeof(*proto_param.demux_array) * ndrvSpec.demux_count,
-                  M_TEMP, M_WAITOK);
-       if (proto_param.demux_array == NULL)
+           sizeof(*proto_param.demux_array) * ndrvSpec.demux_count,
+           M_TEMP, M_WAITOK);
+       if (proto_param.demux_array == NULL) {
                error = ENOMEM;
+       }
 
-       if (error == 0)
-       {
+       if (error == 0) {
                /* Copy the ndrv demux array from userland */
                error = copyin(user_addr, ndrvDemux,
-                                          ndrvSpec.demux_count * sizeof(struct ndrv_demux_desc));
+                   ndrvSpec.demux_count * sizeof(struct ndrv_demux_desc));
                ndrvSpec.demux_list = ndrvDemux;
        }
 
-       if (error == 0)
-       {
+       if (error == 0) {
                /* At this point, we've at least got enough bytes to start looking around */
-               u_int32_t       demuxOn = 0;
+               u_int32_t       demuxOn = 0;
 
                proto_param.demux_count = ndrvSpec.demux_count;
                proto_param.input = ndrv_input;
                proto_param.event = ndrv_event;
 
-               for (demuxOn = 0; demuxOn < ndrvSpec.demux_count; demuxOn++)
-               {
+               for (demuxOn = 0; demuxOn < ndrvSpec.demux_count; demuxOn++) {
                        /* Convert an ndrv_demux_desc to a ifnet_demux_desc */
                        error = ndrv_to_ifnet_demux(&ndrvSpec.demux_list[demuxOn],
-                                                                               &proto_param.demux_array[demuxOn]);
-                       if (error)
+                           &proto_param.demux_array[demuxOn]);
+                       if (error) {
                                break;
+                       }
                }
        }
 
-       if (error == 0)
-       {
+       if (error == 0) {
                /* We've got all our ducks lined up...lets attach! */
                socket_unlock(so, 0);
                error = ifnet_attach_protocol(np->nd_if, ndrvSpec.protocol_family,
-                                                                         &proto_param);
+                   &proto_param);
                socket_lock(so, 0);
-               if (error == 0)
+               if (error == 0) {
                        np->nd_proto_family = ndrvSpec.protocol_family;
+               }
        }
 
        /* Free any memory we've allocated */
-       if (proto_param.demux_array)
+       if (proto_param.demux_array) {
                FREE(proto_param.demux_array, M_TEMP);
-       if (ndrvDemux)
+       }
+       if (ndrvDemux) {
                FREE(ndrvDemux, M_TEMP);
+       }
 
        return error;
 }
@@ -783,38 +843,37 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt)
 int
 ndrv_to_ifnet_demux(struct ndrv_demux_desc* ndrv, struct ifnet_demux_desc* ifdemux)
 {
-    bzero(ifdemux, sizeof(*ifdemux));
+       bzero(ifdemux, sizeof(*ifdemux));
 
-    if (ndrv->type < DLIL_DESC_ETYPE2)
-    {
-        /* using old "type", not supported */
-        return ENOTSUP;
-    }
+       if (ndrv->type < DLIL_DESC_ETYPE2) {
+               /* using old "type", not supported */
+               return ENOTSUP;
+       }
 
-    if (ndrv->length > 28)
-    {
-        return EINVAL;
-    }
+       if (ndrv->length > 28) {
+               return EINVAL;
+       }
 
-    ifdemux->type = ndrv->type;
-    ifdemux->data = ndrv->data.other;
-    ifdemux->datalen = ndrv->length;
+       ifdemux->type = ndrv->type;
+       ifdemux->data = ndrv->data.other;
+       ifdemux->datalen = ndrv->length;
 
-    return 0;
+       return 0;
 }
 
 int
 ndrv_delspec(struct ndrv_cb *np)
 {
-    int result = 0;
+       int result = 0;
 
-    if (np->nd_proto_family == PF_NDRV ||
-       np->nd_proto_family == 0)
-        return EINVAL;
+       if (np->nd_proto_family == PF_NDRV ||
+           np->nd_proto_family == 0) {
+               return EINVAL;
+       }
 
-    /* Detach the protocol */
-    result = ifnet_detach_protocol(np->nd_if, np->nd_proto_family);
-    np->nd_proto_family = PF_NDRV;
+       /* Detach the protocol */
+       result = ifnet_detach_protocol(np->nd_if, np->nd_proto_family);
+       np->nd_proto_family = PF_NDRV;
 
        return result;
 }
@@ -822,16 +881,20 @@ ndrv_delspec(struct ndrv_cb *np)
 struct ndrv_cb *
 ndrv_find_inbound(struct ifnet *ifp, u_int32_t protocol)
 {
-    struct ndrv_cb* np;
+       struct ndrv_cb* np;
 
-       if (protocol == PF_NDRV) return NULL;
+       LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED);
 
-    TAILQ_FOREACH(np, &ndrvl, nd_next) {
-        if (np->nd_proto_family == protocol &&
-               np->nd_if == ifp) {
-            return np;
-        }
-    }
+       if (protocol == PF_NDRV) {
+               return NULL;
+       }
+
+       TAILQ_FOREACH(np, &ndrvl, nd_next) {
+               if (np->nd_proto_family == protocol &&
+                   np->nd_if == ifp) {
+                       return np;
+               }
+       }
 
        return NULL;
 }
@@ -839,239 +902,233 @@ ndrv_find_inbound(struct ifnet *ifp, u_int32_t protocol)
 static void
 ndrv_handle_ifp_detach(u_int32_t family, short unit)
 {
-    struct ndrv_cb* np;
-    struct ifnet       *ifp = NULL;
-    struct socket *so;
-
-    /* Find all sockets using this interface. */
-    TAILQ_FOREACH(np, &ndrvl, nd_next) {
-        if (np->nd_family == family &&
-            np->nd_unit == unit)
-        {
-            /* This cb is using the detaching interface, but not for long. */
-            /* Let the protocol go */
-            ifp = np->nd_if;
-            if (np->nd_proto_family != 0)
-                ndrv_delspec(np);
-
-            /* Delete the multicasts first */
-            ndrv_remove_all_multicast(np);
-
-            /* Disavow all knowledge of the ifp */
-            np->nd_if = NULL;
-            np->nd_unit = 0;
-            np->nd_family = 0;
-
-                 so = np->nd_socket;
-            /* Make sure sending returns an error */
-               LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED);
-            socantsendmore(so);
-            socantrcvmore(so);
-        }
-    }
-
-    /* Unregister our protocol */
-    if (ifp) {
-        ifnet_detach_protocol(ifp, PF_NDRV);
-    }
+       struct ndrv_cb* np;
+       struct ifnet        *ifp = NULL;
+       struct socket *so;
+
+       /* Find all sockets using this interface. */
+       TAILQ_FOREACH(np, &ndrvl, nd_next) {
+               if (np->nd_family == family &&
+                   np->nd_unit == unit) {
+                       /* This cb is using the detaching interface, but not for long. */
+                       /* Let the protocol go */
+                       ifp = np->nd_if;
+                       if (np->nd_proto_family != 0) {
+                               ndrv_delspec(np);
+                       }
+
+                       /* Delete the multicasts first */
+                       ndrv_remove_all_multicast(np);
+
+                       /* Disavow all knowledge of the ifp */
+                       np->nd_if = NULL;
+                       np->nd_unit = 0;
+                       np->nd_family = 0;
+
+                       so = np->nd_socket;
+                       /* Make sure sending returns an error */
+                       LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED);
+                       socantsendmore(so);
+                       socantrcvmore(so);
+               }
+       }
+
+       /* Unregister our protocol */
+       if (ifp) {
+               ifnet_detach_protocol(ifp, PF_NDRV);
+       }
 }
 
 static int
 ndrv_do_add_multicast(struct ndrv_cb *np, struct sockopt *sopt)
 {
-    struct ndrv_multiaddr*     ndrv_multi;
-    int                                                result;
-
-    if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 ||
-        sopt->sopt_level != SOL_NDRVPROTO || sopt->sopt_valsize > SOCK_MAXADDRLEN)
-        return EINVAL;
-    if (np->nd_if == NULL)
-        return ENXIO;
-       if (!(np->nd_dlist_cnt < ndrv_multi_max_count))
+       struct ndrv_multiaddr*      ndrv_multi;
+       int                                         result;
+
+       if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 ||
+           sopt->sopt_level != SOL_NDRVPROTO || sopt->sopt_valsize > SOCK_MAXADDRLEN) {
+               return EINVAL;
+       }
+       if (np->nd_if == NULL) {
+               return ENXIO;
+       }
+       if (!(np->nd_dlist_cnt < ndrv_multi_max_count)) {
                return EPERM;
+       }
+
+       // Allocate storage
+       MALLOC(ndrv_multi, struct ndrv_multiaddr*, sizeof(struct ndrv_multiaddr) -
+           sizeof(struct sockaddr) + sopt->sopt_valsize, M_IFADDR, M_WAITOK);
+       if (ndrv_multi == NULL) {
+               return ENOMEM;
+       }
+
+       // Copy in the address
+       result = copyin(sopt->sopt_val, &ndrv_multi->addr, sopt->sopt_valsize);
+
+       // Validate the sockaddr
+       if (result == 0 && sopt->sopt_valsize != ndrv_multi->addr.sa_len) {
+               result = EINVAL;
+       }
+
+       if (result == 0 && ndrv_have_multicast(np, &ndrv_multi->addr)) {
+               result = EEXIST;
+       }
 
-    // Allocate storage
-    MALLOC(ndrv_multi, struct ndrv_multiaddr*, sizeof(struct ndrv_multiaddr) -
-        sizeof(struct sockaddr) + sopt->sopt_valsize, M_IFADDR, M_WAITOK);
-    if (ndrv_multi == NULL)
-        return ENOMEM;
-
-    // Copy in the address
-    result = copyin(sopt->sopt_val, &ndrv_multi->addr, sopt->sopt_valsize);
-
-    // Validate the sockaddr
-    if (result == 0 && sopt->sopt_valsize != ndrv_multi->addr.sa_len)
-        result = EINVAL;
-
-    if (result == 0 && ndrv_have_multicast(np, &ndrv_multi->addr))
-        result = EEXIST;
-
-    if (result == 0)
-    {
-        // Try adding the multicast
-        result = ifnet_add_multicast(np->nd_if, &ndrv_multi->addr,
-                                                                &ndrv_multi->ifma);
-    }
-
-    if (result == 0)
-    {
-        // Add to our linked list
-        ndrv_multi->next = np->nd_multiaddrs;
-        np->nd_multiaddrs = ndrv_multi;
+       if (result == 0) {
+               // Try adding the multicast
+               result = ifnet_add_multicast(np->nd_if, &ndrv_multi->addr,
+                   &ndrv_multi->ifma);
+       }
+
+       if (result == 0) {
+               // Add to our linked list
+               ndrv_multi->next = np->nd_multiaddrs;
+               np->nd_multiaddrs = ndrv_multi;
                np->nd_dlist_cnt++;
-    }
-    else
-    {
-        // Free up the memory, something went wrong
-        FREE(ndrv_multi, M_IFADDR);
-    }
-
-    return result;
+       } else {
+               // Free up the memory, something went wrong
+               FREE(ndrv_multi, M_IFADDR);
+       }
+
+       return result;
 }
 
 static int
 ndrv_do_remove_multicast(struct ndrv_cb *np, struct sockopt *sopt)
 {
-    struct sockaddr*           multi_addr;
-    struct ndrv_multiaddr*     ndrv_entry = NULL;
-    int                                        result;
-
-    if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 ||
-        sopt->sopt_level != SOL_NDRVPROTO)
-        return EINVAL;
-    if (np->nd_if == NULL || np->nd_dlist_cnt == 0)
-        return ENXIO;
-
-    // Allocate storage
-    MALLOC(multi_addr, struct sockaddr*, sopt->sopt_valsize,
-            M_TEMP, M_WAITOK);
-    if (multi_addr == NULL)
-        return ENOMEM;
-
-    // Copy in the address
-    result = copyin(sopt->sopt_val, multi_addr, sopt->sopt_valsize);
-
-    // Validate the sockaddr
-    if (result == 0 && sopt->sopt_valsize != multi_addr->sa_len)
-        result = EINVAL;
-
-    if (result == 0)
-    {
-        /* Find the old entry */
-        ndrv_entry = ndrv_have_multicast(np, multi_addr);
-
-        if (ndrv_entry == NULL)
-            result = ENOENT;
-    }
-
-    if (result == 0)
-    {
-        // Try deleting the multicast
-        result = ifnet_remove_multicast(ndrv_entry->ifma);
-    }
-
-    if (result == 0)
-    {
-        // Remove from our linked list
-        struct ndrv_multiaddr* cur = np->nd_multiaddrs;
-
-        ifmaddr_release(ndrv_entry->ifma);
-
-        if (cur == ndrv_entry)
-        {
-            np->nd_multiaddrs = cur->next;
-        }
-        else
-        {
-            for (cur = cur->next; cur != NULL; cur = cur->next)
-            {
-                if (cur->next == ndrv_entry)
-                {
-                    cur->next = cur->next->next;
-                    break;
-                }
-            }
-        }
+       struct sockaddr*            multi_addr;
+       struct ndrv_multiaddr*      ndrv_entry = NULL;
+       int                                 result;
+
+       if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 ||
+           sopt->sopt_level != SOL_NDRVPROTO) {
+               return EINVAL;
+       }
+       if (np->nd_if == NULL || np->nd_dlist_cnt == 0) {
+               return ENXIO;
+       }
+
+       // Allocate storage
+       MALLOC(multi_addr, struct sockaddr*, sopt->sopt_valsize,
+           M_TEMP, M_WAITOK);
+       if (multi_addr == NULL) {
+               return ENOMEM;
+       }
+
+       // Copy in the address
+       result = copyin(sopt->sopt_val, multi_addr, sopt->sopt_valsize);
+
+       // Validate the sockaddr
+       if (result == 0 && sopt->sopt_valsize != multi_addr->sa_len) {
+               result = EINVAL;
+       }
+
+       if (result == 0) {
+               /* Find the old entry */
+               ndrv_entry = ndrv_have_multicast(np, multi_addr);
+
+               if (ndrv_entry == NULL) {
+                       result = ENOENT;
+               }
+       }
+
+       if (result == 0) {
+               // Try deleting the multicast
+               result = ifnet_remove_multicast(ndrv_entry->ifma);
+       }
+
+       if (result == 0) {
+               // Remove from our linked list
+               struct ndrv_multiaddr*  cur = np->nd_multiaddrs;
+
+               ifmaddr_release(ndrv_entry->ifma);
+
+               if (cur == ndrv_entry) {
+                       np->nd_multiaddrs = cur->next;
+               } else {
+                       for (cur = cur->next; cur != NULL; cur = cur->next) {
+                               if (cur->next == ndrv_entry) {
+                                       cur->next = cur->next->next;
+                                       break;
+                               }
+                       }
+               }
 
                np->nd_dlist_cnt--;
 
-        // Free the memory
-        FREE(ndrv_entry, M_IFADDR);
-    }
-    FREE(multi_addr, M_TEMP);
+               // Free the memory
+               FREE(ndrv_entry, M_IFADDR);
+       }
+       FREE(multi_addr, M_TEMP);
 
-    return result;
+       return result;
 }
 
 static struct ndrv_multiaddr*
 ndrv_have_multicast(struct ndrv_cb *np, struct sockaddr* inAddr)
 {
-    struct ndrv_multiaddr*     cur;
-    for (cur = np->nd_multiaddrs; cur != NULL; cur = cur->next)
-    {
-
-        if ((inAddr->sa_len == cur->addr.sa_len) &&
-            (bcmp(&cur->addr, inAddr, inAddr->sa_len) == 0))
-        {
-            // Found a match
-            return cur;
-        }
-    }
-
-    return NULL;
+       struct ndrv_multiaddr*      cur;
+       for (cur = np->nd_multiaddrs; cur != NULL; cur = cur->next) {
+               if ((inAddr->sa_len == cur->addr.sa_len) &&
+                   (bcmp(&cur->addr, inAddr, inAddr->sa_len) == 0)) {
+                       // Found a match
+                       return cur;
+               }
+       }
+
+       return NULL;
 }
 
 static void
 ndrv_remove_all_multicast(struct ndrv_cb* np)
 {
-    struct ndrv_multiaddr*     cur;
-
-    if (np->nd_if != NULL)
-    {
-        while (np->nd_multiaddrs != NULL)
-        {
-            cur = np->nd_multiaddrs;
-            np->nd_multiaddrs = cur->next;
-
-            ifnet_remove_multicast(cur->ifma);
-            ifmaddr_release(cur->ifma);
-            FREE(cur, M_IFADDR);
-        }
-    }
+       struct ndrv_multiaddr*      cur;
+
+       if (np->nd_if != NULL) {
+               while (np->nd_multiaddrs != NULL) {
+                       cur = np->nd_multiaddrs;
+                       np->nd_multiaddrs = cur->next;
+
+                       ifnet_remove_multicast(cur->ifma);
+                       ifmaddr_release(cur->ifma);
+                       FREE(cur, M_IFADDR);
+               }
+       }
 }
 
 static struct pr_usrreqs ndrv_usrreqs = {
-       .pru_abort =            ndrv_abort,
-       .pru_attach =           ndrv_attach,
-       .pru_bind =             ndrv_bind,
-       .pru_connect =          ndrv_connect,
-       .pru_detach =           ndrv_detach,
-       .pru_disconnect =       ndrv_disconnect,
-       .pru_peeraddr =         ndrv_peeraddr,
-       .pru_send =             ndrv_send,
-       .pru_shutdown =         ndrv_shutdown,
-       .pru_sockaddr =         ndrv_sockaddr,
-       .pru_sosend =           sosend,
-       .pru_soreceive =        soreceive,
+       .pru_abort =            ndrv_abort,
+       .pru_attach =           ndrv_attach,
+       .pru_bind =             ndrv_bind,
+       .pru_connect =          ndrv_connect,
+       .pru_detach =           ndrv_detach,
+       .pru_disconnect =       ndrv_disconnect,
+       .pru_peeraddr =         ndrv_peeraddr,
+       .pru_send =             ndrv_send,
+       .pru_shutdown =         ndrv_shutdown,
+       .pru_sockaddr =         ndrv_sockaddr,
+       .pru_sosend =           sosend,
+       .pru_soreceive =        soreceive,
 };
 
 static struct protosw ndrvsw[] = {
-{
-       .pr_type =              SOCK_RAW,
-       .pr_protocol =          NDRVPROTO_NDRV,
-       .pr_flags =             PR_ATOMIC|PR_ADDR,
-       .pr_output =            ndrv_output,
-       .pr_ctloutput =         ndrv_ctloutput,
-       .pr_usrreqs =           &ndrv_usrreqs,
-}
+       {
+               .pr_type =              SOCK_RAW,
+               .pr_protocol =          NDRVPROTO_NDRV,
+               .pr_flags =             PR_ATOMIC | PR_ADDR,
+               .pr_output =            ndrv_output,
+               .pr_ctloutput =         ndrv_ctloutput,
+               .pr_usrreqs =           &ndrv_usrreqs,
+       }
 };
 
-static int ndrv_proto_count = (sizeof (ndrvsw) / sizeof (struct protosw));
+static int ndrv_proto_count = (sizeof(ndrvsw) / sizeof(struct protosw));
 
 struct domain ndrvdomain_s = {
-       .dom_family =           PF_NDRV,
-       .dom_name =             "NetDriver",
-       .dom_init =             ndrv_dominit,
+       .dom_family =           PF_NDRV,
+       .dom_name =             "NetDriver",
+       .dom_init =             ndrv_dominit,
 };
 
 static void
@@ -1085,6 +1142,7 @@ ndrv_dominit(struct domain *dp)
 
        ndrvdomain = dp;
 
-       for (i = 0, pr = &ndrvsw[0]; i < ndrv_proto_count; i++, pr++)
+       for (i = 0, pr = &ndrvsw[0]; i < ndrv_proto_count; i++, pr++) {
                net_add_proto(pr, dp, 1);
+       }
 }
index 18c84be938f09e90659efe7c34f8882277c8587b..3e251d7b13110eb55e2654c2b906a9ca148907f8 100644 (file)
@@ -406,7 +406,7 @@ static bool necp_is_addr_in_subnet(struct sockaddr *addr, struct sockaddr *subne
 static int necp_addr_compare(struct sockaddr *sa1, struct sockaddr *sa2, int check_port);
 static bool necp_buffer_compare_with_bit_prefix(u_int8_t *p1, u_int8_t *p2, u_int32_t bits);
 static bool necp_addr_is_empty(struct sockaddr *addr);
-static bool necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet);
+static bool necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet, u_int32_t bound_interface_index);
 static bool necp_is_intcoproc(struct inpcb *inp, struct mbuf *packet);
 
 struct necp_uuid_id_mapping {
@@ -7063,6 +7063,15 @@ necp_application_find_policy_match_internal(proc_t proc,
                offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
        }
 
+       // Check for loopback exception
+       if (necp_pass_loopback > 0 && necp_is_loopback(&local_addr.sa, &remote_addr.sa, NULL, NULL, bound_interface_index)) {
+               returned_result->policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH;
+               returned_result->routing_result = NECP_KERNEL_POLICY_RESULT_PASS;
+               returned_result->routed_interface_index = lo_ifp->if_index;
+               *flags |= (NECP_CLIENT_RESULT_FLAG_IS_LOCAL | NECP_CLIENT_RESULT_FLAG_IS_DIRECT);
+               return 0;
+       }
+
        // Lock
        lck_rw_lock_shared(&necp_kernel_policy_lock);
 
@@ -8253,7 +8262,7 @@ necp_socket_is_connected(struct inpcb *inp)
 static inline bool
 necp_socket_bypass(struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, struct inpcb *inp)
 {
-       if (necp_pass_loopback > 0 && necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) {
+       if (necp_pass_loopback > 0 && necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL, IFSCOPE_NONE)) {
                return true;
        } else if (necp_is_intcoproc(inp, NULL)) {
                return true;
@@ -8728,7 +8737,7 @@ necp_ip_output_find_policy_match_locked(necp_kernel_policy_id socket_policy_id,
 static inline bool
 necp_output_bypass(struct mbuf *packet)
 {
-       if (necp_pass_loopback > 0 && necp_is_loopback(NULL, NULL, NULL, packet)) {
+       if (necp_pass_loopback > 0 && necp_is_loopback(NULL, NULL, NULL, packet, IFSCOPE_NONE)) {
                return true;
        }
        if (necp_pass_keepalives > 0 && necp_get_is_keepalive_from_packet(packet)) {
@@ -10297,7 +10306,7 @@ necp_addr_is_loopback(struct sockaddr *address)
 }
 
 static bool
-necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet)
+necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet, u_int32_t bound_interface_index)
 {
        // Note: This function only checks for the loopback addresses.
        // In the future, we may want to expand to also allow any traffic
@@ -10327,6 +10336,8 @@ necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, stru
                                return TRUE;
                        }
                }
+       } else if (bound_interface_index != IFSCOPE_NONE && lo_ifp->if_index == bound_interface_index) {
+               return TRUE;
        }
 
        if (packet != NULL) {
index ec1fd72f0e8645b9b24d5ca1792c69308261a616..0e3dd1f47a405cf155d0a246b9cfcf4dcb24197a 100644 (file)
@@ -1484,6 +1484,23 @@ necp_client_flow_is_viable(proc_t proc, struct necp_client *client,
            &flow->local_addr, &flow->remote_addr, NULL, NULL,
            NULL, ignore_address, true);
 
+       // Check for blocking agents
+       for (int i = 0; i < NECP_MAX_NETAGENTS; i++) {
+               if (uuid_is_null(result.netagents[i])) {
+                       // Passed end of valid agents
+                       break;
+               }
+
+               u_int32_t flags = netagent_get_flags(result.netagents[i]);
+               if ((flags & NETAGENT_FLAG_REGISTERED) &&
+                   !(flags & NETAGENT_FLAG_VOLUNTARY) &&
+                   !(flags & NETAGENT_FLAG_ACTIVE) &&
+                   !(flags & NETAGENT_FLAG_SPECIFIC_USE_ONLY)) {
+                       // A required agent is not active, so mark the flow as non-viable
+                       return false;
+               }
+       }
+
        return error == 0 &&
               result.routed_interface_index != IFSCOPE_NONE &&
               result.routing_result != NECP_KERNEL_POLICY_RESULT_DROP;
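
The agent check above reduces to a small predicate: an agent vetoes the flow only when it is registered, mandatory (not voluntary), not yet active, and not restricted to specific use. A minimal sketch, with a hypothetical helper name and the NETAGENT_FLAG_* bits assumed from net/network_agent.h:

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical helper, not part of the commit. */
    static bool
    necp_netagent_blocks_flow(uint32_t flags)
    {
            /* registered + mandatory + inactive + generally usable => veto */
            return (flags & NETAGENT_FLAG_REGISTERED) &&
                   !(flags & NETAGENT_FLAG_VOLUNTARY) &&
                   !(flags & NETAGENT_FLAG_ACTIVE) &&
                   !(flags & NETAGENT_FLAG_SPECIFIC_USE_ONLY);
    }
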
index 085db9c3f074b9a5573595ec43bb19c72c5e675d..a32a2ab04207e5565eb1906ca86bb3e387e10776 100644 (file)
 /* KEV_MPTCP_SUBCLASS event codes */
 #define KEV_MPTCP_CELLUSE       1
 
+#define KEV_IPSEC_SUBCLASS      13      /* IPsec event subclass */
+#define KEV_IPSEC_WAKE_PACKET   1       /* IPsec wake packet available, the
+                                        * first packet processed after a wake event */
+
 #endif /* PRIVATE */
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 #endif /* _NET_NETKEV_H_ */
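
A userspace daemon can watch for the new event over a kernel-event socket. A hedged sketch (the filter ioctl and message layout come from sys/kern_event.h; error handling trimmed):

    #include <sys/ioctl.h>
    #include <sys/kern_event.h>
    #include <sys/socket.h>
    #include <unistd.h>

    static void
    watch_ipsec_wake_events(void)
    {
            struct kev_request req = {
                    .vendor_code  = KEV_VENDOR_APPLE,
                    .kev_class    = KEV_NETWORK_CLASS,
                    .kev_subclass = KEV_IPSEC_SUBCLASS,
            };
            char buf[1024];
            int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);

            if (fd < 0) {
                    return;
            }
            if (ioctl(fd, SIOCSKEVFILT, &req) == -1) {
                    close(fd);
                    return;
            }
            while (read(fd, buf, sizeof(buf)) > 0) {
                    struct kern_event_msg *msg = (struct kern_event_msg *)(void *)buf;

                    if (msg->event_code == KEV_IPSEC_WAKE_PACKET) {
                            /* fetch the packet itself via the ipsec
                             * wake-packet sysctl (see ipsec.c below) */
                    }
            }
            close(fd);
    }
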
index 4afbfed57817ba6d48d1633f7c6288ada1a439d0..4c03c1c353a395f4fa6adfa4528f35f5e86a2d0f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2016 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2019 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -555,7 +555,7 @@ pfi_address_add(struct sockaddr *sa, int af, int net)
                            "(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX);
                        return;
                }
-               memcpy(pfi_buffer, p, pfi_buffer_cnt * sizeof(*pfi_buffer));
+               memcpy(p, pfi_buffer, pfi_buffer_max * sizeof(*pfi_buffer));
                /* no need to zero buffer */
                _FREE(pfi_buffer, PFI_MTYPE);
                pfi_buffer = p;
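
The corrected line copies the old table into the newly allocated one (destination first) before freeing it, the usual grow-and-swap pattern. A standalone sketch of the same shape:

    #include <stdlib.h>
    #include <string.h>

    /* Grow a table from old_max to new_max elements of size elem;
     * returns the new buffer, or NULL (leaving the old one intact). */
    static void *
    grow_table(void *old_buf, size_t old_max, size_t new_max, size_t elem)
    {
            void *p = malloc(new_max * elem);

            if (p != NULL) {
                    memcpy(p, old_buf, old_max * elem); /* old contents into p */
                    free(old_buf);
            }
            return p;
    }
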
index 66674c208aead5f92d4bdf7a0bb05c4d962a2e01..d5b76845e630cbef19c6b13630dcbe88498704b0 100644 (file)
@@ -918,6 +918,9 @@ extern struct in_ifaddr * inifa_ifpclatv4(struct ifnet *);
 #define satosin(sa)     SIN(sa)
 #define sintosa(sin)    ((struct sockaddr *)(void *)(sin))
 #define SINIFSCOPE(s)   ((struct sockaddr_inifscope *)(void *)(s))
+
+#define IPTOS_UNSPEC                    (-1)    /* TOS byte not set */
+#define IPTOS_MASK                      0xFF    /* TOS byte mask */
 #endif /* BSD_KERNEL_PRIVATE */
 
 #ifdef KERNEL_PRIVATE
index a5ec42ab233526acff140e2b515a6e6bc1c1bd56..2ba76a78628836ec6b80cf15995533282eb389d7 100644 (file)
@@ -796,11 +796,6 @@ extern void inpcb_to_xinpcb64(struct inpcb *, struct xinpcb64 *);
 #endif
 
 extern int get_pcblist_n(short, struct sysctl_req *, struct inpcbinfo *);
-#define INPCB_GET_PORTS_USED_WILDCARDOK 0x01
-#define INPCB_GET_PORTS_USED_NOWAKEUPOK 0x02
-#define INPCB_GET_PORTS_USED_RECVANYIFONLY 0x04
-#define INPCB_GET_PORTS_USED_EXTBGIDLEONLY 0x08
-#define INPCB_GET_PORTS_USED_ACTIVEONLY 0x10
 
 extern void inpcb_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *,
     struct inpcbinfo *);
index dcd59d9c3300467d0830dd7bf0f13d1d5d1ba95e..69302e7316791725c3a7f976315bd59255725d8d 100644 (file)
@@ -438,29 +438,37 @@ inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags,
        bool iswildcard, wildcardok, nowakeok;
        bool recvanyifonly, extbgidleok;
        bool activeonly;
+       bool anytcpstateok;
 
-       wildcardok = ((flags & INPCB_GET_PORTS_USED_WILDCARDOK) != 0);
-       nowakeok = ((flags & INPCB_GET_PORTS_USED_NOWAKEUPOK) != 0);
-       recvanyifonly = ((flags & INPCB_GET_PORTS_USED_RECVANYIFONLY) != 0);
-       extbgidleok = ((flags & INPCB_GET_PORTS_USED_EXTBGIDLEONLY) != 0);
-       activeonly = ((flags & INPCB_GET_PORTS_USED_ACTIVEONLY) != 0);
+       wildcardok = ((flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) != 0);
+       nowakeok = ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) != 0);
+       recvanyifonly = ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) != 0);
+       extbgidleok = ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) != 0);
+       activeonly = ((flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) != 0);
+       anytcpstateok = ((flags & IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK) != 0);
 
        lck_rw_lock_shared(pcbinfo->ipi_lock);
        gencnt = pcbinfo->ipi_gencnt;
 
        for (inp = LIST_FIRST(pcbinfo->ipi_listhead); inp;
            inp = LIST_NEXT(inp, inp_list)) {
-               uint16_t port;
-
                if (inp->inp_gencnt > gencnt ||
                    inp->inp_state == INPCB_STATE_DEAD ||
                    inp->inp_wantcnt == WNT_STOPUSING) {
                        continue;
                }
 
-               if ((so = inp->inp_socket) == NULL ||
-                   (so->so_state & SS_DEFUNCT) ||
-                   (so->so_state & SS_ISDISCONNECTED)) {
+               if ((so = inp->inp_socket) == NULL || inp->inp_lport == 0) {
+                       continue;
+               }
+
+               /*
+                * ANYTCPSTATEOK means incoming packets cannot be filtered
+                * at reception time, so cast a wide net of possibilities
+                */
+               if (!anytcpstateok &&
+                   ((so->so_state & SS_DEFUNCT) ||
+                   (so->so_state & SS_ISDISCONNECTED))) {
                        continue;
                }
 
@@ -551,6 +559,15 @@ inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags,
 
                        switch (tp->t_state) {
                        case TCPS_CLOSED:
+                               if (anytcpstateok && inp->inp_fport != 0) {
+                                       /*
+                                        * A foreign port means we had a 4-tuple,
+                                        * or at least a connection attempt, so
+                                        * packets may still be received for the
+                                        * 4-tuple after the connection is gone
+                                        */
+                                       break;
+                               }
                                continue;
                        /* NOT REACHED */
                        case TCPS_LISTEN:
@@ -570,26 +587,28 @@ inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags,
                        case TCPS_FIN_WAIT_2:
                                /*
                                 * In the closing states, the connection
-                                * is not idle when there is outgoing
+                                * is active when there is outgoing
                                 * data having to be acknowledged
                                 */
-                               if (activeonly && so->so_snd.sb_cc == 0) {
+                               if (!anytcpstateok &&
+                                   (activeonly && so->so_snd.sb_cc == 0)) {
                                        continue;
                                }
                                break;
                        case TCPS_TIME_WAIT:
+                               if (anytcpstateok) {
+                                       /*
+                                        * Packets may still be received for the 4-tuple
+                                        * after the connection is gone
+                                        */
+                                       break;
+                               }
                                continue;
                                /* NOT REACHED */
                        }
                }
-               /*
-                * Final safeguard to exclude unspecified local port
-                */
-               port = ntohs(inp->inp_lport);
-               if (port == 0) {
-                       continue;
-               }
-               bitstr_set(bitfield, port);
+
+               bitstr_set(bitfield, ntohs(inp->inp_lport));
 
                if_ports_used_add_inpcb(ifindex, inp);
        }
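
For reference, the bitfield argument follows the sys/bitstring.h conventions, one bit per in-use local port in host byte order, so a kernel-side caller sized for the 16-bit port space can probe it like this (illustrative sketch; the 65536 is the assumed value of IP_PORTRANGE_SIZE used in ipsec.c below):

    #include <stdbool.h>
    #include <stdint.h>
    #include <sys/bitstring.h>

    #define PORT_SPACE      65536   /* assumed value of IP_PORTRANGE_SIZE */

    static bool
    port_is_used(bitstr_t *bitfield, uint16_t port)
    {
            /* inpcb_get_ports_used() sets one bit per in-use local port */
            return bit_test(bitfield, port) != 0;
    }

    /* Typical setup on the caller side:
     *      bitstr_t bit_decl(port_bitmap, PORT_SPACE);
     *      bit_nclear(port_bitmap, 0, PORT_SPACE - 1);
     */
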
index 6c939c5aa5ccc6fdc539ddfd8a7eac17d231ef4b..2971f9191cffea053cf5650839e406c09f0fa78a 100644 (file)
@@ -1068,6 +1068,32 @@ so_tc_from_control(struct mbuf *control, int *out_netsvctype)
        return sotc;
 }
 
+__private_extern__ int
+so_tos_from_control(struct mbuf *control)
+{
+       struct cmsghdr *cm;
+       int tos = IPTOS_UNSPEC;
+
+       for (cm = M_FIRST_CMSGHDR(control);
+           is_cmsg_valid(control, cm);
+           cm = M_NXT_CMSGHDR(control, cm)) {
+               if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
+                       continue;
+               }
+
+               if ((cm->cmsg_level == IPPROTO_IP &&
+                   cm->cmsg_type == IP_TOS) ||
+                   (cm->cmsg_level == IPPROTO_IPV6 &&
+                   cm->cmsg_type == IPV6_TCLASS)) {
+                       tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK;
+                       /* The first valid option wins */
+                       break;
+               }
+       }
+
+       return tos;
+}
+
 __private_extern__ void
 so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
 {
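
On the sending side, the control mbuf that so_tos_from_control() walks carries ordinary IP_TOS or IPV6_TCLASS ancillary data. A userspace sketch of a matching sender (names local to the example):

    #include <netinet/in.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    static ssize_t
    send_with_tos(int sock, const void *payload, size_t len,
        const struct sockaddr *dst, socklen_t dstlen, int tos)
    {
            char cbuf[CMSG_SPACE(sizeof(int))];
            struct iovec iov = {
                    .iov_base = (void *)(uintptr_t)payload,
                    .iov_len = len,
            };
            struct msghdr msg = {
                    .msg_name = (void *)(uintptr_t)dst,
                    .msg_namelen = dstlen,
                    .msg_iov = &iov,
                    .msg_iovlen = 1,
                    .msg_control = cbuf,
                    .msg_controllen = sizeof(cbuf),
            };
            struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

            cm->cmsg_level = IPPROTO_IP;    /* IPPROTO_IPV6 + IPV6_TCLASS for v6 */
            cm->cmsg_type = IP_TOS;
            cm->cmsg_len = CMSG_LEN(sizeof(int));
            memcpy(CMSG_DATA(cm), &tos, sizeof(int));

            return sendmsg(sock, &msg, 0);
    }
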
index 16782affc91168fdeaef56144bda893ee03a0242..38a45abfb1b0c2cee67bf36201c51425ad60f657 100644 (file)
@@ -3074,7 +3074,7 @@ ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m)
                        ovbcopy((caddr_t)(&cp[IPOPT_OFFSET + 1] +
                            sizeof(struct in_addr)),
                            (caddr_t)&cp[IPOPT_OFFSET + 1],
-                           (unsigned)cnt + sizeof(struct in_addr));
+                           (unsigned)cnt - (IPOPT_MINOFF - 1));
                        break;
                }
        }
index a2883309e5a9c756d20a56b522cd6e58b1536059..25a51db92a2ce203d139a8800fe813977a1b244e 100644 (file)
@@ -1465,6 +1465,10 @@ mptcp_session_necp_cb(void *handle, int action, uint32_t interface_index,
                        goto out;
                }
 
+               if (IFNET_IS_COMPANION_LINK(ifp)) {
+                       goto out;
+               }
+
                if (IFNET_IS_EXPENSIVE(ifp) &&
                    (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
                        goto out;
index f7980b76c26a2d4577ec1fb396b8ae724ba9d698..da19de2095fbf3579c1c813bfa33dde35af08b94 100644 (file)
@@ -126,7 +126,6 @@ static void mptcp_subflow_abort(struct mptsub *, int);
 
 static void mptcp_send_dfin(struct socket *so);
 static void mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts);
-static void mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, long val);
 static int mptcp_freeq(struct mptcb *mp_tp);
 
 /*
@@ -215,7 +214,6 @@ static uint32_t mptcp_kern_skt_unit;
 static symptoms_advisory_t mptcp_advisory;
 
 uint32_t mptcp_cellicon_refcount = 0;
-#define MPTCP_CELLICON_TOGGLE_RATE      (5 * TCP_RETRANSHZ) /* Only toggle every 5 seconds */
 
 /*
  * XXX The order of the event handlers below is really
@@ -852,9 +850,6 @@ mptcp_trigger_cell_bringup(struct mptses *mpte)
 static boolean_t
 mptcp_subflow_disconnecting(struct mptsub *mpts)
 {
-       /* Split out in if-statements for readability. Compile should
-        * optimize that.
-        */
        if (mpts->mpts_socket->so_state & SS_ISDISCONNECTED) {
                return true;
        }
@@ -2699,11 +2694,15 @@ mptcp_subflow_abort(struct mptsub *mpts, int error)
 void
 mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
 {
-       struct socket *so;
+       struct socket *so, *mp_so;
        struct mptcb *mp_tp;
        int send_dfin = 0;
 
-       socket_lock_assert_owned(mptetoso(mpte));
+       so = mpts->mpts_socket;
+       mp_tp = mpte->mpte_mptcb;
+       mp_so = mptetoso(mpte);
+
+       socket_lock_assert_owned(mp_so);
 
        if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
                return;
@@ -2713,8 +2712,6 @@ mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
 
        mpts->mpts_flags |= MPTSF_DISCONNECTING;
 
-       so = mpts->mpts_socket;
-       mp_tp = mpte->mpte_mptcb;
        if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
                send_dfin = 1;
        }
@@ -2728,10 +2725,29 @@ mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
                if (send_dfin) {
                        mptcp_send_dfin(so);
                }
-               (void) soshutdownlock(so, SHUT_RD);
-               (void) soshutdownlock(so, SHUT_WR);
-               (void) sodisconnectlocked(so);
+
+               if (mp_so->so_flags & SOF_DEFUNCT) {
+                       errno_t ret;
+
+                       ret = sosetdefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE);
+                       if (ret == 0) {
+                               ret = sodefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
+
+                               if (ret != 0) {
+                                       os_log_error(mptcp_log_handle, "%s - %lx: sodefunct failed with %d\n",
+                                           __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
+                               }
+                       } else {
+                               os_log_error(mptcp_log_handle, "%s - %lx: sosetdefunct failed with %d\n",
+                                   __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
+                       }
+               } else {
+                       (void) soshutdownlock(so, SHUT_RD);
+                       (void) soshutdownlock(so, SHUT_WR);
+                       (void) sodisconnectlocked(so);
+               }
        }
+
        /*
         * Generate a disconnect event for this subflow socket, in case
         * the lower layer doesn't do it; this is needed because the
@@ -6525,6 +6541,7 @@ mptcp_post_event(u_int32_t event_code, int value)
 static void
 mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts)
 {
+       struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
        int error;
 
        /* First-party apps (Siri) don't flip the cellicon */
@@ -6537,9 +6554,17 @@ mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts)
                return;
        }
 
+       /* Fallen-back connections do not trigger the cellicon */
+       if (mpte->mpte_mptcb->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
+               return;
+       }
+
        /* Remember the last time we set the cellicon. Needed for debouncing */
        mpte->mpte_last_cellicon_set = tcp_now;
 
+       tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE);
+       tcp_sched_timers(tp);
+
        if (mpts->mpts_flags & MPTSF_CELLICON_SET &&
            mpte->mpte_cellicon_increments != 0) {
                if (mptcp_cellicon_refcount == 0) {
@@ -6612,8 +6637,8 @@ __mptcp_unset_cellicon(long val)
        return true;
 }
 
-static void
-mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, long val)
+void
+mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, uint32_t val)
 {
        /* First-party apps (Siri) don't flip the cellicon */
        if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
@@ -6640,7 +6665,13 @@ mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, long val)
                mpts->mpts_flags &= ~MPTSF_CELLICON_SET;
        }
 
-       mpte->mpte_cellicon_increments--;
+       if (mpte->mpte_cellicon_increments < val) {
+               os_log_error(mptcp_log_handle, "%s - %lx: Increments is %u but want to dec by %u.\n",
+                   __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments, val);
+               val = mpte->mpte_cellicon_increments;
+       }
+
+       mpte->mpte_cellicon_increments -= val;
 
        if (__mptcp_unset_cellicon(val) == false) {
                return;
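
The clamp above keeps the per-connection count and the global mptcp_cellicon_refcount consistent: a connection can never release more references than it holds. The arithmetic amounts to (sketch, hypothetical name):

    static uint32_t
    cellicon_clamp_release(uint32_t held, uint32_t want)
    {
            /* never release more references than this connection holds */
            return want > held ? held : want;
    }
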
index a47b8a51226b016d51f508222416350dccf3b04a..0012e449760500252500d5aefc133147d738debd 100644 (file)
@@ -1414,6 +1414,7 @@ mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt)
        case SO_NOADDRERR:                      /* MP */
        case SO_LABEL:                          /* MP */
        case SO_PEERLABEL:                      /* MP */
+       case SO_DEFUNCTIT:                      /* MP */
        case SO_DEFUNCTOK:                      /* MP */
        case SO_ISDEFUNCT:                      /* MP */
        case SO_TRAFFIC_CLASS_DBG:              /* MP */
@@ -2185,6 +2186,8 @@ mptcp_sopt2str(int level, int optname)
                        return "SO_TRAFFIC_CLASS_DBG";
                case SO_PRIVILEGED_TRAFFIC_CLASS:
                        return "SO_PRIVILEGED_TRAFFIC_CLASS";
+               case SO_DEFUNCTIT:
+                       return "SO_DEFUNCTIT";
                case SO_DEFUNCTOK:
                        return "SO_DEFUNCTOK";
                case SO_ISDEFUNCT:
index 4c9037db7d071d767fe6d5a5f8e9bd74f4339bd8..f13bfb95023568e16ed3d10e5c769fb24546a472 100644 (file)
@@ -554,6 +554,8 @@ extern uint32_t mptcp_dbg_area; /* Multipath TCP debugging area */
 extern int mptcp_developer_mode;        /* Allow aggregation mode */
 extern uint32_t mptcp_cellicon_refcount;
 
+#define MPTCP_CELLICON_TOGGLE_RATE      (5 * TCP_RETRANSHZ) /* Only toggle every 5 seconds */
+
 extern int tcp_jack_rxmt;       /* Join ACK retransmission value in msecs */
 
 __BEGIN_DECLS
@@ -642,6 +644,7 @@ extern struct sockaddr *mptcp_get_session_dst(struct mptses *mpte,
     boolean_t has_v6, boolean_t has_v4);
 extern void mptcp_set_restrictions(struct socket *mp_so);
 extern void mptcp_clear_cellicon(void);
+extern void mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, uint32_t val);
 extern void mptcp_reset_rexmit_state(struct tcpcb *tp);
 extern void mptcp_reset_keepalive(struct tcpcb *tp);
 extern int mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
index 66b0102bbb7ecbcc3f1c0e1bcc386c07b273ed12..dc552d9e30ac30ffdf703082edbb0bafc1648b9c 100644 (file)
@@ -372,6 +372,7 @@ rip_output(
        int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
        struct ip_out_args ipoa;
        struct ip_moptions *imo;
+       int tos = IPTOS_UNSPEC;
        int error = 0;
 
        bzero(&ipoa, sizeof(ipoa));
@@ -383,6 +384,7 @@ rip_output(
 
 
        if (control != NULL) {
+               tos = so_tos_from_control(control);
                sotc = so_tc_from_control(control, &netsvctype);
 
                m_freem(control);
@@ -444,7 +446,11 @@ rip_output(
                        return ENOBUFS;
                }
                ip = mtod(m, struct ip *);
-               ip->ip_tos = inp->inp_ip_tos;
+               if (tos != IPTOS_UNSPEC) {
+                       ip->ip_tos = (uint8_t)(tos & IPTOS_MASK);
+               } else {
+                       ip->ip_tos = inp->inp_ip_tos;
+               }
                ip->ip_off = 0;
                ip->ip_p = inp->inp_ip_p;
                ip->ip_len = m->m_pkthdr.len;
index 784c0e879dee716621bbf3f90ed938bac9e8436d..ee69afd6fd91f2191c0cd72718265f8f3844ed61 100644 (file)
@@ -311,7 +311,6 @@ static void tcp_remove_timer(struct tcpcb *tp);
 static void tcp_sched_timerlist(uint32_t offset);
 static u_int32_t tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *mode,
     u_int16_t probe_if_index);
-static void tcp_sched_timers(struct tcpcb *tp);
 static inline void tcp_set_lotimer_index(struct tcpcb *);
 __private_extern__ void tcp_remove_from_time_wait(struct inpcb *inp);
 static inline void tcp_update_mss_core(struct tcpcb *tp, struct ifnet *ifp);
@@ -1550,6 +1549,27 @@ fc_output:
                        (void) tcp_output(tp);
                }
                break;
+       case TCPT_CELLICON:
+       {
+               struct mptses *mpte = tptomptp(tp)->mpt_mpte;
+
+               tp->t_timer[TCPT_CELLICON] = 0;
+
+               if (mpte->mpte_cellicon_increments == 0) {
+                       /* Cell-icon not set by this connection */
+                       break;
+               }
+
+               if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
+                       mptcp_unset_cellicon(mpte, NULL, 1);
+               }
+
+               if (mpte->mpte_cellicon_increments) {
+                       tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE);
+               }
+
+               break;
+       }
 #endif /* MPTCP */
 
        case TCPT_PTO:
index e09f01de90d4f3cc3b916ee55e60d06ffb4f2908..8b843572266e5508134d02e482ea046af348a9b6 100644 (file)
 #define TCPT_2MSL       6       /* 2*msl quiet time timer */
 #if MPTCP
 #define TCPT_JACK_RXMT  7       /* retransmit timer for join ack */
-#define TCPT_MAX        7
+#define TCPT_CELLICON   8       /* Timer to check for cell-activity */
+#define TCPT_MAX        8
 #else /* MPTCP */
 #define TCPT_MAX        6
 #endif /* !MPTCP */
index 5358d21a094c768c234e1840a31e05da84e1dcfb..95e1903ee5da3e5bb8ba6df6aa0f4637326c4f7d 100644 (file)
@@ -1574,6 +1574,7 @@ void     tcp_gc(struct inpcbinfo *);
 void     tcp_itimer(struct inpcbinfo *ipi);
 void     tcp_check_timer_state(struct tcpcb *tp);
 void     tcp_run_timerlist(void *arg1, void *arg2);
+void     tcp_sched_timers(struct tcpcb *tp);
 
 struct tcptemp *tcp_maketemplate(struct tcpcb *);
 void     tcp_fillheaders(struct tcpcb *, void *, void *);
index 247e0180270b987f2756080ee088425a4fe09240..ed16674e5d4b1797a9fc72341e585489bb8c26f1 100644 (file)
 #if IPSEC
 #include <netinet6/ipsec.h>
 #include <netinet6/esp.h>
+#include <netkey/key.h>
 extern int ipsec_bypass;
 extern int esp_udp_encap_port;
 #endif /* IPSEC */
@@ -615,41 +616,56 @@ udp_input(struct mbuf *m, int iphlen)
        if (ipsec_bypass == 0 && (esp_udp_encap_port & 0xFFFF) != 0 &&
            (uh->uh_dport == ntohs((u_short)esp_udp_encap_port) ||
            uh->uh_sport == ntohs((u_short)esp_udp_encap_port))) {
-               int payload_len = len - sizeof(struct udphdr) > 4 ? 4 :
-                   len - sizeof(struct udphdr);
+               /*
+                * Treat as ESP or keepalive when either:
+                *      1. the destination port of the incoming packet is 4500, or
+                *      2. the source port of the incoming packet is 4500 and
+                *         the SADB has an SA matching the IP addresses and ports.
+                */
+               bool check_esp = true;
+               if (uh->uh_dport != ntohs((u_short)esp_udp_encap_port)) {
+                       check_esp = key_checksa_present(AF_INET, (caddr_t)&ip->ip_dst,
+                           (caddr_t)&ip->ip_src, uh->uh_dport,
+                           uh->uh_sport);
+               }
+
+               if (check_esp) {
+                       int payload_len = len - sizeof(struct udphdr) > 4 ? 4 :
+                           len - sizeof(struct udphdr);
+
+                       if (m->m_len < iphlen + sizeof(struct udphdr) + payload_len) {
+                               if ((m = m_pullup(m, iphlen + sizeof(struct udphdr) +
+                                   payload_len)) == NULL) {
+                                       udpstat.udps_hdrops++;
+                                       KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
+                                           0, 0, 0, 0, 0);
+                                       return;
+                               }
+                               /*
+                                * Expect 32-bit aligned data pointer on strict-align
+                                * platforms.
+                                */
+                               MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
 
-               if (m->m_len < iphlen + sizeof(struct udphdr) + payload_len) {
-                       if ((m = m_pullup(m, iphlen + sizeof(struct udphdr) +
-                           payload_len)) == NULL) {
-                               udpstat.udps_hdrops++;
+                               ip = mtod(m, struct ip *);
+                               uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
+                       }
+                       /* Check for NAT keepalive packet */
+                       if (payload_len == 1 && *(u_int8_t *)
+                           ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) {
+                               m_freem(m);
+                               KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
+                                   0, 0, 0, 0, 0);
+                               return;
+                       } else if (payload_len == 4 && *(u_int32_t *)(void *)
+                           ((caddr_t)uh + sizeof(struct udphdr)) != 0) {
+                               /* UDP encapsulated IPsec packet to pass through NAT */
                                KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
                                    0, 0, 0, 0, 0);
+                               /* preserve the udp header */
+                               esp4_input(m, iphlen + sizeof(struct udphdr));
                                return;
                        }
-                       /*
-                        * Expect 32-bit aligned data pointer on strict-align
-                        * platforms.
-                        */
-                       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
-
-                       ip = mtod(m, struct ip *);
-                       uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
-               }
-               /* Check for NAT keepalive packet */
-               if (payload_len == 1 && *(u_int8_t *)
-                   ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) {
-                       m_freem(m);
-                       KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
-                           0, 0, 0, 0, 0);
-                       return;
-               } else if (payload_len == 4 && *(u_int32_t *)(void *)
-                   ((caddr_t)uh + sizeof(struct udphdr)) != 0) {
-                       /* UDP encapsulated IPsec packet to pass through NAT */
-                       KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
-                           0, 0, 0, 0, 0);
-                       /* preserve the udp header */
-                       esp4_input(m, iphlen + sizeof(struct udphdr));
-                       return;
                }
        }
 #endif /* IPSEC */
@@ -1478,6 +1494,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
        int netsvctype = _NET_SERVICE_TYPE_UNSPEC;
        struct ifnet *origoutifp = NULL;
        int flowadv = 0;
+       int tos = IPTOS_UNSPEC;
 
        /* Enable flow advisory only when connected */
        flowadv = (so->so_state & SS_ISCONNECTED) ? 1 : 0;
@@ -1516,6 +1533,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
 #endif
 
        if (control != NULL) {
+               tos = so_tos_from_control(control);
                sotc = so_tc_from_control(control, &netsvctype);
                VERIFY(outif == NULL);
                error = udp_check_pktinfo(control, &outif, &pi_laddr);
@@ -1799,7 +1817,11 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
        }
        ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len;
        ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;    /* XXX */
-       ((struct ip *)ui)->ip_tos = inp->inp_ip_tos;    /* XXX */
+       if (tos != IPTOS_UNSPEC) {
+               ((struct ip *)ui)->ip_tos = (uint8_t)(tos & IPTOS_MASK);
+       } else {
+               ((struct ip *)ui)->ip_tos = inp->inp_ip_tos;    /* XXX */
+       }
        udpstat.udps_opackets++;
 
        KERNEL_DEBUG(DBG_LAYER_OUT_END, ui->ui_dport, ui->ui_sport,
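
The keepalive and marker checks above follow the UDP-encapsulation rules of RFC 3948: a one-byte 0xFF payload is a NAT-keepalive and is dropped, a nonzero first 32-bit word marks an ESP packet, and a zero word (the non-ESP marker) lets IKE fall through to normal UDP delivery. The sender side of a keepalive is correspondingly tiny (sketch, assuming an already-connected UDP/4500 socket):

    #include <sys/socket.h>

    /* Send one RFC 3948 NAT-keepalive (a single 0xFF octet). */
    static int
    send_natt_keepalive(int udp_fd)
    {
            const unsigned char ka = 0xFF;

            return send(udp_fd, &ka, sizeof(ka), 0) == 1 ? 0 : -1;
    }
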
index 104f5a9c68c446440bd59bc30981ce27b2b4f3ff..67a664ec62312ff5c89858cf89bda424b1174436 100644 (file)
@@ -232,7 +232,7 @@ ah4_input(struct mbuf *m, int off)
                 */
                if (siz1 < siz) {
                        ipseclog((LOG_NOTICE, "sum length too short in IPv4 AH input "
-                           "(%lu, should be at least %lu): %s\n",
+                           "(%u, should be at least %u): %s\n",
                            (u_int32_t)siz1, (u_int32_t)siz,
                            ipsec4_logpacketstr(ip, spi)));
                        IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
@@ -240,7 +240,7 @@ ah4_input(struct mbuf *m, int off)
                }
                if ((ah->ah_len << 2) - sizoff != siz1) {
                        ipseclog((LOG_NOTICE, "sum length mismatch in IPv4 AH input "
-                           "(%d should be %lu): %s\n",
+                           "(%d should be %u): %s\n",
                            (ah->ah_len << 2) - sizoff, (u_int32_t)siz1,
                            ipsec4_logpacketstr(ip, spi)));
                        IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
@@ -708,7 +708,7 @@ ah6_input(struct mbuf **mp, int *offp, int proto)
                 */
                if (siz1 < siz) {
                        ipseclog((LOG_NOTICE, "sum length too short in IPv6 AH input "
-                           "(%lu, should be at least %lu): %s\n",
+                           "(%u, should be at least %u): %s\n",
                            (u_int32_t)siz1, (u_int32_t)siz,
                            ipsec6_logpacketstr(ip6, spi)));
                        IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
@@ -716,7 +716,7 @@ ah6_input(struct mbuf **mp, int *offp, int proto)
                }
                if ((ah->ah_len << 2) - sizoff != siz1) {
                        ipseclog((LOG_NOTICE, "sum length mismatch in IPv6 AH input "
-                           "(%d should be %lu): %s\n",
+                           "(%d should be %u): %s\n",
                            (ah->ah_len << 2) - sizoff, (u_int32_t)siz1,
                            ipsec6_logpacketstr(ip6, spi)));
                        IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
index a176a64f4205b4004250580bf0a671640999e6ff..85450de0ae61c1b6e4f3b0b14d543af93fb8e1bd 100644 (file)
@@ -442,7 +442,7 @@ esp_chachapoly_decrypt(struct mbuf *m, // head of mbuf chain
 
        // check if total packet length is enough to contain ESP + IV
        if (m->m_pkthdr.len < bodyoff) {
-               esp_packet_log_err("ChaChaPoly Packet too short %d < %zu, SPI 0x%08x",
+               esp_packet_log_err("ChaChaPoly Packet too short %d < %u, SPI 0x%08x",
                    m->m_pkthdr.len, bodyoff, ntohl(sav->spi));
                m_freem(m);
                return EINVAL;
index 17bd8e242007cd75c1a75ef10a61ecb2b8abdc66..28ce428812b84cec4ac807053d61660442cf5a92 100644 (file)
@@ -630,7 +630,7 @@ esp_gcm_mature(struct secasvar *sav)
                break;
        default:
                ipseclog((LOG_ERR,
-                   "esp_gcm_mature %s: invalid algo %d.\n", sav->alg_enc));
+                   "esp_gcm_mature %s: invalid algo %d.\n", algo->name, sav->alg_enc));
                return 1;
        }
 
@@ -777,7 +777,7 @@ esp_cbc_decrypt(struct mbuf *m, size_t off, struct secasvar *sav,
        }
 
        if (m->m_pkthdr.len < bodyoff) {
-               ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%lu\n",
+               ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%u\n",
                    algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff));
                m_freem(m);
                return EINVAL;
@@ -1020,14 +1020,14 @@ esp_cbc_encrypt(
        }
 
        if (m->m_pkthdr.len < bodyoff) {
-               ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%lu\n",
+               ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%u\n",
                    algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff));
                m_freem(m);
                return EINVAL;
        }
        if ((m->m_pkthdr.len - bodyoff) % blocklen) {
                ipseclog((LOG_ERR, "esp_cbc_encrypt %s: "
-                   "payload length must be multiple of %lu\n",
+                   "payload length must be multiple of %u\n",
                    algo->name, (u_int32_t)algo->padbound));
                m_freem(m);
                return EINVAL;
@@ -1228,7 +1228,7 @@ esp_auth(
        siz = (((*algo->sumsiz)(sav) + 3) & ~(4 - 1));
        if (sizeof(sumbuf) < siz) {
                ipseclog((LOG_DEBUG,
-                   "esp_auth: AH_MAXSUMSIZE is too small: siz=%lu\n",
+                   "esp_auth: AH_MAXSUMSIZE is too small: siz=%u\n",
                    (u_int32_t)siz));
                KERNEL_DEBUG(DBG_FNC_ESPAUTH | DBG_FUNC_END, 4, 0, 0, 0, 0);
                return EINVAL;
index f53236153f7f09ecf9d1a74f7985777e1ecd2d94..8e99b3eb7014f154425f5acb6ca46afca13df985 100644 (file)
@@ -378,7 +378,7 @@ esp4_input_extended(struct mbuf *m, int off, ifnet_t interface)
                }
                if (AH_MAXSUMSIZE < siz) {
                        ipseclog((LOG_DEBUG,
-                           "internal error: AH_MAXSUMSIZE must be larger than %lu\n",
+                           "internal error: AH_MAXSUMSIZE must be larger than %u\n",
                            (u_int32_t)siz));
                        IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
                        goto bad;
@@ -811,7 +811,7 @@ noreplaycheck:
                                int mlen;
                                if ((mlen = m_length2(m, NULL)) < hlen) {
                                        ipseclog((LOG_DEBUG,
-                                           "IPv4 ESP input: decrypted packet too short %d < %d\n",
+                                           "IPv4 ESP input: decrypted packet too short %d < %zu\n",
                                            mlen, hlen));
                                        IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
                                        ifnet_release(ipsec_if);
@@ -1055,7 +1055,7 @@ esp6_input_extended(struct mbuf **mp, int *offp, int proto, ifnet_t interface)
                }
                if (AH_MAXSUMSIZE < siz) {
                        ipseclog((LOG_DEBUG,
-                           "internal error: AH_MAXSUMSIZE must be larger than %lu\n",
+                           "internal error: AH_MAXSUMSIZE must be larger than %u\n",
                            (u_int32_t)siz));
                        IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
                        goto bad;
index fbb36070b199d518c87987fe20b9b329ee9dacb8..9a768a00490366d9cd7c816f54d2e0558d3d705d 100644 (file)
@@ -182,7 +182,7 @@ esp_cbc_decrypt_aes(
        }
 
        if (m->m_pkthdr.len < bodyoff) {
-               ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%lu\n",
+               ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%u\n",
                    algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff));
                m_freem(m);
                return EINVAL;
@@ -399,14 +399,14 @@ esp_cbc_encrypt_aes(
        ivp = (u_int8_t *) sav->iv;
 
        if (m->m_pkthdr.len < bodyoff) {
-               ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%lu\n",
+               ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%u\n",
                    algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff));
                m_freem(m);
                return EINVAL;
        }
        if ((m->m_pkthdr.len - bodyoff) % AES_BLOCKLEN) {
                ipseclog((LOG_ERR, "esp_cbc_encrypt %s: "
-                   "payload length must be multiple of %lu\n",
+                   "payload length must be multiple of %d\n",
                    algo->name, AES_BLOCKLEN));
                m_freem(m);
                return EINVAL;
@@ -705,7 +705,7 @@ esp_gcm_encrypt_aes(
        bzero(nonce, ESP_GCM_SALT_LEN + ivlen);
 
        if (m->m_pkthdr.len < bodyoff) {
-               ipseclog((LOG_ERR, "%s: bad len %d/%lu\n", __FUNCTION__,
+               ipseclog((LOG_ERR, "%s: bad len %d/%u\n", __FUNCTION__,
                    m->m_pkthdr.len, (u_int32_t)bodyoff));
                m_freem(m);
                return EINVAL;
@@ -906,7 +906,7 @@ esp_gcm_decrypt_aes(
        }
 
        if (m->m_pkthdr.len < bodyoff) {
-               ipseclog((LOG_ERR, "%s: bad len %d/%lu\n", __FUNCTION__,
+               ipseclog((LOG_ERR, "%s: bad len %d/%u\n", __FUNCTION__,
                    m->m_pkthdr.len, (u_int32_t)bodyoff));
                m_freem(m);
                return EINVAL;
index 671a6a64f7f459ca3947ce1b4d3d3a617fd1d8d9..c5610feed5554afa02a685afac358204f6707fa6 100644 (file)
 #include <sys/priv.h>
 #include <kern/locks.h>
 #include <sys/kauth.h>
+#include <sys/bitstring.h>
+
 #include <libkern/OSAtomic.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <net/if_ipsec.h>
+#include <net/if_ports_used.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 
 #include <IOKit/pwr_mgt/IOPM.h>
 
+#include <os/log_private.h>
+
 #if IPSEC_DEBUG
 int ipsec_debug = 1;
 #else
@@ -5162,6 +5167,15 @@ sysctl_ipsec_wake_packet SYSCTL_HANDLER_ARGS
                }
 
                int result = sysctl_io_opaque(req, &ipsec_wake_pkt, sizeof(ipsec_wake_pkt), NULL);
+
+               ipseclog((LOG_NOTICE, "%s: uuid %s spi %u seq %u len %u result %d",
+                   __func__,
+                   ipsec_wake_pkt.wake_uuid,
+                   ipsec_wake_pkt.wake_pkt_spi,
+                   ipsec_wake_pkt.wake_pkt_seq,
+                   ipsec_wake_pkt.wake_pkt_len,
+                   result));
+
                return result;
        }
 
@@ -5191,12 +5205,63 @@ ipsec_save_wake_packet(struct mbuf *wake_mbuf, u_int32_t spi, u_int32_t seq)
        ipsec_wake_pkt.wake_pkt_spi = spi;
        ipsec_wake_pkt.wake_pkt_seq = seq;
 
+       ipseclog((LOG_NOTICE, "%s: uuid %s spi %u seq %u len %u",
+           __func__,
+           ipsec_wake_pkt.wake_uuid,
+           ipsec_wake_pkt.wake_pkt_spi,
+           ipsec_wake_pkt.wake_pkt_seq,
+           ipsec_wake_pkt.wake_pkt_len));
+
+       struct kev_msg ev_msg = { 0 };
+       ev_msg.vendor_code      = KEV_VENDOR_APPLE;
+       ev_msg.kev_class        = KEV_NETWORK_CLASS;
+       ev_msg.kev_subclass     = KEV_IPSEC_SUBCLASS;
+       ev_msg.event_code       = KEV_IPSEC_WAKE_PACKET;
+       int result = kev_post_msg(&ev_msg);
+       if (result != 0) {
+               os_log_error(OS_LOG_DEFAULT, "%s: kev_post_msg() failed with error %d for wake uuid %s",
+                   __func__, result, ipsec_wake_pkt.wake_uuid);
+       }
+
        ipsec_save_wake_pkt = false;
 done:
        lck_mtx_unlock(sadb_mutex);
        return;
 }
 
+static void
+ipsec_get_local_ports(void)
+{
+       errno_t error;
+       ifnet_t *ifp_list;
+       uint32_t count, i;
+       static uint8_t port_bitmap[bitstr_size(IP_PORTRANGE_SIZE)];
+
+       error = ifnet_list_get_all(IFNET_FAMILY_IPSEC, &ifp_list, &count);
+       if (error != 0) {
+               os_log_error(OS_LOG_DEFAULT, "%s: ifnet_list_get_all() failed %d",
+                   __func__, error);
+               return;
+       }
+       for (i = 0; i < count; i++) {
+               ifnet_t ifp = ifp_list[i];
+
+               /*
+                * Get all the TCP and UDP ports for IPv4 and IPv6
+                */
+               error = ifnet_get_local_ports_extended(ifp, PF_UNSPEC,
+                   IFNET_GET_LOCAL_PORTS_WILDCARDOK |
+                   IFNET_GET_LOCAL_PORTS_NOWAKEUPOK |
+                   IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK,
+                   port_bitmap);
+               if (error != 0) {
+                       os_log_error(OS_LOG_DEFAULT, "%s: ifnet_get_local_ports_extended(%s) failed %d",
+                           __func__, if_name(ifp), error);
+               }
+       }
+       ifnet_list_free(ifp_list);
+}
+
 static IOReturn
 ipsec_sleep_wake_handler(void *target, void *refCon, UInt32 messageType,
     void *provider, void *messageArgument, vm_size_t argSize)
@@ -5204,16 +5269,17 @@ ipsec_sleep_wake_handler(void *target, void *refCon, UInt32 messageType,
 #pragma unused(target, refCon, provider, messageArgument, argSize)
        switch (messageType) {
        case kIOMessageSystemWillSleep:
+               ipsec_get_local_ports();
                memset(&ipsec_wake_pkt, 0, sizeof(ipsec_wake_pkt));
                IOPMCopySleepWakeUUIDKey(ipsec_wake_pkt.wake_uuid,
                    sizeof(ipsec_wake_pkt.wake_uuid));
-               ipseclog((LOG_INFO,
-                   "ipsec: system will sleep"));
+               ipseclog((LOG_NOTICE,
+                   "ipsec: system will sleep, uuid: %s", ipsec_wake_pkt.wake_uuid));
                break;
-       case kIOMessageSystemHasPoweredOn:
+       case kIOMessageSystemWillPowerOn:
                ipsec_save_wake_pkt = true;
-               ipseclog((LOG_INFO,
-                   "ipsec: system has powered on"));
+               ipseclog((LOG_NOTICE,
+                   "ipsec: system will powered on, uuid: %s", ipsec_wake_pkt.wake_uuid));
                break;
        default:
                break;
index eb094bfce7a038056dc7745290aaa8ce2063d346..0ce45fb5f988e9ab1d8145e50afbb41d6eaf8336 100644 (file)
@@ -45,6 +45,8 @@
 #include <netkey/keydb.h>
 #include <netinet/ip_var.h>
 
+#include <os/log.h>
+
 /* lock for IPsec stats */
 extern lck_grp_t         *sadb_stat_mutex_grp;
 extern lck_grp_attr_t    *sadb_stat_mutex_grp_attr;
@@ -337,7 +339,26 @@ extern int ip4_esp_randpad;
 
 extern bool ipsec_save_wake_pkt;
 
-#define ipseclog(x)     do { if (ipsec_debug) log x; } while (0)
+#define _ipsec_log(level, fmt, ...) do {                            \
+       os_log_type_t type;                                         \
+       switch (level) {                                            \
+       default:                                                    \
+               type = OS_LOG_TYPE_DEFAULT;                         \
+               break;                                              \
+       case LOG_INFO:                                              \
+               type = OS_LOG_TYPE_INFO;                            \
+               break;                                              \
+       case LOG_DEBUG:                                             \
+               type = OS_LOG_TYPE_DEBUG;                           \
+               break;                                              \
+       case LOG_ERR:                                               \
+               type = OS_LOG_TYPE_ERROR;                           \
+               break;                                              \
+       }                                                           \
+       os_log_with_type(OS_LOG_DEFAULT, type, fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define ipseclog(x)     do { if (ipsec_debug != 0) _ipsec_log x; } while (0)
 
 extern struct secpolicy *ipsec4_getpolicybysock(struct mbuf *, u_int,
     struct socket *, int *);
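
With the macro above, the historical double-parenthesis call style is unchanged; a call such as the following now lands in os_log with the mapped type instead of the BSD log() sink:

    /* ipseclog((LOG_ERR, "esp: bad len %d", len));
     *   expands to _ipsec_log(LOG_ERR, "esp: bad len %d", len), i.e.
     *   os_log_with_type(OS_LOG_DEFAULT, OS_LOG_TYPE_ERROR,
     *       "esp: bad len %d", len);
     */
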
index 9b4c3a16e174e29492874b166923296ba2e0ec67..2917f5c7ec6d6f632126aeff553108c8e81c1744 100644 (file)
 #include <netinet6/ipsec.h>
 #include <netinet6/ipsec6.h>
 #include <netinet6/esp6.h>
+#include <netkey/key.h>
 extern int ipsec_bypass;
 extern int esp_udp_encap_port;
 #endif /* IPSEC */
@@ -492,34 +493,49 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
        if (ipsec_bypass == 0 && (esp_udp_encap_port & 0xFFFF) != 0 &&
            (uh->uh_dport == ntohs((u_short)esp_udp_encap_port) ||
            uh->uh_sport == ntohs((u_short)esp_udp_encap_port))) {
-               int payload_len = ulen - sizeof(struct udphdr) > 4 ? 4 :
-                   ulen - sizeof(struct udphdr);
+               /*
+                * Treat as ESP or keepalive when either:
+                *      1. the destination port of the incoming packet is 4500, or
+                *      2. the source port of the incoming packet is 4500 and
+                *         the SADB has an SA matching the IP addresses and ports.
+                */
+               bool check_esp = true;
+               if (uh->uh_dport != ntohs((u_short)esp_udp_encap_port)) {
+                       check_esp = key_checksa_present(AF_INET6, (caddr_t)&ip6->ip6_dst,
+                           (caddr_t)&ip6->ip6_src, uh->uh_dport,
+                           uh->uh_sport);
+               }
+
+               if (check_esp) {
+                       int payload_len = ulen - sizeof(struct udphdr) > 4 ? 4 :
+                           ulen - sizeof(struct udphdr);
+
+                       if (m->m_len < off + sizeof(struct udphdr) + payload_len) {
+                               if ((m = m_pullup(m, off + sizeof(struct udphdr) +
+                                   payload_len)) == NULL) {
+                                       udpstat.udps_hdrops++;
+                                       goto bad;
+                               }
+                               /*
+                                * Expect 32-bit aligned data pointer on strict-align
+                                * platforms.
+                                */
+                               MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
 
-               if (m->m_len < off + sizeof(struct udphdr) + payload_len) {
-                       if ((m = m_pullup(m, off + sizeof(struct udphdr) +
-                           payload_len)) == NULL) {
-                               udpstat.udps_hdrops++;
+                               ip6 = mtod(m, struct ip6_hdr *);
+                               uh = (struct udphdr *)(void *)((caddr_t)ip6 + off);
+                       }
+                       /* Check for NAT keepalive packet */
+                       if (payload_len == 1 && *(u_int8_t*)
+                           ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) {
                                goto bad;
+                       } else if (payload_len == 4 && *(u_int32_t*)(void *)
+                           ((caddr_t)uh + sizeof(struct udphdr)) != 0) {
+                               /* UDP encapsulated IPsec packet to pass through NAT */
+                               /* preserve the udp header */
+                               *offp = off + sizeof(struct udphdr);
+                               return esp6_input(mp, offp, IPPROTO_UDP);
                        }
-                       /*
-                        * Expect 32-bit aligned data pointer on strict-align
-                        * platforms.
-                        */
-                       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
-
-                       ip6 = mtod(m, struct ip6_hdr *);
-                       uh = (struct udphdr *)(void *)((caddr_t)ip6 + off);
-               }
-               /* Check for NAT keepalive packet */
-               if (payload_len == 1 && *(u_int8_t*)
-                   ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) {
-                       goto bad;
-               } else if (payload_len == 4 && *(u_int32_t*)(void *)
-                   ((caddr_t)uh + sizeof(struct udphdr)) != 0) {
-                       /* UDP encapsulated IPsec packet to pass through NAT */
-                       /* preserve the udp header */
-                       *offp = off + sizeof(struct udphdr);
-                       return esp6_input(mp, offp, IPPROTO_UDP);
                }
        }
 #endif /* IPSEC */
index e1230e472fa8f8fbebcce6eef4da779a333e8a10..b7c473e6abd980a64fedc51a616ccda20ffbabab 100644 (file)
@@ -1471,6 +1471,157 @@ found:
        return match;
 }
 
+/*
+ * This function checks whether a UDP packet with a random local port
+ * and a remote port of 4500 matches an SA in the kernel. If it does match,
+ * send the packet to the ESP engine; if not, send it to the UDP protocol.
+ */
+bool
+key_checksa_present(u_int family,
+    caddr_t local_addr,
+    caddr_t remote_addr,
+    u_int16_t local_port,
+    u_int16_t remote_port)
+{
+       LCK_MTX_ASSERT(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
+
+       /* sanity check */
+       if (local_addr == NULL || remote_addr == NULL) {
+               panic("key_allocsa: NULL pointer is passed.\n");
+       }
+
+       /*
+        * Search the SAD.
+        * XXX: the inner IP header should be checked somewhere, e.g. when
+        * an IPsec tunnel packet is received; but ESP tunnel mode is
+        * encrypted, so we can't inspect the inner IP header.
+        */
+       /*
+        * search a valid state list for inbound packet.
+        * the search order is not important.
+        */
+       struct secashead *sah = NULL;
+       bool found_sa = false;
+
+       lck_mtx_lock(sadb_mutex);
+       LIST_FOREACH(sah, &sahtree, chain) {
+               if (sah->state == SADB_SASTATE_DEAD) {
+                       continue;
+               }
+
+               if (sah->dir != IPSEC_DIR_OUTBOUND) {
+                       continue;
+               }
+
+               if (family != sah->saidx.src.ss_family) {
+                       continue;
+               }
+
+               struct sockaddr_in src_in = {};
+               struct sockaddr_in6 src_in6 = {};
+
+               /* check src address */
+               switch (family) {
+               case AF_INET:
+                       src_in.sin_family = AF_INET;
+                       src_in.sin_len = sizeof(src_in);
+                       memcpy(&src_in.sin_addr, local_addr, sizeof(src_in.sin_addr));
+                       if (key_sockaddrcmp((struct sockaddr*)&src_in,
+                           (struct sockaddr *)&sah->saidx.src, 0) != 0) {
+                               continue;
+                       }
+                       break;
+               case AF_INET6:
+                       src_in6.sin6_family = AF_INET6;
+                       src_in6.sin6_len = sizeof(src_in6);
+                       memcpy(&src_in6.sin6_addr, local_addr, sizeof(src_in6.sin6_addr));
+                       if (IN6_IS_SCOPE_LINKLOCAL(&src_in6.sin6_addr)) {
+                               /* kame fake scopeid */
+                               src_in6.sin6_scope_id =
+                                   ntohs(src_in6.sin6_addr.s6_addr16[1]);
+                               src_in6.sin6_addr.s6_addr16[1] = 0;
+                       }
+                       if (key_sockaddrcmp((struct sockaddr*)&src_in6,
+                           (struct sockaddr *)&sah->saidx.src, 0) != 0) {
+                               continue;
+                       }
+                       break;
+               default:
+                       ipseclog((LOG_DEBUG, "key_checksa_present: "
+                           "unknown address family=%d.\n",
+                           family));
+                       continue;
+               }
+
+               struct sockaddr_in dest_in = {};
+               struct sockaddr_in6 dest_in6 = {};
+
+               /* check dst address */
+               switch (family) {
+               case AF_INET:
+                       dest_in.sin_family = AF_INET;
+                       dest_in.sin_len = sizeof(dest_in);
+                       memcpy(&dest_in.sin_addr, remote_addr, sizeof(dest_in.sin_addr));
+                       if (key_sockaddrcmp((struct sockaddr*)&dest_in,
+                           (struct sockaddr *)&sah->saidx.dst, 0) != 0) {
+                               continue;
+                       }
+
+                       break;
+               case AF_INET6:
+                       dest_in6.sin6_family = AF_INET6;
+                       dest_in6.sin6_len = sizeof(dest_in6);
+                       memcpy(&dest_in6.sin6_addr, remote_addr, sizeof(dest_in6.sin6_addr));
+                       if (IN6_IS_SCOPE_LINKLOCAL(&dest_in6.sin6_addr)) {
+                               /* kame fake scopeid */
+                               dest_in6.sin6_scope_id =
+                                   ntohs(dest_in6.sin6_addr.s6_addr16[1]);
+                               dest_in6.sin6_addr.s6_addr16[1] = 0;
+                       }
+                       if (key_sockaddrcmp((struct sockaddr*)&dest_in6,
+                           (struct sockaddr *)&sah->saidx.dst, 0) != 0) {
+                               continue;
+                       }
+
+                       break;
+               default:
+                       ipseclog((LOG_DEBUG, "key_checksa_present: "
+                           "unknown address family=%d.\n", family));
+                       continue;
+               }
+
+               struct secasvar *nextsav = NULL;
+               for (u_int stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) {
+                       u_int state = saorder_state_alive[stateidx];
+                       for (struct secasvar *sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) {
+                               nextsav = LIST_NEXT(sav, chain);
+                               /* sanity check */
+                               if (sav->state != state) {
+                                       ipseclog((LOG_DEBUG, "key_checksa_present: "
+                                           "invalid sav->state "
+                                           "(state: %d SA: %d)\n",
+                                           state, sav->state));
+                                       continue;
+                               }
+
+                               if (sav->remote_ike_port != ntohs(remote_port)) {
+                                       continue;
+                               }
+
+                               if (sav->natt_encapsulated_src_port != local_port) {
+                                       continue;
+                               }
+                               found_sa = true;
+                               break;
+                       }
+               }
+       }
+
+       /* not found */
+       lck_mtx_unlock(sadb_mutex);
+       return found_sa;
+}
+
 u_int16_t
 key_natt_get_translated_port(
        struct secasvar *outsav)
@@ -1999,7 +2150,8 @@ key_msg2sp(
                                paddr = (struct sockaddr *)(xisr + 1);
                                uint8_t src_len = paddr->sa_len;
 
-                               if (xisr->sadb_x_ipsecrequest_len < src_len) {
+                               /* +sizeof(uint8_t) for dst_len below */
+                               if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr) + src_len + sizeof(uint8_t)) {
                                        ipseclog((LOG_DEBUG, "key_msg2sp: invalid request "
                                            "invalid source address length.\n"));
                                        key_freesp(newsp, KEY_SADB_UNLOCKED);
@@ -2023,7 +2175,7 @@ key_msg2sp(
                                paddr = (struct sockaddr *)((caddr_t)paddr + paddr->sa_len);
                                uint8_t dst_len = paddr->sa_len;
 
-                               if (xisr->sadb_x_ipsecrequest_len < (src_len + dst_len)) {
+                               if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr) + src_len + dst_len) {
                                        ipseclog((LOG_DEBUG, "key_msg2sp: invalid request "
                                            "invalid dest address length.\n"));
                                        key_freesp(newsp, KEY_SADB_UNLOCKED);
@@ -4086,8 +4238,8 @@ key_delsav(
        /* remove from SA header */
        if (__LIST_CHAINED(sav)) {
                LIST_REMOVE(sav, chain);
+               ipsec_sav_count--;
        }
-       ipsec_sav_count--;
 
        if (sav->spihash.le_prev || sav->spihash.le_next) {
                LIST_REMOVE(sav, spihash);
index 4d3ee942135cbe2c0f5de28aac17987673978980..418f9792daf2cdd024d57aed5530ee5415137b01 100644 (file)
@@ -67,6 +67,7 @@ extern struct secasvar *key_allocsa(u_int, caddr_t, caddr_t,
 struct secasvar *
 key_allocsa_extended(u_int family, caddr_t src, caddr_t dst,
     u_int proto, u_int32_t spi, ifnet_t interface);
+extern bool key_checksa_present(u_int family, caddr_t src, caddr_t dst, u_int16_t src_port, u_int16_t dst_port);
 extern u_int16_t key_natt_get_translated_port(struct secasvar *);
 extern void key_freesp(struct secpolicy *, int);
 extern void key_freesav(struct secasvar *, int);
index 6a6878fc5f538526da109627b87f07dc05728dba..51d151ecfc7c470e32c629468dd47ae1f6695cc8 100644 (file)
@@ -3597,6 +3597,65 @@ nfsrv_fhtoexport(struct nfs_filehandle *nfhp)
        return nx;
 }
 
+struct nfsrv_getvfs_by_mntonname_callback_args {
+       const char      *path;          /* IN */
+       mount_t         mp;             /* OUT */
+};
+
+static int
+nfsrv_getvfs_by_mntonname_callback(mount_t mp, void *v)
+{
+       struct nfsrv_getvfs_by_mntonname_callback_args * const args = v;
+       char real_mntonname[MAXPATHLEN];
+       int pathbuflen = MAXPATHLEN;
+       vnode_t rvp;
+       int error;
+
+       error = VFS_ROOT(mp, &rvp, vfs_context_current());
+       if (error) {
+               goto out;
+       }
+       error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
+           VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
+       vnode_put(rvp);
+       if (error) {
+               goto out;
+       }
+       if (strcmp(args->path, real_mntonname) == 0) {
+               error = vfs_busy(mp, LK_NOWAIT);
+               if (error == 0) {
+                       args->mp = mp;
+               }
+               return VFS_RETURNED_DONE;
+       }
+out:
+       return VFS_RETURNED;
+}
+
+static mount_t
+nfsrv_getvfs_by_mntonname(char *path)
+{
+       struct nfsrv_getvfs_by_mntonname_callback_args args = {
+               .path = path,
+               .mp = NULL,
+       };
+       mount_t mp;
+       int error;
+
+       mp = vfs_getvfs_by_mntonname(path);
+       if (mp) {
+               error = vfs_busy(mp, LK_NOWAIT);
+               mount_iterdrop(mp);
+               if (error) {
+                       mp = NULL;
+               }
+       } else if (vfs_iterate(0, nfsrv_getvfs_by_mntonname_callback,
+           &args) == 0) {
+               mp = args.mp;
+       }
+       return mp;
+}
+
 /*
  * nfsrv_fhtovp() - convert FH to vnode and export info
  */
@@ -3690,14 +3749,7 @@ nfsrv_fhtovp(
        }
 
        /* find mount structure */
-       mp = vfs_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path);
-       if (mp) {
-               error = vfs_busy(mp, LK_NOWAIT);
-               mount_iterdrop(mp);
-               if (error) {
-                       mp = NULL;
-               }
-       }
+       mp = nfsrv_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path);
        if (!mp) {
                /*
                 * We have an export, but no mount?
index 78d83c9519360381ec63a719ea5bece6eee0a5d0..fe4bb37cf7de586449dbef76ac7339cac72765d1 100644 (file)
@@ -522,6 +522,23 @@ worktodo:
  * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
  */
 
+static struct nfs_exportfs *
+nfsrv_find_exportfs(const char *ptr)
+{
+       struct nfs_exportfs *nxfs;
+
+       LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
+               if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
+                       break;
+               }
+       }
+       if (nxfs && strncmp(nxfs->nxfs_path, ptr, strlen(nxfs->nxfs_path))) {
+               nxfs = NULL;
+       }
+
+       return nxfs;
+}
+
 /*
  * Get file handle system call
  */
@@ -532,7 +549,7 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
        struct nfs_filehandle nfh;
        int error, fhlen, fidlen;
        struct nameidata nd;
-       char path[MAXPATHLEN], *ptr;
+       char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr;
        size_t pathlen;
        struct nfs_exportfs *nxfs;
        struct nfs_export *nx;
@@ -575,12 +592,28 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
        // find exportfs that matches f_mntonname
        lck_rw_lock_shared(&nfsrv_export_rwlock);
        ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
-       LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
-               if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
-                       break;
+       if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) {
+               /*
+                * The f_mntonname might be a firmlink path.  Resolve
+                * it into a physical path and try again.
+                */
+               int pathbuflen = MAXPATHLEN;
+               vnode_t rvp;
+
+               error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
+               if (error) {
+                       goto out;
+               }
+               error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
+                   VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
+               vnode_put(rvp);
+               if (error) {
+                       goto out;
                }
+               ptr = real_mntonname;
+               nxfs = nfsrv_find_exportfs(ptr);
        }
-       if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
+       if (nxfs == NULL) {
                error = EINVAL;
                goto out;
        }
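
getfh() above and nfsrv_getvfs_by_mntonname_callback() earlier share the same
resolve-and-retry step. A self-contained sketch of that step (the helper name
is an assumption; VFS_ROOT(), vn_getpath_ext(), and the VN_GETPATH_* flags are
the calls this diff actually uses):

    /* Resolve a mount's physical (firmlink-free) mount-on path. */
    static int
    resolve_physical_mntonname(mount_t mp, char *buf, int *buflen)
    {
            vnode_t rvp;
            int error;

            error = VFS_ROOT(mp, &rvp, vfs_context_current());
            if (error) {
                    return error;
            }
            error = vn_getpath_ext(rvp, NULLVP, buf, buflen,
                VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
            vnode_put(rvp);
            return error;
    }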
index b460a0411a3529c7b1fc2aa716d358a2be6447b1..abb24e2fb1ccd9f79dbefc1bc3d75dd88339cc23 100644 (file)
@@ -5975,7 +5975,7 @@ nfs_dir_buf_search(
                if ((cnp->cn_namelen == dp->d_namlen) && !strcmp(cnp->cn_nameptr, dp->d_name)) {
                        fhlen = dp->d_name[dp->d_namlen + 1];
                        nvattrp = NFS_DIR_BUF_NVATTR(bp, i);
-                       if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhp->fh_len == 0) ||
+                       if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhlen == 0) ||
                            (nvattrp->nva_type == VNON) || (nvattrp->nva_fileid == 0)) {
                                /* entry is not valid */
                                error = ENOENT;
index c979f80eb7d1f89b4e286150a05dab0e63bd3114..0e83375cdd27820b0b87d439b4ed417f2c1d56b1 100644 (file)
@@ -1856,6 +1856,7 @@ fixedpri:
                }
        }
 
+
 done:
        if (qos_rv && voucher_rv) {
                /* Both failed, give that a unique error. */
@@ -3239,6 +3240,8 @@ workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq,
 
        workq_thread_reset_pri(wq, uth, req, /*unpark*/ true);
 
+       thread_unfreeze_base_pri(uth->uu_thread);
+#if 0 // <rdar://problem/55259863> to turn this back on
        if (__improbable(thread_unfreeze_base_pri(uth->uu_thread) && !is_creator)) {
                if (req_ts) {
                        workq_perform_turnstile_operation_locked(wq, ^{
@@ -3251,6 +3254,7 @@ workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq,
                WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 3, 0, 0, 0);
                goto park_thawed;
        }
+#endif
 
        /*
         * We passed all checks, dequeue the request, bind to it, and set it up
@@ -3321,7 +3325,9 @@ workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq,
 
 park:
        thread_unfreeze_base_pri(uth->uu_thread);
+#if 0 // <rdar://problem/55259863>
 park_thawed:
+#endif
        workq_park_and_unlock(p, wq, uth, setup_flags);
 }
 
index f7ed3080cf87e61f04b076e7611386a88675a9b7..0824413708e55c8ec7108ce3ead9395678f4d012 100644 (file)
@@ -295,12 +295,14 @@ void workq_kern_threadreq_unlock(struct proc *p);
 
 void workq_kern_threadreq_redrive(struct proc *p, workq_kern_threadreq_flags_t flags);
 
+// This enum matches _pthread_set_flags in libpthread's qos_private.h
 enum workq_set_self_flags {
-       WORKQ_SET_SELF_QOS_FLAG = 0x1,
-       WORKQ_SET_SELF_VOUCHER_FLAG = 0x2,
-       WORKQ_SET_SELF_FIXEDPRIORITY_FLAG = 0x4,
-       WORKQ_SET_SELF_TIMESHARE_FLAG = 0x8,
-       WORKQ_SET_SELF_WQ_KEVENT_UNBIND = 0x10,
+       WORKQ_SET_SELF_QOS_FLAG             = 0x01,
+       WORKQ_SET_SELF_VOUCHER_FLAG         = 0x02,
+       WORKQ_SET_SELF_FIXEDPRIORITY_FLAG   = 0x04,
+       WORKQ_SET_SELF_TIMESHARE_FLAG       = 0x08,
+       WORKQ_SET_SELF_WQ_KEVENT_UNBIND     = 0x10,
+       WORKQ_SET_SELF_ALTERNATE_AMX        = 0x20,
 };
 
 void workq_proc_suspended(struct proc *p);
index d6c156a1b599a984a797873adfe67b9f0cb8adae..0a5f138ae5e3d4b52ee2d482165b852aea3702e4 100644 (file)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 1999-2009 Apple Inc.
+ * Copyright (c) 1999-2019 Apple Inc.
  * Copyright (c) 2006-2007 Robert N. M. Watson
  * All rights reserved.
  *
@@ -754,6 +754,77 @@ out:
        uthread->uu_ar = NULL;
 }
 
+/*
+ * For system calls such as posix_spawn(2), the sub-operations (i.e., file
+ * actions and port actions) need to be audited as their own events. As with
+ * system calls, we determine whether a sub-operation needs to be audited by
+ * examining the preselection masks.
+ */
+void
+audit_subcall_enter(au_event_t event, proc_t proc, struct uthread *uthread)
+{
+       struct au_mask *aumask;
+       au_class_t class;
+       au_id_t auid;
+       kauth_cred_t cred;
+
+       /*
+        * Check which audit mask to use; either the kernel non-attributable
+        * event mask or the process audit mask.
+        */
+       cred = kauth_cred_proc_ref(proc);
+       auid = cred->cr_audit.as_aia_p->ai_auid;
+       if (auid == AU_DEFAUDITID) {
+               aumask = &audit_nae_mask;
+       } else {
+               aumask = &cred->cr_audit.as_mask;
+       }
+
+       /*
+        * Allocate an audit record, if preselection allows it, and store in
+        * the thread for later use.
+        */
+       class = au_event_class(event);
+
+       if (au_preselect(event, class, aumask, AU_PRS_BOTH)) {
+               /*
+                * If we're out of space and need to suspend unprivileged
+                * processes, do that here rather than trying to allocate
+                * another audit record.
+                *
+                * Note: we might wish to be able to continue here in the
+                * future, if the system recovers.  That should be possible
+                * by means of checking the condition in a loop around
+                * cv_wait().  It might be desirable to reevaluate whether an
+                * audit record is still required for this event by
+                * re-calling au_preselect().
+                */
+               if (audit_in_failure &&
+                   suser(cred, &proc->p_acflag) != 0) {
+                       cv_wait(&audit_fail_cv, &audit_mtx);
+                       panic("audit_failing_stop: thread continued");
+               }
+               if (uthread->uu_ar == NULL) {
+                       uthread->uu_ar = audit_new(event, proc, uthread);
+               }
+       } else if (audit_pipe_preselect(auid, event, class, AU_PRS_BOTH, 0)) {
+               if (uthread->uu_ar == NULL) {
+                       uthread->uu_ar = audit_new(event, proc, uthread);
+               }
+       }
+
+       kauth_cred_unref(&cred);
+}
+
+void
+audit_subcall_exit(int error, struct uthread *uthread)
+{
+       /* A subcall doesn't have a return value, so always pass zero. */
+       audit_commit(uthread->uu_ar, error, 0 /* retval */);
+
+       uthread->uu_ar = NULL;
+}
+
 /*
  * Calls to set up and tear down audit structures used during Mach system
  * calls.
index 13a1b8c248a544a45757fcdd6a20661e2290596a..90309907475bb9cb227cc8f0ccfbd05ee60c1e61 100644 (file)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2004-2016 Apple Inc.
+ * Copyright (c) 2004-2019 Apple Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -172,6 +172,11 @@ void    audit_syscall_exit(int error, struct proc *proc,
 void    audit_mach_syscall_enter(unsigned short audit_event);
 void    audit_mach_syscall_exit(int retval, struct uthread *uthread);
 
+void                    audit_subcall_enter(au_event_t event,
+    struct proc *proc, struct uthread *uthread);
+void                    audit_subcall_exit(int error,
+    struct uthread *uthread);
+
 extern struct auditinfo_addr *audit_default_aia_p;
 
 /*
@@ -349,6 +354,16 @@ extern au_event_t sys_au_event[];
                        audit_syscall_exit(code, error, proc, uthread); \
 } while (0)
 
+#define AUDIT_SUBCALL_ENTER(event, proc, uthread)  do {                 \
+       if (AUDIT_ENABLED())                                            \
+               audit_subcall_enter(AUE_ ## event, proc, uthread);      \
+} while (0)
+
+#define AUDIT_SUBCALL_EXIT(uthread, error)  do {                        \
+       if (AUDIT_AUDITING(uthread->uu_ar))                             \
+               audit_subcall_exit(error, uthread);                     \
+} while (0)
+
 /*
  * Wrap the audit_mach_syscall_enter() and audit_mach_syscall_exit()
  * functions in a manner similar to other system call enter/exit functions.
@@ -390,6 +405,12 @@ extern au_event_t sys_au_event[];
 #define AUDIT_SYSCALL_EXIT(code, proc, uthread, error)  do {            \
 } while (0)
 
+#define AUDIT_SUBCALL_ENTER(event, proc, uthread)    do {               \
+} while (0)
+
+#define AUDIT_SUBCALL_EXIT(uthread, error)  do {                        \
+} while (0)
+
 #define AUDIT_MACH_SYSCALL_ENTER(args...)       do {                    \
 } while (0)
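
A hedged sketch of how a posix_spawn(2) sub-operation might be bracketed with
the new macros (the helper and variables are illustrative; AUE_OPEN_RWTC is a
real BSM event chosen here only as an example):

    AUDIT_SUBCALL_ENTER(OPEN_RWTC, p, uthread);     /* expands AUE_OPEN_RWTC */
    error = spawn_file_action_open(p, fa);          /* hypothetical helper */
    AUDIT_SUBCALL_EXIT(uthread, error);

AUDIT_SUBCALL_EXIT() commits only when AUDIT_AUDITING() sees a record, i.e.
only when the enter hook's preselection actually allocated one.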
 
index 5966311eb5c2d08060a18877a01c119a8aa32de7..6303c491579245ba94a6d1ead9450d7eb762754f 100644 (file)
@@ -728,7 +728,11 @@ struct knote {
        union {
                void               *kn_hook;
                uint32_t            kn_hook32;
-               uint64_t            kn_hook64;
+#if __LP64__
+               uint64_t            kn_hook_waitqid;
+#else
+               uint32_t            kn_hook_waitqid;
+#endif
        };
 
        /* per filter pointer to the resource being watched */
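
A note on the union change (rationale inferred, not stated in the diff):
sizing kn_hook_waitqid to the pointer width keeps the anonymous union the
same size as kn_hook on both ILP32 and LP64, whereas the old unconditional
uint64_t widened it to 8 bytes even on 32-bit kernels.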
index 04d31067ec9c353d2bb3018a2077f6156c25a8ab..62387f2b61741114f9dd6e6a89f0f2f3a16f6e49 100644 (file)
@@ -281,9 +281,9 @@ struct kqworkloop {
 #define KQWL_STAYACTIVE_FIRED_BIT     (1 << 0)
        uint8_t             kqwl_wakeup_indexes;          /* QoS/override levels that woke */
        kq_index_t          kqwl_stayactive_qos;          /* max QoS of stayactive knotes */
+       struct turnstile   *kqwl_turnstile;               /* turnstile for sync IPC/waiters */
        kqueue_id_t         kqwl_dynamicid;               /* dynamic identity */
        uint64_t            kqwl_params;                  /* additional parameters */
-       struct turnstile   *kqwl_turnstile;               /* turnstile for sync IPC/waiters */
        LIST_ENTRY(kqworkloop) kqwl_hashlink;             /* linkage for search list */
 #if CONFIG_WORKLOOP_DEBUG
 #define KQWL_HISTORY_COUNT 32
index 03c7af88f8286640a30aae222055742717fe2b53..c30f0ba3df41077624e30b39eee5ab79af5f2f3f 100644 (file)
@@ -645,11 +645,12 @@ __BEGIN_DECLS
 #define DBG_MT_TMPCPU 0xff
 
 /* The Kernel Debug Sub Classes for DBG_MISC */
-#define DBG_EVENT              0x10
-#define DBG_MISC_INSTRUMENTS   0x11
-#define DBG_MISC_INSTRUMENTSBT 0x12
-#define DBG_MISC_LAYOUT        0x1a
-#define DBG_BUFFER             0x20
+#define DBG_MISC_COREBRIGHTNESS 0x01
+#define DBG_EVENT               0x10
+#define DBG_MISC_INSTRUMENTS    0x11
+#define DBG_MISC_INSTRUMENTSBT  0x12
+#define DBG_MISC_LAYOUT         0x1a
+#define DBG_BUFFER              0x20
 
 /* The Kernel Debug Sub Classes for DBG_DYLD */
 #define DBG_DYLD_UUID (5)
index 8ea2bce397ec889b5b323b20f05b11ae3608be7e..2ba2a84525ef07d0efab3dafff0bd48787247a71 100644 (file)
@@ -562,6 +562,7 @@ void memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is
 
 #if __arm64__
 void memorystatus_act_on_legacy_footprint_entitlement(proc_t p, boolean_t footprint_increase);
+void memorystatus_act_on_ios13extended_footprint_entitlement(proc_t p);
 #endif /* __arm64__ */
 
 #endif /* CONFIG_MEMORYSTATUS */
index ab81a003ef139e9e69b0c7ecd61fdec88d1fc19f..574ef7a706ddcecd7e037736cad775956322f7c6 100644 (file)
@@ -107,6 +107,7 @@ struct lockf {
 __BEGIN_DECLS
 
 #ifdef KERNEL_PRIVATE
+void    lf_init(void);
 int     lf_advlock(struct vnop_advlock_args *);
 int     lf_assert(struct vnop_advlock_args *, void **);
 void    lf_commit(void *, int);
index f7e1e82ff41178ef7ba92cfa0e3dff4f95012906..2e30057f1f3a54eb9227749f6ebad2479b80b905 100644 (file)
@@ -960,6 +960,7 @@ extern void set_packet_service_class(struct mbuf *, struct socket *,
     mbuf_svc_class_t, u_int32_t);
 extern void so_tc_update_stats(struct mbuf *, struct socket *,
     mbuf_svc_class_t);
+extern int so_tos_from_control(struct mbuf *);
 extern int so_tc_from_control(struct mbuf *, int *);
 extern mbuf_svc_class_t so_tc2msc(int);
 extern int so_svc2tc(mbuf_svc_class_t);
index 42734a4de5742c7dc14a8b6660ff79e43e32cdf3..8d4001be40d7a080aa528a554c791c9902b108ef 100644 (file)
@@ -123,7 +123,7 @@ struct uthread {
         * relies on single copy atomicity and cannot be changed to a bitfield.
         */
        bool uu_workq_pthread_kill_allowed;
-       unsigned int syscall_code; /* current syscall code */
+       uint16_t syscall_code; /* current syscall code */
 
        /* thread exception handling */
        int     uu_exception;
@@ -191,12 +191,6 @@ struct uthread {
                uint    nbytes; /* number of bytes in ibits and obits */
        } uu_select;                    /* saved state for select() */
 
-       /* internal support for continuation framework */
-       int (*uu_continuation)(int);
-       int uu_pri;
-       int uu_timo;
-       caddr_t uu_wchan;                       /* sleeping thread wait channel */
-       const char *uu_wmesg;                   /* ... wait message */
        struct proc *uu_proc;
        thread_t uu_thread;
        void * uu_userstate;
@@ -223,12 +217,19 @@ struct uthread {
        lck_spin_t      uu_rethrottle_lock;     /* locks was_rethrottled and is_throttled */
        TAILQ_ENTRY(uthread) uu_throttlelist;   /* List of uthreads currently throttled */
        void    *       uu_throttle_info;       /* pointer to throttled I/Os info */
-       int             uu_on_throttlelist;
-       int             uu_lowpri_window;
+       int8_t          uu_on_throttlelist;
+       bool            uu_lowpri_window;
        /* These boolean fields are protected by different locks */
        bool            uu_was_rethrottled;
        bool            uu_is_throttled;
        bool            uu_throttle_bc;
+       bool            uu_defer_reclaims;
+
+       /* internal support for continuation framework */
+       uint16_t uu_pri;                        /* pri | PCATCH | PVFS, ... */
+       caddr_t uu_wchan;                       /* sleeping thread wait channel */
+       int (*uu_continuation)(int);
+       const char *uu_wmesg;                   /* ... wait message */
 
        u_int32_t       uu_network_marks;       /* network control flow marks */
 
@@ -236,7 +237,6 @@ struct uthread {
        vnode_t         uu_vreclaims;
        vnode_t         uu_cdir;                /* per thread CWD */
        int             uu_dupfd;               /* fd in fdesc_open/dupfdopen */
-       int             uu_defer_reclaims;
 
        /*
         * Bound kqueue request. This field is only cleared by the current thread,
@@ -244,7 +244,7 @@ struct uthread {
         */
        struct workq_threadreq_s *uu_kqr_bound;
        TAILQ_ENTRY(uthread) uu_workq_entry;
-       mach_vm_offset_t uu_workq_stackaddr;
+       vm_offset_t uu_workq_stackaddr;
        mach_port_name_t uu_workq_thport;
        struct uu_workq_policy {
                uint16_t qos_req : 4;         /* requested QoS */
index 5ec22ac9334186ed90caf88e0f6627f215d41138..e5263caf4dd0108e42b8b9c50151a78163c1bfd4 100644 (file)
@@ -2369,6 +2369,7 @@ int     vfs_context_issuser(vfs_context_t);
 int vfs_context_iskernel(vfs_context_t);
 vfs_context_t vfs_context_kernel(void);         /* get from 1st kernel thread */
 vnode_t vfs_context_cwd(vfs_context_t);
+vnode_t vfs_context_get_cwd(vfs_context_t); /* get cwd with iocount */
 int vnode_isnoflush(vnode_t);
 void vnode_setnoflush(vnode_t);
 void vnode_clearnoflush(vnode_t);
diff --git a/bsd/tests/ctrr_test_sysctl.c b/bsd/tests/ctrr_test_sysctl.c
deleted file mode 100644 (file)
index bea84e1..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2018 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/sysctl.h>
-
index 79a40a8173e3a5a5d7d5582cad63d12d8242ae60..693bf9de740a5d8cfc1bb886267c18612e1b9061 100644 (file)
@@ -1330,6 +1330,56 @@ vfs_context_cwd(vfs_context_t ctx)
        return cwd;
 }
 
+/*
+ * vfs_context_get_cwd
+ *
+ * Description:        Returns a vnode for the current working directory for the
+ *              supplied context. The returned vnode has an iocount on it
+ *              which must be released with a vnode_put().
+ *
+ * Parameters: vfs_context_t                   The context to use
+ *
+ * Returns:    vnode_t                         The current working directory
+ *                                             for this context
+ *
+ * Notes:      The function first attempts to obtain the current directory
+ *             from the thread, and if it is not present there, falls back
+ *             to obtaining it from the process instead.  If it can't be
+ *             obtained from either place, we return NULLVP.
+ */
+vnode_t
+vfs_context_get_cwd(vfs_context_t ctx)
+{
+       vnode_t cwd = NULLVP;
+
+       if (ctx != NULL && ctx->vc_thread != NULL) {
+               uthread_t uth = get_bsdthread_info(ctx->vc_thread);
+               proc_t proc;
+
+               /*
+                * Get the cwd from the thread; if there isn't one, get it
+                * from the process, instead.
+                */
+               cwd = uth->uu_cdir;
+
+               if (cwd) {
+                       if ((vnode_get(cwd) != 0)) {
+                               cwd = NULLVP;
+                       }
+               } else if ((proc = (proc_t)get_bsdthreadtask_info(ctx->vc_thread)) != NULL &&
+                   proc->p_fd != NULL) {
+                       proc_fdlock(proc);
+                       cwd = proc->p_fd->fd_cdir;
+                       if (cwd && (vnode_get(cwd) != 0)) {
+                               cwd = NULLVP;
+                       }
+                       proc_fdunlock(proc);
+               }
+       }
+
+       return cwd;
+}
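
Unlike vfs_context_cwd(), which returns a borrowed pointer, the new accessor
hands back an iocount that the caller must drop. A minimal hedged usage
sketch:

    vnode_t cwd = vfs_context_get_cwd(ctx);
    if (cwd != NULLVP) {
            /* safe to use: the iocount keeps cwd from being recycled */
            vnode_put(cwd);
    }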
+
 /*
  * vfs_context_create
  *
index aaaf2fbb1270052804fdafbbc38b2b42d7b80044..5fe5b737a4e2482dc7ac2ddfc4337a1e14083d87 100644 (file)
@@ -113,7 +113,7 @@ static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, v
 #endif
 
 static int              lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx);
-static int              lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx);
+static int              handle_symlink_for_namei(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx);
 static int              lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx);
 static void             lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation);
 static int              lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly,
@@ -167,6 +167,8 @@ namei(struct nameidata *ndp)
 {
        struct filedesc *fdp;   /* pointer to file descriptor state */
        struct vnode *dp;       /* the directory we are searching */
+       struct vnode *rootdir_with_usecount = NULLVP;
+       struct vnode *startdir_with_usecount = NULLVP;
        struct vnode *usedvp = ndp->ni_dvp;  /* store pointer to vp in case we must loop due to
                                              *                                          heavy vnode pressure */
        u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
@@ -348,16 +350,80 @@ retry_copy:
 
        /*
         * determine the starting point for the translation.
+        *
+        * We may need up to 2 usecounts on vnodes before starting the translation.
+        * We need to have a usecount on the root directory for the process
+        * for the entire duration of the lookup. This is because symlink
+        * translation can restart translation at / if a symlink is encountered.
+        *
+        * For the duration of this lookup, the rootdir is the one we fetch now
+        * under the proc_fdlock, even if the proc rootdir changes once we let
+        * go of the proc_fdlock.
+        *
+        * In the future we may consider holding off a chroot until in-progress
+        * lookups complete.
+        *
+        * If the starting directory is not the process rootdir then we need
+        * a usecount on the starting directory as well for the duration of the
+        * lookup.
+        *
+        * Getting an additional usecount involves first getting an iocount under
+        * the lock that ensures that a usecount is on the directory. Once we
+        * get an iocount we can release the lock, and we will be free to get a
+        * usecount without the vnode getting recycled. Once we get the usecount
+        * we can release the iocount which we used to get our usecount.
         */
+       proc_fdlock(p);
+
        if ((ndp->ni_rootdir = fdp->fd_rdir) == NULLVP) {
                if (!(fdp->fd_flags & FD_CHROOT)) {
                        ndp->ni_rootdir = rootvnode;
+               } else {
+                       proc_fdunlock(p);
+                       /* This should be a panic */
+                       printf("proc is chrooted but does not have a root directory set\n");
+                       error = ENOENT;
+                       goto error_out;
                }
        }
+
+       /*
+        * We have the proc_fdlock here so we still have a usecount
+        * on ndp->ni_rootdir.
+        *
+        * However we need to get our own usecount on it in order to
+        * ensure that the vnode isn't recycled to something else.
+        *
+        * Note: It's fine if the vnode is force-reclaimed, but with
+        * a usecount it won't be reused until we release the reference.
+        *
+        * In order to get that usecount, however, we first need to get a
+        * non-blocking iocount, since we'll be doing this under
+        * the proc_fdlock.
+        */
+       if (vnode_get(ndp->ni_rootdir) != 0) {
+               proc_fdunlock(p);
+               error = ENOENT;
+               goto error_out;
+       }
+
+       proc_fdunlock(p);
+
+       /* Now we can safely get our own ref on ni_rootdir */
+       error = vnode_ref_ext(ndp->ni_rootdir, O_EVTONLY, 0);
+       vnode_put(ndp->ni_rootdir);
+       if (error) {
+               ndp->ni_rootdir = NULLVP;
+               goto error_out;
+       }
+
+       rootdir_with_usecount = ndp->ni_rootdir;
+
        cnp->cn_nameptr = cnp->cn_pnbuf;
 
        ndp->ni_usedvp = NULLVP;
 
+       bool dp_needs_put = false;
        if (*(cnp->cn_nameptr) == '/') {
                while (*(cnp->cn_nameptr) == '/') {
                        cnp->cn_nameptr++;
@@ -368,13 +434,40 @@ retry_copy:
                dp = ndp->ni_dvp;
                ndp->ni_usedvp = dp;
        } else {
-               dp = vfs_context_cwd(ctx);
+               dp = vfs_context_get_cwd(ctx);
+               if (dp) {
+                       dp_needs_put = true;
+               }
        }
 
        if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
+               if (dp_needs_put) {
+                       vnode_put(dp);
+                       dp_needs_put = false;
+               }
+               dp = NULLVP;
                error = ENOENT;
                goto error_out;
        }
+
+       if (dp != rootdir_with_usecount) {
+               error = vnode_ref_ext(dp, O_EVTONLY, 0);
+               if (error) {
+                       if (dp_needs_put) {
+                               vnode_put(dp);
+                               dp_needs_put = false;
+                       }
+                       dp = NULLVP;
+                       goto error_out;
+               }
+               startdir_with_usecount = dp;
+       }
+
+       if (dp_needs_put) {
+               vnode_put(dp);
+               dp_needs_put = false;
+       }
+
        ndp->ni_dvp = NULLVP;
        ndp->ni_vp  = NULLVP;
 
@@ -395,6 +488,7 @@ retry_copy:
 #endif
 
                ndp->ni_startdir = dp;
+               dp = NULLVP;
 
                if ((error = lookup(ndp))) {
                        goto error_out;
@@ -404,15 +498,46 @@ retry_copy:
                 * Check for symbolic link
                 */
                if ((cnp->cn_flags & ISSYMLINK) == 0) {
+                       if (startdir_with_usecount) {
+                               vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0);
+                               startdir_with_usecount = NULLVP;
+                       }
+                       if (rootdir_with_usecount) {
+                               vnode_rele_ext(rootdir_with_usecount, O_EVTONLY, 0);
+                               rootdir_with_usecount = NULLVP;
+                       }
                        return 0;
                }
 
 continue_symlink:
-               /* Gives us a new path to process, and a starting dir */
-               error = lookup_handle_symlink(ndp, &dp, ctx);
+               /*
+                * Gives us a new path to process, and a starting dir (with an iocount).
+                * The iocount is needed to take a usecount on the vnode returned
+                * (if it is not a vnode we already have a usecount on).
+                */
+               error = handle_symlink_for_namei(ndp, &dp, ctx);
                if (error != 0) {
                        break;
                }
+
+               if (dp == ndp->ni_rootdir && startdir_with_usecount) {
+                       vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0);
+                       startdir_with_usecount = NULLVP;
+               } else if (dp != startdir_with_usecount) {
+                       if (startdir_with_usecount) {
+                               vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0);
+                               startdir_with_usecount = NULLVP;
+                       }
+                       error = vnode_ref_ext(dp, O_EVTONLY, 0);
+                       if (error) {
+                               vnode_put(dp);
+                               dp = NULLVP;
+                               goto error_out;
+                       }
+                       startdir_with_usecount = dp;
+               }
+               /* iocount not required on dp anymore */
+               vnode_put(dp);
        }
        /*
         * only come here if we fail to handle a SYMLINK...
@@ -436,6 +561,15 @@ error_out:
        ndp->ni_vp = NULLVP;
        ndp->ni_dvp = NULLVP;
 
+       if (startdir_with_usecount) {
+               vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0);
+               startdir_with_usecount = NULLVP;
+       }
+       if (rootdir_with_usecount) {
+               vnode_rele_ext(rootdir_with_usecount, O_EVTONLY, 0);
+               rootdir_with_usecount = NULLVP;
+       }
+
 #if CONFIG_VOLFS
        /*
         * Deal with volfs fallout.
@@ -1530,10 +1664,10 @@ out:
 
 /*
  * Takes ni_vp and ni_dvp non-NULL.  Returns with *new_dp set to the location
- * at which to start a lookup with a resolved path, and all other iocounts dropped.
+ * at which to start a lookup with a resolved path and with an iocount.
  */
 static int
-lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx)
+handle_symlink_for_namei(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx)
 {
        int error;
        char *cp;               /* pointer into pathname argument */
@@ -1624,17 +1758,18 @@ lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx)
        /*
         * starting point for 'relative'
         * symbolic link path
+        *
+        * If the starting point is not the root we have to return an iocounted
+        * dp to namei so we don't release the iocount here.
         */
        dp = ndp->ni_dvp;
+       ndp->ni_dvp = NULLVP;
 
        /*
         * get rid of references returned via 'lookup'
         */
        vnode_put(ndp->ni_vp);
-       vnode_put(ndp->ni_dvp); /* ALWAYS have a dvp for a symlink */
-
        ndp->ni_vp = NULLVP;
-       ndp->ni_dvp = NULLVP;
 
        /*
         * Check if symbolic link restarts us at the root
@@ -1644,9 +1779,20 @@ lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx)
                        cnp->cn_nameptr++;
                        ndp->ni_pathlen--;
                }
+               vnode_put(dp);
                if ((dp = ndp->ni_rootdir) == NULLVP) {
                        return ENOENT;
                }
+               if (vnode_get(dp) != 0) {
+                       return ENOENT;
+               }
+       }
+
+       if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
+               if (dp) {
+                       vnode_put(dp);
+               }
+               return ENOENT;
        }
 
        *new_dp = dp;
index 8a3cdcc476c6b4ce55a4f4dedf823f1dee275d6f..f8304f9ad51c5a0d22eca0ebda38f575e1426823 100644 (file)
@@ -234,6 +234,8 @@ static void record_vp(vnode_t vp, int count);
 extern int bootarg_no_vnode_jetsam;    /* from bsd_init.c default value is 0 */
 #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
 
+extern int bootarg_no_vnode_drain;    /* from bsd_init.c default value is 0 */
+
 boolean_t root_is_CF_drive = FALSE;
 
 #if CONFIG_TRIGGERS
@@ -250,6 +252,7 @@ TAILQ_HEAD(ragelst, vnode) vnode_rage_list;     /* vnode rapid age list */
 struct timeval rage_tv;
 int     rage_limit = 0;
 int     ragevnodes = 0;
+static  int vfs_unmountall_started = 0;
 
 #define RAGE_LIMIT_MIN  100
 #define RAGE_TIME_LIMIT 5
@@ -3311,6 +3314,8 @@ vfs_unmountall(void)
        int mounts, sec = 1;
        struct unmount_info ui;
 
+       vfs_unmountall_started = 1;
+
 retry:
        ui.u_errs = ui.u_busy = 0;
        vfs_iterate(VFS_ITERATE_CB_DROPREF | VFS_ITERATE_TAIL_FIRST, unmount_callback, &ui);
@@ -3454,6 +3459,7 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp)
        if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, ctx) == 0) {
                if (isvirtual) {
                        mp->mnt_kern_flag |= MNTK_VIRTUALDEV;
+                       mp->mnt_flag |= MNT_REMOVABLE;
                }
        }
        if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, ctx) == 0) {
@@ -3640,10 +3646,7 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp)
        if (VNOP_IOCTL(devvp, DKIOCGETLOCATION, (caddr_t)&location, 0, ctx) == 0) {
                if (location & DK_LOCATION_EXTERNAL) {
                        mp->mnt_ioflags |= MNT_IOFLAGS_PERIPHERAL_DRIVE;
-                       /* This must be called after MNTK_VIRTUALDEV has been determined via DKIOCISVIRTUAL */
-                       if ((MNTK_VIRTUALDEV & mp->mnt_kern_flag)) {
-                               mp->mnt_flag |= MNT_REMOVABLE;
-                       }
+                       mp->mnt_flag |= MNT_REMOVABLE;
                }
        }
 
@@ -4972,7 +4975,25 @@ vnode_drain(vnode_t vp)
        vp->v_owner = current_thread();
 
        while (vp->v_iocount > 1) {
-               msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", NULL);
+               if (bootarg_no_vnode_drain) {
+                       struct timespec ts = {.tv_sec = 10, .tv_nsec = 0};
+                       int error;
+
+                       if (vfs_unmountall_started) {
+                               ts.tv_sec = 1;
+                       }
+
+                       error = msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain_with_timeout", &ts);
+
+                       /* Try to deal with leaked iocounts under bootarg and shutting down */
+                       if (vp->v_iocount > 1 && error == EWOULDBLOCK &&
+                           ts.tv_sec == 1 && vp->v_numoutput == 0) {
+                               vp->v_iocount = 1;
+                               break;
+                       }
+               } else {
+                       msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", NULL);
+               }
        }
 
        vp->v_lflag &= ~VL_DRAIN;
index 838ad8c12170b4c506e444675b85292670a831a5..727d2dafcaf2cb6ce38bc6c0135dd10502c37d7d 100644 (file)
@@ -1966,11 +1966,10 @@ checkdirs_callback(proc_t p, void * arg)
        vnode_t olddp = cdrp->olddp;
        vnode_t newdp = cdrp->newdp;
        struct filedesc *fdp;
-       vnode_t tvp;
-       vnode_t fdp_cvp;
-       vnode_t fdp_rvp;
-       int cdir_changed = 0;
-       int rdir_changed = 0;
+       vnode_t new_cvp = newdp;
+       vnode_t new_rvp = newdp;
+       vnode_t old_cvp = NULL;
+       vnode_t old_rvp = NULL;
 
        /*
         * XXX Also needs to iterate each thread in the process to see if it
@@ -1978,36 +1977,68 @@ checkdirs_callback(proc_t p, void * arg)
         * XXX update that as well.
         */
 
+       /*
+        * First, with the proc_fdlock held, check to see if we will need
+        * to do any work.  If not, we will get out fast.
+        */
        proc_fdlock(p);
        fdp = p->p_fd;
-       if (fdp == (struct filedesc *)0) {
+       if (fdp == NULL ||
+           (fdp->fd_cdir != olddp && fdp->fd_rdir != olddp)) {
                proc_fdunlock(p);
                return PROC_RETURNED;
        }
-       fdp_cvp = fdp->fd_cdir;
-       fdp_rvp = fdp->fd_rdir;
        proc_fdunlock(p);
 
-       if (fdp_cvp == olddp) {
-               vnode_ref(newdp);
-               tvp = fdp->fd_cdir;
-               fdp_cvp = newdp;
-               cdir_changed = 1;
-               vnode_rele(tvp);
+       /*
+        * Ok, we will have to do some work.  Always take two refs
+        * because we might need that many.  We'll dispose of whatever
+        * we ended up not using.
+        */
+       if (vnode_ref(newdp) != 0) {
+               return PROC_RETURNED;
        }
-       if (fdp_rvp == olddp) {
-               vnode_ref(newdp);
-               tvp = fdp->fd_rdir;
-               fdp_rvp = newdp;
-               rdir_changed = 1;
-               vnode_rele(tvp);
+       if (vnode_ref(newdp) != 0) {
+               vnode_rele(newdp);
+               return PROC_RETURNED;
        }
-       if (cdir_changed || rdir_changed) {
-               proc_fdlock(p);
-               fdp->fd_cdir = fdp_cvp;
-               fdp->fd_rdir = fdp_rvp;
-               proc_fdunlock(p);
+
+       /*
+        * Now do the work.  Note: we dropped the proc_fdlock, so we
+        * have to do all of the checks again.
+        */
+       proc_fdlock(p);
+       fdp = p->p_fd;
+       if (fdp != NULL) {
+               if (fdp->fd_cdir == olddp) {
+                       old_cvp = olddp;
+                       fdp->fd_cdir = newdp;
+                       new_cvp = NULL;
+               }
+               if (fdp->fd_rdir == olddp) {
+                       old_rvp = olddp;
+                       fdp->fd_rdir = newdp;
+                       new_rvp = NULL;
+               }
        }
+       proc_fdunlock(p);
+
+       /*
+        * Dispose of any references that are no longer needed.
+        */
+       if (old_cvp != NULL) {
+               vnode_rele(old_cvp);
+       }
+       if (old_rvp != NULL) {
+               vnode_rele(old_rvp);
+       }
+       if (new_cvp != NULL) {
+               vnode_rele(new_cvp);
+       }
+       if (new_rvp != NULL) {
+               vnode_rele(new_rvp);
+       }
+
        return PROC_RETURNED;
 }
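
The rewrite above is an instance of a reserve-retest-release pattern. A
distilled, hedged sketch for a single vnode (not the actual code):

    vnode_t drop = NULLVP;

    if (vnode_ref(newdp) != 0) {            /* reserve outside the lock */
            return PROC_RETURNED;
    }
    proc_fdlock(p);
    if (fdp->fd_cdir == olddp) {            /* retest under the lock */
            fdp->fd_cdir = newdp;           /* reservation consumed */
            drop = olddp;                   /* old ref is now ours to drop */
            newdp = NULLVP;
    }
    proc_fdunlock(p);
    if (drop != NULLVP) {
            vnode_rele(drop);
    }
    if (newdp != NULLVP) {
            vnode_rele(newdp);              /* reservation went unused */
    }

This keeps vnode_ref()/vnode_rele() out of the proc_fdlock critical section
while still tolerating the state changing between the two acquisitions.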
 
index a7b0417b2f3b1cff83c9ce5d962e25e9d3b659bd..76d6a4ef35ae064d0b5e4a46d62709175c9a8247 100644 (file)
@@ -523,7 +523,9 @@ enum {
        kIOPMOverTemp                 = (1 << 9),// system dangerously hot
        kIOPMClamshellOpened          = (1 << 10),// clamshell was opened
        kIOPMDWOverTemp               = (1 << 11),// DarkWake thermal limits exceeded.
-       kIOPMPowerButtonUp            = (1 << 12) // Power button up
+       kIOPMPowerButtonUp            = (1 << 12),// Power button up
+       kIOPMProModeEngaged           = (1 << 13),// Fans entered 'ProMode'
+       kIOPMProModeDisengaged        = (1 << 14) // Fans exited 'ProMode'
 };
 
 
@@ -788,6 +790,8 @@ enum {
 #define kIOPMSettingTimeZoneOffsetKey               "TimeZoneOffsetSeconds"
 #define kIOPMSettingMobileMotionModuleKey           "MobileMotionModule"
 #define kIOPMSettingGraphicsSwitchKey               "GPUSwitch"
+#define kIOPMSettingProModeControl                  "ProModeControl"
+#define kIOPMSettingProModeDefer                    "ProModeDefer"
 
 // Setting controlling drivers can register to receive scheduled wake data
 // Either in "CF seconds" type, or structured calendar data in a formatted
index 1a549c0c40f917ce816d27e8a4a5d75fe5f459e9..ee04a50724eacc04bc8200fca20f97089dcde411 100644 (file)
@@ -106,6 +106,9 @@ enum {
 #define kIOPMMessageLaunchBootSpinDump \
                 iokit_family_msg(sub_iokit_powermanagement, 0x440)
 
+#define kIOPMMessageProModeStateChange \
+                iokit_family_msg(sub_iokit_powermanagement, 0x450)
+
 /* @enum SystemSleepReasons
  * @abstract The potential causes for system sleep as logged in the system event record.
  */
@@ -677,6 +680,11 @@ enum {
 #define kIOPMWakeEventReasonKey             "Reason"
 #define kIOPMWakeEventDetailsKey            "Details"
 
+/* kIOPMFeatureProModeKey
+ * Feature published if ProMode is supported
+ */
+#define kIOPMFeatureProModeKey              "ProMode"
+
 /*****************************************************************************
  *
  * Wake event flags reported to IOPMrootDomain::claimSystemWakeEvent()
index 18441e5ce4f4ccbb73947d198704ebb45bd902e3..664890442079d120689ff871d3ef0ff025968d0d 100644 (file)
@@ -317,7 +317,9 @@ IOInterruptController::enableInterrupt(IOService *nub, int source)
                }
                if (vector->interruptDisabledHard) {
                        vector->interruptDisabledHard = 0;
-
+#if !defined(__i386__) && !defined(__x86_64__)
+                       OSMemoryBarrier();
+#endif
                        enableVector(vectorNumber, vector);
                }
        }
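
The barrier's purpose is inferred rather than stated: on weakly-ordered ARM,
the interruptDisabledHard = 0 store must be visible to other CPUs before
enableVector() lets the interrupt fire again; x86's stronger store ordering
makes it implicit, hence the #if exclusion. Roughly the C11 shape of the
constraint (illustrative only):

    vector->interruptDisabledHard = 0;
    atomic_thread_fence(memory_order_seq_cst);  /* ~ OSMemoryBarrier() */
    enableVector(vectorNumber, vector);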
index 69e82fbec2cda217b0e02ff63c98cfea3c982ba3..9cf95e50124615c9641a5aa0163c22b5df6cec32 100644 (file)
@@ -874,6 +874,10 @@ IOTrackingLeakScan(void * refcon)
 
                for (ptrIdx = 0; ptrIdx < (page_size / sizeof(uintptr_t)); ptrIdx++) {
                        ptr = ((uintptr_t *)vphysaddr)[ptrIdx];
+#if defined(HAS_APPLE_PAC)
+                       // strip possible ptrauth signature from candidate data pointer
+                       ptr = (uintptr_t)ptrauth_strip((void*)ptr, ptrauth_key_process_independent_data);
+#endif /* defined(HAS_APPLE_PAC) */
 
                        for (lim = count, baseIdx = 0; lim; lim >>= 1) {
                                inst = instances[baseIdx + (lim >> 1)];
index 0920486dad527481418f06e142304d5c6361440d..624d7a812a1934be0739f8b40f7bccc9200c55f0 100644 (file)
@@ -1349,7 +1349,8 @@ static const OSSymbol * gIOPMUserIsActiveKey;
 //
 //******************************************************************************
 
-#define kRootDomainSettingsCount        17
+#define kRootDomainSettingsCount           19
+#define kRootDomainNoPublishSettingsCount  3
 
 bool
 IOPMrootDomain::start( IOService * nub )
@@ -1400,7 +1401,16 @@ IOPMrootDomain::start( IOService * nub )
                OSSymbol::withCString(kIOPMSettingMobileMotionModuleKey),
                OSSymbol::withCString(kIOPMSettingGraphicsSwitchKey),
                OSSymbol::withCString(kIOPMStateConsoleShutdown),
-               gIOPMSettingSilentRunningKey
+               OSSymbol::withCString(kIOPMSettingProModeControl),
+               OSSymbol::withCString(kIOPMSettingProModeDefer),
+               gIOPMSettingSilentRunningKey,
+       };
+
+       const OSSymbol  *noPublishSettingsArr[kRootDomainNoPublishSettingsCount] =
+       {
+               OSSymbol::withCString(kIOPMSettingProModeControl),
+               OSSymbol::withCString(kIOPMSettingProModeDefer),
+               gIOPMSettingSilentRunningKey,
        };
 
        PE_parse_boot_argn("darkwake", &gDarkWakeFlags, sizeof(gDarkWakeFlags));
@@ -1518,7 +1528,9 @@ IOPMrootDomain::start( IOService * nub )
        // List of PM settings that should not automatically publish itself
        // as a feature when registered by a listener.
        noPublishPMSettings = OSArray::withObjects(
-               (const OSObject **) &gIOPMSettingSilentRunningKey, 1, 0);
+               (const OSObject **)noPublishSettingsArr,
+               kRootDomainNoPublishSettingsCount,
+               0);
 
        fPMSettingsDict = OSDictionary::withCapacity(5);
        preventIdleSleepList = OSSet::withCapacity(8);
@@ -7883,6 +7895,18 @@ IOPMrootDomain::handlePowerNotification( UInt32 msg )
                        evaluatePolicy( kStimulusDarkWakeEvaluate );
                }
        }
+
+       if (msg & kIOPMProModeEngaged) {
+               int newState = 1;
+               DLOG("ProModeEngaged\n");
+               messageClient(kIOPMMessageProModeStateChange, systemCapabilityNotifier, &newState, sizeof(newState));
+       }
+
+       if (msg & kIOPMProModeDisengaged) {
+               int newState = 0;
+               DLOG("ProModeDisengaged\n");
+               messageClient(kIOPMMessageProModeStateChange, systemCapabilityNotifier, &newState, sizeof(newState));
+       }
 }
 
 //******************************************************************************
index 7e5abfb411192ba1c22386ef40e7223d301af932..bd20598ef709fc92d8eefbbdec4284ae9506ad88 100644 (file)
@@ -3023,8 +3023,12 @@ IOService::terminateWorker( IOOptionBits options )
                                }
                                if (doPhase2) {
                                        if (kIOServiceNeedWillTerminate & victim->__state[1]) {
-                                               _workLoopAction((IOWorkLoop::Action) &actionWillStop,
-                                                   victim, (void *)(uintptr_t) options, NULL );
+                                               if (NULL == victim->reserved->uvars) {
+                                                       _workLoopAction((IOWorkLoop::Action) &actionWillStop,
+                                                           victim, (void *)(uintptr_t) options);
+                                               } else {
+                                                       actionWillStop(victim, options, NULL, NULL, NULL);
+                                               }
                                        }
 
                                        OSArray * notifiers;
index 4ad8eb5bd8fe8a9dc9e520c0ccdc5e5c3b2e6401..a436a8cb2a5836aa6d72fb94e5b893bbe50e8b16 100644 (file)
@@ -453,6 +453,7 @@ IMPL(IOBufferMemoryDescriptor, Create)
                return kIOReturnBadArgument;
        }
        options &= kIOMemoryDirectionOutIn;
+       options |= kIOMemoryKernelUserShared;
        bmd = IOBufferMemoryDescriptor::inTaskWithOptions(
                kernel_task, options, capacity, alignment);
 
index a4b9b1c3ffda98b8bbd6defce102c39ebeaf9599..46aaceb9ed3f42dfcbf391ba12675b2540590f65 100644 (file)
@@ -38,6 +38,7 @@ OS_ENUM(os_reason_libsystem_code, uint64_t,
     OS_REASON_LIBSYSTEM_CODE_WORKLOOP_OWNERSHIP_LEAK = 1,
     OS_REASON_LIBSYSTEM_CODE_FAULT = 2,     /* generated by os_log_fault */
     OS_REASON_LIBSYSTEM_CODE_SECINIT_INITIALIZER = 3,
+    OS_REASON_LIBSYSTEM_CODE_PTHREAD_CORRUPTION = 4,
     );
 
 #ifndef KERNEL
index 513543952e18765f98af1291081698aa42edcb49..b4e12fba60075955e81a4a0897cde7167e7d5481 100644 (file)
@@ -55,18 +55,8 @@ kpersona_dealloc(uid_t id)
 int
 kpersona_get(uid_t *id)
 {
-       /* persona is a process-static identifier: cache it in a global */
-       static uid_t p_id = PERSONA_ID_NONE;
-       if (p_id == PERSONA_ID_NONE) {
-               int ret = 0;
-               size_t idlen = 1;
-               ret = __persona(PERSONA_OP_GET, 0, NULL, &p_id, &idlen, NULL);
-               if (ret != 0) {
-                       return ret;
-               }
-       }
-       *id = p_id;
-       return 0;
+       size_t idlen = 1;
+       return __persona(PERSONA_OP_GET, 0, NULL, id, &idlen, NULL);
 }
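
With the process-static cache removed, every call traps to the kernel, so
callers observe persona changes made after their first query. Minimal hedged
usage:

    uid_t persona_id;

    if (kpersona_get(&persona_id) == 0) {
            /* persona_id is the current persona as of this call */
    }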
 
 int
index 7851e0ed345db3c1f8527054867cb27b7960f6e9..6f4d332fc5154f481612a6da851477b395736789 100644 (file)
@@ -87,7 +87,7 @@ LEXT(machine_load_context)
        and             r2, r2, #3                                                      // Extract cpu number
        orr             r1, r1, r2                                                      // 
        mcr             p15, 0, r1, c13, c0, 3                          // Write TPIDRURO
-       ldr             r1, [r0, TH_CTH_DATA]
+       mov             r1, #0
        mcr             p15, 0, r1, c13, c0, 2                          // Write TPIDRURW
        mov             r7, #0                                                          // Clear frame pointer
        ldr             r3, [r0, TH_KSTACKPTR]                          // Get kernel stack top
@@ -146,7 +146,8 @@ LEXT(Switch_context)
        bne             switch_threads                                          // No need to save GPR/NEON state if we are
 #if     __ARM_VFP__
        mov             r1, r2                                                          // r2 will be clobbered by the save, so preserve it
-       add             r3, r0, ACT_KVFP                                        // Get the kernel VFP save area for the old thread...
+       ldr             r3, [r0, TH_KSTACKPTR]                          // Get old kernel stack top
+       add             r3, r3, SS_KVFP                                         // Get the kernel VFP save area for the old thread...
        save_vfp_registers                                                      // ...and save our VFP state to it
        mov             r2, r1                                                          // Restore r2 (the new thread pointer)
 #endif /* __ARM_VFP__ */
@@ -161,13 +162,14 @@ switch_threads:
        and             r5, r5, #3                                                      // Extract cpu number
        orr             r6, r6, r5
        mcr             p15, 0, r6, c13, c0, 3                          // Write TPIDRURO
-       ldr             r6, [r2, TH_CTH_DATA]
+       mov             r6, #0
        mcr             p15, 0, r6, c13, c0, 2                          // Write TPIDRURW
 load_reg:      
        add             r3, r3, SS_R4
        ldmia   r3!, {r4-r14}                                           // Restore new thread status
 #if     __ARM_VFP__
-       add             r3, r2, ACT_KVFP                                        // Get the kernel VFP save area for the new thread...
+       ldr             r3, [r2, TH_KSTACKPTR]                          // get kernel stack top
+       add             r3, r3, SS_KVFP                                         // Get the kernel VFP save area for the new thread...
        load_vfp_registers                                                      // ...and load the saved state
 #endif /* __ARM_VFP__ */
        bx              lr                                                                      // Return
@@ -183,7 +185,8 @@ load_reg:
 LEXT(Shutdown_context)
        mrc             p15, 0, r9, c13, c0, 4                          // Read TPIDRPRW
 #if __ARM_VFP__
-       add             r3, r9, ACT_KVFP                                        // Get the kernel VFP save area for the current thread...
+       ldr             r3, [r9, TH_KSTACKPTR]                          // get kernel stack top
+       add             r3, r3, SS_KVFP                                         // Get the kernel VFP save area for the current thread...
        save_vfp_registers                                                      // ...and save our VFP state to it
 #endif
        ldr             r3, [r9, TH_KSTACKPTR]                          // Get kernel stack top
@@ -207,7 +210,8 @@ LEXT(Idle_context)
 
        mrc             p15, 0, r9, c13, c0, 4                          // Read TPIDRPRW
 #if    __ARM_VFP__
-       add             r3, r9, ACT_KVFP                                        // Get the kernel VFP save area for the current thread...
+       ldr             r3, [r9, TH_KSTACKPTR]                          // get kernel stack top
+       add             r3, r3, SS_KVFP                                         // Get the kernel VFP save area for the current thread...
        save_vfp_registers                                                      // ...and save our VFP state to it
 #endif
        ldr             r3, [r9, TH_KSTACKPTR]                          // Get kernel stack top
@@ -233,7 +237,8 @@ LEXT(Idle_load_context)
        add             r3, r3, SS_R4
        ldmia   r3!, {r4-r14}                                           // Restore new thread status
 #if __ARM_VFP__
-       add             r3, r9, ACT_KVFP                                        // Get the kernel VFP save area for the current thread...
+       ldr             r3, [r9, TH_KSTACKPTR]                          // get kernel stack top
+       add             r3, r3, SS_KVFP                                         // Get the kernel VFP save area for the current thread...
        load_vfp_registers                                                      // ...and load the saved state
 #endif
        bx              lr                                                                      // Return
index 5ebbf990bae4b46900de84afa1037a882e0dc05d..585d713036c4a2a11d75029a825ea7e6286d6230 100644 (file)
@@ -131,11 +131,9 @@ main(
        DECLARE("ACT_TASK", offsetof(struct thread, task));
        DECLARE("ACT_PCBDATA", offsetof(struct thread, machine.PcbData));
 #if __ARM_VFP__
-       DECLARE("ACT_UVFP", offsetof(struct thread, machine.uVFPdata));
-       DECLARE("ACT_KVFP", offsetof(struct thread, machine.kVFPdata));
+       DECLARE("ACT_UVFP", offsetof(struct thread, machine.PcbData.VFPdata));
 #endif
        DECLARE("TH_CTH_SELF", offsetof(struct thread, machine.cthread_self));
-       DECLARE("TH_CTH_DATA", offsetof(struct thread, machine.cthread_data));
        DECLARE("ACT_PCBDATA_PC", offsetof(struct thread, machine.PcbData.pc));
        DECLARE("ACT_PCBDATA_R0", offsetof(struct thread, machine.PcbData.r[0]));
        DECLARE("ACT_PREEMPT_CNT", offsetof(struct thread, machine.preemption_count));
@@ -176,6 +174,7 @@ main(
        DECLARE("SS_EXC", offsetof(struct arm_saved_state, exception));
 
 #if __ARM_VFP__
+       DECLARE("SS_KVFP", offsetof(struct arm_saved_state, VFPdata));
        DECLARE("VSS_SIZE", sizeof(struct arm_vfpsaved_state));
        DECLARE("VSS_FPSCR", offsetof(struct arm_vfpsaved_state, fpscr));
        DECLARE("VSS_FPEXC", offsetof(struct arm_vfpsaved_state, fpexc));
index 7b7f41411223e359360aed9da6b867b3c54dd0ea..9d1896393b852f57e67fe0443bf9849e0b431cd5 100644 (file)
@@ -41,7 +41,7 @@ LEXT(machine_set_current_thread)
        and             r2, r2, #3                                                      // Extract cpu number
        orr             r1, r1, r2                                                      //
        mcr             p15, 0, r1, c13, c0, 3                          // Write TPIDRURO
-       ldr             r1, [r0, TH_CTH_DATA]
+       mov             r1, #0
        mcr             p15, 0, r1, c13, c0, 2                          // Write TPIDRURW
        bx              lr
 
index c03e518b63e191038e290e64d26e1dde85abdcc7..b78a1be8ea4a176dddfe2895758e94a06abc87f1 100644 (file)
@@ -143,7 +143,6 @@ machine_thread_create(
        }
        thread->machine.preemption_count = 0;
        thread->machine.cthread_self = 0;
-       thread->machine.cthread_data = 0;
 #if    __ARM_USER_PROTECT__
        {
        struct pmap *new_pmap = vm_map_pmap(task->map);
@@ -252,6 +251,7 @@ machine_stack_attach(
        savestate->r[7] = 0x0UL;
        savestate->r[9] = (uint32_t) NULL;
        savestate->cpsr = PSR_SVC_MODE | PSR_INTMASK;
+       vfp_state_initialize(&savestate->VFPdata);
        machine_stack_attach_kprintf("thread = %x pc = %x, sp = %x\n", thread, savestate->lr, savestate->sp);
 }
 
index 93921c0eb467ba6b577c7fdde4ef506686aa649a..92b3562d71e05a4916eae54683b3498a48ae243b 100644 (file)
@@ -1395,11 +1395,15 @@ static void pmap_tte_deallocate(
 #ifdef __ARM64_PMAP_SUBPAGE_L1__
 #if (__ARM_VMSA__ <= 7)
 #error This is not supported for old-style page tables
-#endif
+#endif /* (__ARM_VMSA__ <= 7) */
 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
-#else
+#else /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
+#if (__ARM_VMSA__ <= 7)
+#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
+#else /* (__ARM_VMSA__ > 7) */
 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
-#endif
+#endif /* (__ARM_VMSA__ > 7) */
+#endif /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
 
 const unsigned int arm_hardware_page_size = ARM_PGBYTES;
 const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
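
The reworked conditionals give the armv7 (__ARM_VMSA__ <= 7) case a fixed two-page root allocation, which is what lets the later hunks delete the 1 GB-to-2 GB L1 expansion machinery entirely. A worked check under the usual short-descriptor assumptions (4 KB pages, 32-bit L1 entries, 1 MB sections):

#include <stdint.h>

enum {
	ARM_PGBYTES_V7  = 4096,                               /* assumed page size    */
	L1_ENTRY_BYTES  = sizeof(uint32_t),                   /* tt_entry_t on VMSA-7 */
	ROOT_ALLOC_SIZE = ARM_PGBYTES_V7 * 2,                 /* PMAP_ROOT_ALLOC_SIZE */
	TTE_INDEX_MAX   = ROOT_ALLOC_SIZE / L1_ENTRY_BYTES,   /* 2048 entries         */
};

/* 2048 entries x 1 MB per section = 2 GB of VA covered up front,
 * matching what the removed pmap_expand() path grew tables to. */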
@@ -3458,15 +3462,10 @@ pmap_bootstrap(
 #if (__ARM_VMSA__ == 7)
        kernel_pmap->tte_index_max = 4 * NTTES;
 #endif
-       kernel_pmap->prev_tte = (tt_entry_t *) NULL;
        kernel_pmap->hw_asid = 0;
        kernel_pmap->sw_asid = 0;
 
        PMAP_LOCK_INIT(kernel_pmap);
-#if     (__ARM_VMSA__ == 7)
-       simple_lock_init(&kernel_pmap->tt1_lock, 0);
-       kernel_pmap->cpu_ref = 0;
-#endif
        memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
 
        /* allocate space for and initialize the bookkeeping structures */
@@ -3887,13 +3886,14 @@ pmap_create_options_internal(
        p->ledger = ledger;
 
        PMAP_LOCK_INIT(p);
-#if     (__ARM_VMSA__ == 7)
-       simple_lock_init(&p->tt1_lock, 0);
-       p->cpu_ref = 0;
-#endif
        memset((void *) &p->stats, 0, sizeof(p->stats));
 
        p->tt_entry_free = (tt_entry_t *)0;
+       tte_index_max = PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t);
+
+#if     (__ARM_VMSA__ == 7)
+       p->tte_index_max = tte_index_max;
+#endif
 
        p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
        if (!(p->tte)) {
@@ -3903,13 +3903,6 @@ pmap_create_options_internal(
        p->ttep = ml_static_vtop((vm_offset_t)p->tte);
        PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
 
-#if (__ARM_VMSA__ == 7)
-       tte_index_max = p->tte_index_max = NTTES;
-#else
-       tte_index_max = (PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t));
-#endif
-       p->prev_tte = (tt_entry_t *) NULL;
-
        /* nullify the translation table */
        for (i = 0; i < tte_index_max; i++) {
                p->tte[i] = ARM_TTE_TYPE_FAULT;
@@ -4088,15 +4081,6 @@ pmap_destroy_internal(
        queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
        pmap_simple_unlock(&pmaps_lock);
 
-#if (__ARM_VMSA__ == 7)
-       if (pmap->cpu_ref != 0) {
-               panic("%s: cpu_ref=%u, "
-                   "pmap=%p",
-                   __FUNCTION__, pmap->cpu_ref,
-                   pmap);
-       }
-#endif /* (__ARM_VMSA__ == 7) */
-
        pmap_trim_self(pmap);
 
        /*
@@ -4144,13 +4128,6 @@ pmap_destroy_internal(
                pmap->ttep = 0;
        }
 
-#if (__ARM_VMSA__ == 7)
-       if (pmap->prev_tte) {
-               pmap_tt1_deallocate(pmap, pmap->prev_tte, PMAP_ROOT_ALLOC_SIZE, 0);
-               pmap->prev_tte = (tt_entry_t *) NULL;
-       }
-#endif /* (__ARM_VMSA__ == 7) */
-
        assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
 
        pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
@@ -5210,10 +5187,7 @@ pmap_switch_internal(
        asid_index >>= 1;
 #endif
 
-#if     (__ARM_VMSA__ == 7)
-       assert(not_in_kdp);
-       pmap_simple_lock(&pmap->tt1_lock);
-#else
+#if     (__ARM_VMSA__ > 7)
        pmap_t           last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
 #endif
 
@@ -5257,10 +5231,6 @@ pmap_switch_internal(
                os_atomic_inc(&pmap_asid_flushes, relaxed);
 #endif
        }
-
-#if     (__ARM_VMSA__ == 7)
-       pmap_simple_unlock(&pmap->tt1_lock);
-#endif
 }
 
 void
@@ -7046,65 +7016,20 @@ pmap_expand(
        tt_entry_t              *tt_p;
        unsigned int    i;
 
-       while (tte_index(pmap, pt_attr, v) >= pmap->tte_index_max) {
-               tte_p = pmap_tt1_allocate(pmap, 2 * ARM_PGBYTES, ((options & PMAP_OPTIONS_NOWAIT)? PMAP_TT_ALLOCATE_NOWAIT : 0));
-               if (tte_p == (tt_entry_t *)0) {
-                       return KERN_RESOURCE_SHORTAGE;
-               }
-
-               PMAP_LOCK(pmap);
-               if (pmap->tte_index_max > NTTES) {
-                       pmap_tt1_deallocate(pmap, tte_p, 2 * ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
-                       PMAP_UNLOCK(pmap);
-                       break;
-               }
-
-               pmap_simple_lock(&pmap->tt1_lock);
-               for (i = 0; i < pmap->tte_index_max; i++) {
-                       tte_p[i] = pmap->tte[i];
-               }
-               for (i = NTTES; i < 2 * NTTES; i++) {
-                       tte_p[i] = ARM_TTE_TYPE_FAULT;
-               }
-
-               FLUSH_PTE_RANGE(tte_p, tte_p + (2 * NTTES)); // DMB
-
-               /* Order is important here, so that pmap_switch_user_ttb() sees things
-                * in the correct sequence.
-                * --update of pmap->tte[p] must happen prior to updating pmap->tte_index_max,
-                *   separated by at least a DMB, so that context switch does not see a 1 GB
-                *   L1 table with a 2GB size.
-                * --update of pmap->tte[p] must also happen prior to setting pmap->prev_tte,
-                *   separated by at least a DMB, so that context switch does not see an L1
-                *   table to be freed without also seeing its replacement.*/
-
-               tt_entry_t *prev_tte = pmap->tte;
-
-               pmap->tte = tte_p;
-               pmap->ttep = ml_static_vtop((vm_offset_t)pmap->tte);
-
-               __builtin_arm_dmb(DMB_ISH);
-
-               pmap->tte_index_max = 2 * NTTES;
-               pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);
-
-               for (i = 0; i < NTTES; i++) {
-                       prev_tte[i] = ARM_TTE_TYPE_FAULT;
-               }
-
-               /* We need a strong flush here because a TLB flush will be
-                * issued from pmap_switch_user_ttb() as soon as this pmap
-                * is no longer active on any CPU.  We need to ensure all
-                * prior stores to the TTE region have retired before that. */
-               FLUSH_PTE_RANGE_STRONG(prev_tte, prev_tte + NTTES); // DSB
-               pmap->prev_tte = prev_tte;
+#if DEVELOPMENT || DEBUG
+       /*
+        * We no longer support root level expansion; panic in case something
+        * still attempts to trigger it.
+        */
+       i = tte_index(pmap, pt_attr, v);
 
-               pmap_simple_unlock(&pmap->tt1_lock);
-               PMAP_UNLOCK(pmap);
-               if (current_pmap() == pmap) {
-                       pmap_set_pmap(pmap, current_thread());
-               }
+       if (i >= pmap->tte_index_max) {
+               panic("%s: index out of range, index=%u, max=%u, "
+                   "pmap=%p, addr=%p, options=%u, level=%u",
+                   __func__, i, pmap->tte_index_max,
+                   pmap, (void *)v, options, level);
        }
+#endif /* DEVELOPMENT || DEBUG */
 
        if (level == 1) {
                return KERN_SUCCESS;
@@ -7823,33 +7748,8 @@ pmap_switch_user_ttb_internal(
        cpu_data_ptr = pmap_get_cpu_data();
 
 #if     (__ARM_VMSA__ == 7)
-
-       if ((cpu_data_ptr->cpu_user_pmap != PMAP_NULL)
-           && (cpu_data_ptr->cpu_user_pmap != kernel_pmap)) {
-               unsigned int    c;
-               tt_entry_t      *tt_entry = cpu_data_ptr->cpu_user_pmap->prev_tte;
-
-               c = os_atomic_dec(&cpu_data_ptr->cpu_user_pmap->cpu_ref, acq_rel);
-               if ((c == 0) && (tt_entry != NULL)) {
-                       /* We saved off the old 1-page tt1 in pmap_expand() in case other cores were still using it.
-                        * Now that the user pmap's cpu_ref is 0, we should be able to safely free it.*/
-
-                       cpu_data_ptr->cpu_user_pmap->prev_tte = NULL;
-#if !__ARM_USER_PROTECT__
-                       set_mmu_ttb(kernel_pmap->ttep);
-                       set_context_id(kernel_pmap->hw_asid);
-#endif
-                       /* Now that we can guarantee the old 1-page L1 table is no longer active on any CPU,
-                        * flush any cached intermediate translations that may point to it.  Note that to be truly
-                        * safe from prefetch-related issues, this table PA must have been cleared from TTBR0 prior
-                        * to this call.  __ARM_USER_PROTECT__ effectively guarantees that for all current configurations.*/
-                       flush_mmu_tlb_asid(cpu_data_ptr->cpu_user_pmap->hw_asid);
-                       pmap_tt1_deallocate(cpu_data_ptr->cpu_user_pmap, tt_entry, ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
-               }
-       }
        cpu_data_ptr->cpu_user_pmap = pmap;
        cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
-       os_atomic_inc(&pmap->cpu_ref, acq_rel);
 
 #if     MACH_ASSERT && __ARM_USER_PROTECT__
        {
@@ -10429,7 +10329,7 @@ pmap_max_32bit_offset(
        if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
                max_offset_ret = arm_pmap_max_offset_default;
        } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
-               max_offset_ret = 0x66000000;
+               max_offset_ret = 0x80000000;
        } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
                max_offset_ret = VM_MAX_ADDRESS;
        } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
@@ -10438,7 +10338,7 @@ pmap_max_32bit_offset(
                } else if (max_mem > 0x20000000) {
                        max_offset_ret = 0x80000000;
                } else {
-                       max_offset_ret = 0x66000000;
+                       max_offset_ret = 0x80000000;
                }
        } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
                max_offset_ret = 0x80000000;
index 3d45185eb340994630f6536c7a79f04b37956d15..92d4a167a1cc989d68c580d4c28a2fcdc165ee28 100644 (file)
@@ -276,8 +276,8 @@ extern pmap_paddr_t mmu_uvtop(vm_offset_t va);
 #define PMAP_GC_WAIT            2
 
 #if DEVELOPMENT || DEBUG
-#define pmap_cs_log(msg, args...) printf("PMAP_CS: " msg "\n", args)
 #define pmap_cs_log_h(msg, args...) { if(pmap_cs_log_hacks) printf("PMAP_CS: " msg "\n", args); }
+#define pmap_cs_log pmap_cs_log_h
 
 #define PMAP_CS_EXCEPTION_LIST_HACK 1
 
@@ -309,7 +309,6 @@ struct pmap {
        struct pmap_statistics  stats;          /* map statistics */
        queue_chain_t           pmaps;                  /* global list of pmaps */
        tt_entry_t                      *tt_entry_free; /* free translation table entries */
-       tt_entry_t                      *prev_tte;              /* previous translation table */
        struct pmap                     *nested_pmap;   /* nested pmap */
        vm_map_address_t        nested_region_grand_addr;
        vm_map_address_t        nested_region_subord_addr;
@@ -319,8 +318,6 @@ struct pmap {
        unsigned int            *nested_region_asid_bitmap;
 
 #if (__ARM_VMSA__ <= 7)
-       decl_simple_lock_data(, tt1_lock);       /* lock on tt1 */
-       unsigned int            cpu_ref;                /* number of cpus using pmap */
        unsigned int            tte_index_max;          /* max tte index in translation table entries */
 #endif
 
index bdfcf5a6ba531e72109febe813d29e58ffb6d4ce..2f35514a37c24e4cab0e469871df5e68cb32016d 100644 (file)
@@ -511,8 +511,7 @@ machine_thread_state_initialize(
        savestate->cpsr = PSR_USERDFLT;
 
 #if __ARM_VFP__
-       vfp_state_initialize(&thread->machine.uVFPdata);
-       vfp_state_initialize(&thread->machine.kVFPdata);
+       vfp_state_initialize(&thread->machine.PcbData.VFPdata);
 #endif
 
        thread->machine.DebugData = NULL;
@@ -561,15 +560,14 @@ machine_thread_dup(
 #endif
 
        target->machine.cthread_self = self->machine.cthread_self;
-       target->machine.cthread_data = self->machine.cthread_data;
 
        self_saved_state = &self->machine.PcbData;
        target_saved_state = &target->machine.PcbData;
        bcopy(self_saved_state, target_saved_state, sizeof(struct arm_saved_state));
 
 #if    __ARM_VFP__
-       self_vfp_state = &self->machine.uVFPdata;
-       target_vfp_state = &target->machine.uVFPdata;
+       self_vfp_state = &self->machine.PcbData.VFPdata;
+       target_vfp_state = &target->machine.PcbData.VFPdata;
        bcopy(self_vfp_state, target_vfp_state, sizeof(struct arm_vfpsaved_state));
 #endif
 
@@ -626,7 +624,7 @@ struct arm_vfpsaved_state *
 find_user_vfp(
              thread_t thread)
 {
-       return &thread->machine.uVFPdata;
+       return &thread->machine.PcbData.VFPdata;
 }
 #endif /* __ARM_VFP__ */
 
index f17ae451dad75e997cdf1bc7250f047be86013b4..3782d0f265fa496d0ef6640aa9c80b9e8a93af8c 100644 (file)
 #include <arm/proc_reg.h>
 #endif
 
-#if __ARM_VFP__
-
-#define VFPSAVE_ALIGN  16
-#define VFPSAVE_ATTRIB __attribute__((aligned (VFPSAVE_ALIGN)))
-#define THREAD_ALIGN   VFPSAVE_ALIGN
-
-/*
- * vector floating point saved state
- */
-struct arm_vfpsaved_state {
-       uint32_t r[64];
-       uint32_t fpscr;
-       uint32_t fpexc;
-};
-#endif
-
 struct perfcontrol_state {
        uint64_t opaque[8] __attribute__((aligned(8)));
 };
@@ -103,26 +87,31 @@ typedef struct arm_saved_state machine_thread_kernel_state;
 #include <kern/thread_kernel_state.h>
 
 struct machine_thread {
+#if __ARM_USER_PROTECT__
+       unsigned int              uptw_ttc;
+       unsigned int              uptw_ttb;
+       unsigned int              kptw_ttb;
+       unsigned int              asid;
+#endif
+
 #if __arm64__
        arm_context_t *           contextData;             /* allocated user context */
        arm_saved_state_t *       upcb;                    /* pointer to user GPR state */
        arm_neon_saved_state_t *  uNeon;                   /* pointer to user VFP state */
 #elif __arm__
        struct arm_saved_state    PcbData;
-#if __ARM_VFP__
-       struct arm_vfpsaved_state uVFPdata VFPSAVE_ATTRIB;
-       struct arm_vfpsaved_state kVFPdata VFPSAVE_ATTRIB;
-#endif /* __ARM_VFP__ */
-
 #else
 #error Unknown arch
 #endif
 
-#if __ARM_USER_PROTECT__
-       unsigned int              uptw_ttc;
-       unsigned int              uptw_ttb;
-       unsigned int              kptw_ttb;
-       unsigned int              asid;
+#if defined(__arm__) && defined(__ARM_VFP__)
+       // for packing reasons cthread_self and DebugData
+       // are inside the PcbData when __ARM_VFP__ is set

+#define DebugData    PcbData.VFPpadding_DebugData
+#define cthread_self PcbData.VFPpadding_cthread_self
+#else
+       arm_debug_state_t        *DebugData;
+       vm_address_t              cthread_self;               /* for use of cthread package */
 #endif
 
        vm_offset_t               kstackptr;                  /* top of kernel stack */
@@ -139,10 +128,6 @@ struct machine_thread {
        uint8_t                   machine_thread_flags;
 #endif /* __ARM_SMP__ */
 
-       arm_debug_state_t *       DebugData;
-       mach_vm_address_t         cthread_self;               /* for use of cthread package */
-       mach_vm_address_t         cthread_data;               /* for use of cthread package */
-
        struct perfcontrol_state  perfctrl_state;
 #if __arm64__
        uint64_t                  energy_estimate_nj;
index 06aeca99ef4cd839fcb7784f4211c869b17182dc..48e0879b1cdc352efcb6440a37fb75e87cd52f6b 100644 (file)
        and             $2, $2, #(MACHDEP_CPUNUM_MASK)
        orr             $2, $1, $2                                                      // Save new cthread/cpu to TPIDRRO_EL0
        msr             TPIDRRO_EL0, $2
-       ldr             $1, [$0, TH_CTH_DATA]                           // Get new cthread data pointer
-       msr             TPIDR_EL0, $1                                           // Save data pointer to TPIDRRW_EL0
+       msr             TPIDR_EL0, xzr
        /* ARM64_TODO Reserve x18 until we decide what to do with it */
        mov             x18, $1                                                         // ... and trash reserved x18
 .endmacro
index 8dfdecddaedf90221c0655fa52dfeff252e0105d..c47c6ab1a82bffc166ceae7d1b350864701fa1dc 100644 (file)
@@ -122,7 +122,6 @@ main(int     argc,
        /* These fields are being added on demand */
        DECLARE("ACT_CONTEXT", offsetof(struct thread, machine.contextData));
        DECLARE("TH_CTH_SELF", offsetof(struct thread, machine.cthread_self));
-       DECLARE("TH_CTH_DATA", offsetof(struct thread, machine.cthread_data));
        DECLARE("ACT_PREEMPT_CNT", offsetof(struct thread, machine.preemption_count));
        DECLARE("ACT_CPUDATAP", offsetof(struct thread, machine.CpuDatap));
        DECLARE("ACT_DEBUGDATA", offsetof(struct thread, machine.DebugData));
index 5edaf67f1711d32ee4b51684c5885492e286d857..f9162a819ba186cd98c92df289e033e3749069f7 100644 (file)
@@ -904,8 +904,7 @@ exception_return_unint_tpidr_x3:
        mov             sp, x21                                         // Reload the pcb pointer
 
        /* ARM64_TODO Reserve x18 until we decide what to do with it */
-       ldr             x0, [x3, TH_CTH_DATA]                           // Load cthread data pointer
-       str             x0, [sp, SS64_X18]                                      // and use it to trash x18
+       str             xzr, [sp, SS64_X18]
 
 #if __ARM_KERNEL_PROTECT__
        /*
index 495cc7c03d9fb24197330c33df18a0b553020f68..8b7ea91f4c4a738b3334969852015ed33c4802b2 100644 (file)
@@ -185,6 +185,8 @@ bzero_phys_nc(addr64_t src64, vm_size_t bytes)
        bzero_phys(src64, bytes);
 }
 
+extern void *secure_memset(void *, int, size_t);
+
 /* Zero bytes starting at a physical address */
 void
 bzero_phys(addr64_t src, vm_size_t bytes)
@@ -202,15 +204,14 @@ bzero_phys(addr64_t src, vm_size_t bytes)
 
                boolean_t use_copy_window = !pmap_valid_address(src);
                pn = (ppnum_t)(src >> PAGE_SHIFT);
+               wimg_bits = pmap_cache_attributes(pn);
 #if !defined(__ARM_COHERENT_IO__) && !__ARM_PTE_PHYSMAP__
                count = PAGE_SIZE - offset;
-               wimg_bits = pmap_cache_attributes(pn);
                if ((wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) {
                        use_copy_window = TRUE;
                }
 #else
                if (use_copy_window) {
-                       wimg_bits = pmap_cache_attributes(pn);
                        count = PAGE_SIZE - offset;
                }
 #endif
@@ -229,7 +230,17 @@ bzero_phys(addr64_t src, vm_size_t bytes)
                        count = bytes;
                }
 
-               bzero(buf, count);
+               switch (wimg_bits & VM_WIMG_MASK) {
+               case VM_WIMG_DEFAULT:
+               case VM_WIMG_WCOMB:
+               case VM_WIMG_INNERWBACK:
+               case VM_WIMG_WTHRU:
+                       bzero(buf, count);
+                       break;
+               default:
+                       /* 'dc zva' performed by bzero is not safe for device memory */
+                       secure_memset((void*)buf, 0, count);
+               }
 
                if (use_copy_window) {
                        pmap_unmap_cpu_windows_copy(index);
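
The new switch keeps bzero() (which on arm64 may zero via cache maintenance such as 'dc zva') for cacheable WIMG types and routes everything else through secure_memset(). A byte-store sketch of the property that fallback has to provide; this is an assumption about its contract, not XNU's actual implementation:

#include <stddef.h>

/* Zero using only plain byte stores, which are legal on device memory
 * where cache-line zeroing is not. The volatile qualifier keeps the
 * compiler from re-lowering the loop into memset()/dc zva again. */
void *
secure_memset_sketch(void *dst, int c, size_t n)
{
	volatile unsigned char *p = dst;
	while (n--) {
		*p++ = (unsigned char)c;
	}
	return dst;
}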
index 64fd61152e622e2d9c860b6991de65c9741e780c..5dc6cde731106afb85392ade5d0c675874c826f0 100644 (file)
@@ -195,7 +195,6 @@ LEXT(set_mmu_ttb)
 LEXT(set_aux_control)
        msr             ACTLR_EL1, x0
        // Synchronize system
-       dsb             sy
        isb             sy
        ret
 
index 4303f45fe81c5967db4fadb0327b8d38416aebb4..3bf15f95afad2aa6781d9c84b2a1402411be896d 100644 (file)
@@ -158,7 +158,6 @@ machine_thread_create(thread_t thread,
        }
        thread->machine.preemption_count = 0;
        thread->machine.cthread_self = 0;
-       thread->machine.cthread_data = 0;
 #if defined(HAS_APPLE_PAC)
        thread->machine.rop_pid = task->rop_pid;
        thread->machine.disable_user_jop = task->disable_user_jop;
index 9026e45f11cf0d6ba27e9237022ec1b77d8f43df..8523c57ab2c2eda1ef81e7f13a82e656577fc579 100644 (file)
@@ -270,42 +270,6 @@ lt_upgrade_downgrade_rw()
        lck_rw_done(&lt_rwlock);
 }
 
-const int limit = 1000000;
-static int lt_stress_local_counters[MAX_CPUS];
-
-static void
-lt_stress_hw_lock()
-{
-       int local_counter = 0;
-
-       uint cpuid = current_processor()->cpu_id;
-
-       kprintf("%s>cpu %d starting\n", __FUNCTION__, cpuid);
-
-       hw_lock_lock(&lt_hw_lock, LCK_GRP_NULL);
-       lt_counter++;
-       local_counter++;
-       hw_lock_unlock(&lt_hw_lock);
-
-       while (lt_counter < lt_target_done_threads) {
-               ;
-       }
-
-       kprintf("%s>cpu %d started\n", __FUNCTION__, cpuid);
-
-       while (lt_counter < limit) {
-               hw_lock_lock(&lt_hw_lock, LCK_GRP_NULL);
-               if (lt_counter < limit) {
-                       lt_counter++;
-                       local_counter++;
-               }
-               hw_lock_unlock(&lt_hw_lock);
-       }
-
-       lt_stress_local_counters[cpuid] = local_counter;
-
-       kprintf("%s>final counter %d cpu %d incremented the counter %d times\n", __FUNCTION__, lt_counter, cpuid, local_counter);
-}
 
 static void
 lt_grab_hw_lock()
@@ -595,29 +559,6 @@ lt_thread(void *arg, wait_result_t wres __unused)
        OSIncrementAtomic((volatile SInt32*) &lt_done_threads);
 }
 
-static void
-lt_bound_thread(void *arg, wait_result_t wres __unused)
-{
-       void (*func)(void) = (void (*)(void))arg;
-
-       int cpuid = OSIncrementAtomic((volatile SInt32 *)&lt_cpu_bind_id);
-
-       processor_t processor = processor_list;
-       while ((processor != NULL) && (processor->cpu_id != cpuid)) {
-               processor = processor->processor_list;
-       }
-
-       if (processor != NULL) {
-               thread_bind(processor);
-       }
-
-       thread_block(THREAD_CONTINUE_NULL);
-
-       func();
-
-       OSIncrementAtomic((volatile SInt32*) &lt_done_threads);
-}
-
 static void
 lt_start_lock_thread(thread_continue_t func)
 {
@@ -631,18 +572,6 @@ lt_start_lock_thread(thread_continue_t func)
 }
 
 
-static void
-lt_start_lock_thread_bound(thread_continue_t func)
-{
-       thread_t thread;
-       kern_return_t kr;
-
-       kr = kernel_thread_start(lt_bound_thread, func, &thread);
-       assert(kr == KERN_SUCCESS);
-
-       thread_deallocate(thread);
-}
-
 static kern_return_t
 lt_test_locks()
 {
@@ -833,29 +762,6 @@ lt_test_locks()
        lt_wait_for_lock_test_threads();
        T_EXPECT_EQ_UINT(lt_counter, LOCK_TEST_ITERATIONS * lt_target_done_threads, NULL);
 
-       /* HW locks stress test */
-       T_LOG("Running HW locks stress test with hw_lock_lock()");
-       extern unsigned int real_ncpus;
-       lt_reset();
-       lt_target_done_threads = real_ncpus;
-       for (processor_t processor = processor_list; processor != NULL; processor = processor->processor_list) {
-               lt_start_lock_thread_bound(lt_stress_hw_lock);
-       }
-       lt_wait_for_lock_test_threads();
-       bool starvation = false;
-       uint total_local_count = 0;
-       for (processor_t processor = processor_list; processor != NULL; processor = processor->processor_list) {
-               starvation = starvation || (lt_stress_local_counters[processor->cpu_id] < 10);
-               total_local_count += lt_stress_local_counters[processor->cpu_id];
-       }
-       if (total_local_count != lt_counter) {
-               T_FAIL("Lock failure\n");
-       } else if (starvation) {
-               T_FAIL("Lock starvation found\n");
-       } else {
-               T_PASS("HW locks stress test with hw_lock_lock()");
-       }
-
 
        /* HW locks: trylocks */
        T_LOG("Running test with hw_lock_try()");
index 10c7aa5671739b8b56bc014cc6e6a69380884d7e..66a551ffed01e8d1a37d0bb5e7540d22c47c4680 100644 (file)
@@ -1632,6 +1632,9 @@ typedef enum {
 #define APCTL_EL1_KernKeyEn  (1ULL << 2)
 #endif
 
+#define ACTLR_EL1_DisHWP_OFFSET  3
+#define ACTLR_EL1_DisHWP_MASK    (1ULL << ACTLR_EL1_DisHWP_OFFSET)
+#define ACTLR_EL1_DisHWP         ACTLR_EL1_DisHWP_MASK
 
 
 #if defined(HAS_APPLE_PAC)
index 41d213e69033432226072226d9032b67eedab1ad..28f87b0a1e034f89d73719a998fb49e43e1d1f83 100644 (file)
@@ -1336,7 +1336,6 @@ machine_thread_dup(thread_t self,
        struct arm_saved_state *target_saved_state;
 
        target->machine.cthread_self = self->machine.cthread_self;
-       target->machine.cthread_data = self->machine.cthread_data;
 
        self_saved_state = self->machine.upcb;
        target_saved_state = target->machine.upcb;
index 040f331cee70d2cacf7e37ca7327bc4aaa410030..a3d283be44e678f95013ee61c7eaa81f580bfcd0 100644 (file)
@@ -152,6 +152,9 @@ static _Atomic uint32_t inflight_corpses;
 unsigned long  total_corpses_created = 0;
 boolean_t corpse_enabled_config = TRUE;
 
+/* bootarg to generate corpse with size up to max_footprint_mb */
+boolean_t corpse_threshold_system_limit = FALSE;
+
 /* bootarg to turn on corpse forking for EXC_RESOURCE */
 int exc_via_corpse_forking = 1;
 
@@ -189,6 +192,11 @@ corpses_init()
        if (PE_parse_boot_argn("corpse_for_fatal_memkill", &fatal_memkill, sizeof(fatal_memkill))) {
                corpse_for_fatal_memkill = fatal_memkill;
        }
+#if DEBUG || DEVELOPMENT
+       if (PE_parse_boot_argn("-corpse_threshold_system_limit", &corpse_threshold_system_limit, sizeof(corpse_threshold_system_limit))) {
+               corpse_threshold_system_limit = TRUE;
+       }
+#endif /* DEBUG || DEVELOPMENT */
 }
 
 /*
index a479eaea8fe601497a8a8a7a6fe091bda21c66bd..1f69f62f34b6b67df771512eb6cbe40673b175d9 100644 (file)
@@ -125,6 +125,7 @@ typedef struct {
        uint64_t plbt[MAX_TRACE_BTFRAMES];
 } plrecord_t;
 
+#if     DEVELOPMENT || DEBUG
 typedef enum {
        IOTRACE_PHYS_READ = 1,
        IOTRACE_PHYS_WRITE,
@@ -145,7 +146,17 @@ typedef struct {
        uint64_t        backtrace[MAX_TRACE_BTFRAMES];
 } iotrace_entry_t;
 
-#if     DEVELOPMENT || DEBUG
+typedef struct {
+       int             vector;                 /* Vector number of interrupt */
+       thread_t        curthread;              /* Current thread at the time of the interrupt */
+       uint64_t        interrupted_pc;
+       int             curpl;                  /* Current preemption level */
+       int             curil;                  /* Current interrupt level */
+       uint64_t        start_time_abs;
+       uint64_t        duration;
+       uint64_t        backtrace[MAX_TRACE_BTFRAMES];
+} traptrace_entry_t;
+
 #define DEFAULT_IOTRACE_ENTRIES_PER_CPU (64)
 #define IOTRACE_MAX_ENTRIES_PER_CPU (256)
 extern volatile int mmiotrace_enabled;
@@ -154,7 +165,14 @@ extern int iotrace_entries_per_cpu;
 extern int *iotrace_next;
 extern iotrace_entry_t **iotrace_ring;
 
-extern void init_iotrace_bufs(int cpucnt, int entries_per_cpu);
+#define TRAPTRACE_INVALID_INDEX (~0U)
+#define DEFAULT_TRAPTRACE_ENTRIES_PER_CPU (16)
+#define TRAPTRACE_MAX_ENTRIES_PER_CPU (256)
+extern volatile int traptrace_enabled;
+extern int traptrace_generators;
+extern int traptrace_entries_per_cpu;
+extern int *traptrace_next;
+extern traptrace_entry_t **traptrace_ring;
 #endif /* DEVELOPMENT || DEBUG */
 
 /*
@@ -490,11 +508,12 @@ current_cpu_datap(void)
  */
 #if DEVELOPMENT || DEBUG
 static inline void
-rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata)
+rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata, uint64_t frameptr, bool use_cursp)
 {
        extern uint32_t         low_intstack[];         /* bottom */
        extern uint32_t         low_eintstack[];        /* top */
        extern char             mp_slave_stack[];
+       int                     btidx = 0;
 
        uint64_t kstackb, kstackt;
 
@@ -502,16 +521,21 @@ rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata)
         * element. This will also indicate if we were unable to
         * trace further up the stack for some reason
         */
-       __asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
-             : "=m" (rets[0])
-             :
-             : "rax");
-
+       if (use_cursp) {
+               __asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
+                     : "=m" (rets[btidx++])
+                     :
+                     : "rax");
+       }
 
        thread_t cplthread = cdata->cpu_active_thread;
        if (cplthread) {
                uintptr_t csp;
-               __asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
+               if (use_cursp == true) {
+                       __asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
+               } else {
+                       csp = frameptr;
+               }
                /* Determine which stack we're on to populate stack bounds.
                 * We don't need to trace across stack boundaries for this
                 * routine.
@@ -539,10 +563,10 @@ rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata)
                }
 
                if (__probable(kstackb && kstackt)) {
-                       uint64_t *cfp = (uint64_t *) __builtin_frame_address(0);
+                       uint64_t *cfp = (uint64_t *) frameptr;
                        int rbbtf;
 
-                       for (rbbtf = 1; rbbtf < maxframes; rbbtf++) {
+                       for (rbbtf = btidx; rbbtf < maxframes; rbbtf++) {
                                if (((uint64_t)cfp == 0) || (((uint64_t)cfp < kstackb) || ((uint64_t)cfp > kstackt))) {
                                        rets[rbbtf] = 0;
                                        continue;
@@ -577,7 +601,7 @@ pltrace_internal(boolean_t enable)
 
        cdata->cpu_plri = cplrecord;
 
-       rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata);
+       rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true);
 }
 
 extern int plctrace_enabled;
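
rbtrace_bt() now takes the frame pointer explicitly, so trap handlers can pass the interrupted context's RBP instead of their own, and it walks frames only while they stay inside the detected stack bounds. A standalone sketch of that walk under the standard x86-64 frame layout, keeping the kernel's convention of recording 0 for untraceable slots:

#include <stdint.h>

/* Conventional x86-64 frame: fp[0] = caller's frame pointer,
 * fp[1] = return address. Bounds describe whichever stack the
 * frame pointer landed on (kernel, interrupt, or slave stack). */
static void
fp_backtrace(uint64_t *rets, int maxframes, uint64_t fp,
    uint64_t stack_bottom, uint64_t stack_top)
{
	for (int i = 0; i < maxframes; i++) {
		if (fp == 0 || fp < stack_bottom || fp > stack_top) {
			rets[i] = 0;        /* record 0 and keep going, as rbtrace_bt does */
			continue;
		}
		const uint64_t *cfp = (const uint64_t *)fp;
		rets[i] = cfp[1];           /* return address */
		fp = cfp[0];                /* hop to the caller's frame */
	}
}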
@@ -610,8 +634,55 @@ iotrace(iotrace_type_e type, uint64_t vaddr, uint64_t paddr, int size, uint64_t
        iotrace_next[cpu_num] = ((nextidx + 1) >= iotrace_entries_per_cpu) ? 0 : (nextidx + 1);
 
        rbtrace_bt(&cur_iotrace_ring[nextidx].backtrace[0],
-           MAX_TRACE_BTFRAMES - 1, cdata);
+           MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true);
+}
+
+static inline uint32_t
+traptrace_start(int vecnum, uint64_t ipc, uint64_t sabs, uint64_t frameptr)
+{
+       cpu_data_t *cdata;
+       int cpu_num, nextidx;
+       traptrace_entry_t *cur_traptrace_ring;
+
+       if (__improbable(traptrace_enabled == 0 || traptrace_generators == 0)) {
+               return TRAPTRACE_INVALID_INDEX;
+       }
+
+       assert(ml_get_interrupts_enabled() == FALSE);
+       cdata = current_cpu_datap();
+       cpu_num = cdata->cpu_number;
+       nextidx = traptrace_next[cpu_num];
+       /* prevent nested interrupts from clobbering this record */
+       traptrace_next[cpu_num] = ((nextidx + 1) >= traptrace_entries_per_cpu) ? 0 : (nextidx + 1);
+
+       cur_traptrace_ring = traptrace_ring[cpu_num];
+
+       cur_traptrace_ring[nextidx].vector = vecnum;
+       cur_traptrace_ring[nextidx].curthread = current_thread();
+       cur_traptrace_ring[nextidx].interrupted_pc = ipc;
+       cur_traptrace_ring[nextidx].curpl = cdata->cpu_preemption_level;
+       cur_traptrace_ring[nextidx].curil = cdata->cpu_interrupt_level;
+       cur_traptrace_ring[nextidx].start_time_abs = sabs;
+       cur_traptrace_ring[nextidx].duration = ~0ULL;
+
+       rbtrace_bt(&cur_traptrace_ring[nextidx].backtrace[0],
+           MAX_TRACE_BTFRAMES - 1, cdata, frameptr, false);
+
+       assert(nextidx <= 0xFFFF);
+
+       return ((unsigned)cpu_num << 16) | nextidx;
+}
+
+static inline void
+traptrace_end(uint32_t index, uint64_t eabs)
+{
+       if (index != TRAPTRACE_INVALID_INDEX) {
+               traptrace_entry_t *ttentp = &traptrace_ring[index >> 16][index & 0xFFFF];
+
+               ttentp->duration = eabs - ttentp->start_time_abs;
+       }
 }
+
 #endif /* DEVELOPMENT || DEBUG */
 
 static inline void
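
traptrace_start() returns a single 32-bit handle with the CPU number in the high 16 bits and the ring slot in the low 16, which traptrace_end() splits to locate the entry and stamp its duration. The encoding in isolation (the slot bound mirrors the assert in traptrace_start()):

#include <assert.h>
#include <stdint.h>

static inline uint32_t
tt_handle_encode(unsigned cpu, unsigned slot)
{
	assert(slot <= 0xFFFF);         /* same bound traptrace_start() asserts */
	return (uint32_t)(cpu << 16) | (uint32_t)slot;
}

static inline void
tt_handle_decode(uint32_t handle, unsigned *cpu, unsigned *slot)
{
	*cpu  = handle >> 16;
	*slot = handle & 0xFFFF;
}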
index 37ce39b2a8cd4768d10f7ec362ebb4b505c41c43..7396b1b5fc22f0cc31989384f9d40fee3933f6a4 100644 (file)
@@ -55,6 +55,7 @@ extern cpu_data_t cpshadows[];
 
 #if DEVELOPMENT || DEBUG
 void iotrace_init(int ncpus);
+void traptrace_init(int ncpus);
 #endif /* DEVELOPMENT || DEBUG */
 
 
@@ -151,6 +152,7 @@ cpu_topology_sort(int ncpus)
 
 #if DEVELOPMENT || DEBUG
        iotrace_init(ncpus);
+       traptrace_init(ncpus);
 #endif /* DEVELOPMENT || DEBUG */
 
        /*
@@ -316,65 +318,117 @@ int iotrace_entries_per_cpu = 0;
 int *iotrace_next;
 iotrace_entry_t **iotrace_ring;
 
-void
-init_iotrace_bufs(int cpucnt, int entries_per_cpu)
+volatile int traptrace_enabled = 1;
+int traptrace_generators = 0;
+int traptrace_entries_per_cpu = 0;
+int *traptrace_next;
+traptrace_entry_t **traptrace_ring;
+
+static void
+init_trace_bufs(int cpucnt, int entries_per_cpu, void ***ring, int entry_size,
+    int **next_array, int *allocated_entries_per_cpu, int *allocated_generator_count)
 {
        int i;
 
-       iotrace_next = kalloc_tag(cpucnt * sizeof(int), VM_KERN_MEMORY_DIAG);
-       if (__improbable(iotrace_next == NULL)) {
-               iotrace_generators = 0;
+       *next_array = kalloc_tag(cpucnt * sizeof(int), VM_KERN_MEMORY_DIAG);
+       if (__improbable(*next_array == NULL)) {
+               *allocated_generator_count = 0;
                return;
        } else {
-               bzero(iotrace_next, cpucnt * sizeof(int));
+               bzero(*next_array, cpucnt * sizeof(int));
        }
 
-       iotrace_ring = kalloc_tag(cpucnt * sizeof(iotrace_entry_t *), VM_KERN_MEMORY_DIAG);
-       if (__improbable(iotrace_ring == NULL)) {
-               kfree(iotrace_next, cpucnt * sizeof(int));
-               iotrace_generators = 0;
+       *ring = kalloc_tag(cpucnt * sizeof(void *), VM_KERN_MEMORY_DIAG);
+       if (__improbable(*ring == NULL)) {
+               kfree(*next_array, cpucnt * sizeof(int));
+               *next_array = NULL;
+               *allocated_generator_count = 0;
                return;
        }
        for (i = 0; i < cpucnt; i++) {
-               iotrace_ring[i] = kalloc_tag(entries_per_cpu * sizeof(iotrace_entry_t), VM_KERN_MEMORY_DIAG);
-               if (__improbable(iotrace_ring[i] == NULL)) {
-                       kfree(iotrace_next, cpucnt * sizeof(int));
-                       iotrace_next = NULL;
+               (*ring)[i] = kalloc_tag(entries_per_cpu * entry_size, VM_KERN_MEMORY_DIAG);
+               if (__improbable((*ring)[i] == NULL)) {
+                       kfree(*next_array, cpucnt * sizeof(int));
+                       *next_array = NULL;
                        for (int j = 0; j < i; j++) {
-                               kfree(iotrace_ring[j], entries_per_cpu * sizeof(iotrace_entry_t));
+                               kfree((*ring)[j], entries_per_cpu * entry_size);
                        }
-                       kfree(iotrace_ring, cpucnt * sizeof(iotrace_entry_t *));
-                       iotrace_ring = NULL;
+                       kfree(*ring, cpucnt * sizeof(void *));
+                       *ring = NULL;
                        return;
                }
-               bzero(iotrace_ring[i], entries_per_cpu * sizeof(iotrace_entry_t));
+               bzero((*ring)[i], entries_per_cpu * entry_size);
        }
 
-       iotrace_entries_per_cpu = entries_per_cpu;
-       iotrace_generators = cpucnt;
+       *allocated_entries_per_cpu = entries_per_cpu;
+       *allocated_generator_count = cpucnt;
 }
 
-void
-iotrace_init(int ncpus)
+
+static void
+init_iotrace_bufs(int cpucnt, int entries_per_cpu)
 {
-       int iot, epc;
-       int entries_per_cpu;
+       init_trace_bufs(cpucnt, entries_per_cpu, (void ***)&iotrace_ring, sizeof(iotrace_entry_t),
+           &iotrace_next, &iotrace_entries_per_cpu, &iotrace_generators);
+}
+
+static void
+init_traptrace_bufs(int cpucnt, int entries_per_cpu)
+{
+       init_trace_bufs(cpucnt, entries_per_cpu, (void ***)&traptrace_ring, sizeof(traptrace_entry_t),
+           &traptrace_next, &traptrace_entries_per_cpu, &traptrace_generators);
+}
 
-       if (PE_parse_boot_argn("iotrace", &iot, sizeof(iot))) {
-               mmiotrace_enabled = iot;
+static void
+gentrace_configure_from_bootargs(const char *ena_prop, int *ena_valp, const char *epc_prop,
+    int *epcp, int max_epc, int def_epc, int override)
+{
+       if (kern_feature_override(override)) {
+               *ena_valp = 0;
        }
 
-       if (mmiotrace_enabled == 0) {
+       (void) PE_parse_boot_argn(ena_prop, ena_valp, sizeof(*ena_valp));
+
+       if (*ena_valp == 0) {
                return;
        }
 
-       if (PE_parse_boot_argn("iotrace_epc", &epc, sizeof(epc)) &&
-           epc >= 1 && epc <= IOTRACE_MAX_ENTRIES_PER_CPU) {
-               entries_per_cpu = epc;
-       } else {
-               entries_per_cpu = DEFAULT_IOTRACE_ENTRIES_PER_CPU;
+       if (PE_parse_boot_argn(epc_prop, epcp, sizeof(*epcp)) &&
+           (*epcp < 1 || *epcp > max_epc)) {
+               *epcp = def_epc;
        }
+}
+
+void
+iotrace_init(int ncpus)
+{
+       int entries_per_cpu = DEFAULT_IOTRACE_ENTRIES_PER_CPU;
+       int enable = mmiotrace_enabled;
 
-       init_iotrace_bufs(ncpus, entries_per_cpu);
+       gentrace_configure_from_bootargs("iotrace", &enable, "iotrace_epc", &entries_per_cpu,
+           IOTRACE_MAX_ENTRIES_PER_CPU, DEFAULT_IOTRACE_ENTRIES_PER_CPU, KF_IOTRACE_OVRD);
+
+       mmiotrace_enabled = enable;
+
+       if (mmiotrace_enabled) {
+               init_iotrace_bufs(ncpus, entries_per_cpu);
+       }
 }
+
+void
+traptrace_init(int ncpus)
+{
+       int entries_per_cpu = DEFAULT_TRAPTRACE_ENTRIES_PER_CPU;
+       int enable = traptrace_enabled;
+
+       gentrace_configure_from_bootargs("traptrace", &enable, "traptrace_epc", &entries_per_cpu,
+           TRAPTRACE_MAX_ENTRIES_PER_CPU, DEFAULT_TRAPTRACE_ENTRIES_PER_CPU, KF_TRAPTRACE_OVRD);
+
+       traptrace_enabled = enable;
+
+       if (traptrace_enabled) {
+               init_traptrace_bufs(ncpus, entries_per_cpu);
+       }
+}
+
 #endif /* DEVELOPMENT || DEBUG */
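
Both facilities are now configured by the shared helper, so each responds to an enable boot-arg and an entries-per-CPU boot-arg (for example traptrace=1 traptrace_epc=64 on a DEVELOPMENT kernel), with out-of-range counts falling back to the default. A sketch of that clamping policy in isolation, with parse_boot_arg() standing in for PE_parse_boot_argn():

#include <stdbool.h>

extern bool parse_boot_arg(const char *name, int *val);

/* The enable flag keeps its compiled-in default unless a boot-arg
 * overrides it; the entry count reverts to the default whenever the
 * parsed value falls outside [1, max_epc]. */
static void
configure_trace_bootargs(const char *ena_arg, int *enable,
    const char *epc_arg, int *epc, int max_epc, int def_epc)
{
	(void)parse_boot_arg(ena_arg, enable);
	if (*enable == 0) {
		return;
	}
	if (parse_boot_arg(epc_arg, epc) && (*epc < 1 || *epc > max_epc)) {
		*epc = def_epc;
	}
}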
index 0fafb3aad65d1e2a230a925ac1c93b66c9043830..25a26de3bd1d298d1e5bef0efbb6f0de746edfed 100644 (file)
@@ -213,6 +213,7 @@ static cpuid_cache_descriptor_t intel_cpuid_leaf2_descriptor_table[] = {
                                sizeof(cpuid_cache_descriptor_t))
 
 static void do_cwas(i386_cpu_info_t *cpuinfo, boolean_t on_slave);
+static void cpuid_do_precpuid_was(void);
 
 static inline cpuid_cache_descriptor_t *
 cpuid_leaf2_find(uint8_t value)
@@ -257,6 +258,7 @@ do_cwas(i386_cpu_info_t *cpuinfo, boolean_t on_slave)
         * enumerated, lest we #GP when forced to access it.)
         */
        if (cpuid_wa_required(CPU_INTEL_TSXFA) == CWA_ON) {
+               /* This must be executed on all logical processors */
                wrmsr64(MSR_IA32_TSX_FORCE_ABORT,
                    rdmsr64(MSR_IA32_TSX_FORCE_ABORT) | MSR_IA32_TSXFA_RTM_FORCE_ABORT);
        }
@@ -897,6 +899,9 @@ cpuid_set_info(void)
        i386_cpu_info_t         *info_p = &cpuid_cpu_info;
        boolean_t               enable_x86_64h = TRUE;
 
+       /* Perform pre-cpuid workarounds (since their effects impact values returned via cpuid) */
+       cpuid_do_precpuid_was();
+
        cpuid_set_generic_info(info_p);
 
        /* verify we are running on a supported CPU */
@@ -1370,10 +1375,10 @@ cpuid_vmm_family(void)
 cwa_classifier_e
 cpuid_wa_required(cpu_wa_e wa)
 {
+       i386_cpu_info_t *info_p = &cpuid_cpu_info;
        static uint64_t bootarg_cpu_wa_enables = 0;
        static uint64_t bootarg_cpu_wa_disables = 0;
        static int bootargs_overrides_processed = 0;
-       i386_cpu_info_t *info_p = &cpuid_cpu_info;
 
        if (!bootargs_overrides_processed) {
                if (!PE_parse_boot_argn("cwae", &bootarg_cpu_wa_enables, sizeof(bootarg_cpu_wa_enables))) {
@@ -1420,7 +1425,7 @@ cpuid_wa_required(cpu_wa_e wa)
 
        case CPU_INTEL_TSXFA:
                /*
-                * If this CPU supports RTM and supports FORCE_ABORT, return that
+                * Otherwise, if the CPU supports both TSX(HLE) and FORCE_ABORT, return that
                 * the workaround should be enabled.
                 */
                if ((info_p->cpuid_leaf7_extfeatures & CPUID_LEAF7_EXTFEATURE_TSXFA) != 0 &&
@@ -1435,3 +1440,14 @@ cpuid_wa_required(cpu_wa_e wa)
 
        return CWA_OFF;
 }
+
+static void
+cpuid_do_precpuid_was(void)
+{
+       /*
+        * Note that care must be taken not to use any of the cached cpuid data, since it is
+        * likely uninitialized at this point.  That includes calling functions that make use
+        * of that data.
+        */
+
+}
index c80308084014e33309c1b99ffd4ec18388994a7b..a3a6ad6eeb9359f1096dcb8b82fa5300d6ec8ffa 100644 (file)
@@ -476,7 +476,7 @@ typedef struct {
 
 typedef enum {
        CPU_INTEL_SEGCHK = 1,
-       CPU_INTEL_TSXFA
+       CPU_INTEL_TSXFA = 2
 } cpu_wa_e;
 
 typedef enum {
index 82ab4423f91e3a704bfb6c94f1a62f6099d997ba..b05c65b9af114eaf176d25d73bdb2d57ed31923a 100644 (file)
@@ -1295,7 +1295,7 @@ fpextovrflt(void)
        intr = ml_set_interrupts_enabled(FALSE);
 
        if (get_interrupt_level()) {
-               panic("FPU segment overrun exception  at interrupt context\n");
+               panic("FPU segment overrun exception at interrupt context\n");
        }
        if (current_task() == kernel_task) {
                panic("FPU segment overrun exception in kernel thread context\n");
@@ -1327,12 +1327,6 @@ fpextovrflt(void)
        if (ifps) {
                fp_state_free(ifps, xstate);
        }
-
-       /*
-        * Raise exception.
-        */
-       i386_exception(EXC_BAD_ACCESS, VM_PROT_READ | VM_PROT_EXECUTE, 0);
-       /*NOTREACHED*/
 }
 
 extern void fpxlog(int, uint32_t, uint32_t, uint32_t);
@@ -1369,16 +1363,6 @@ fpexterrflt(void)
        const uint32_t xcpt = ~mask & (ifps->fx_status &
            (FPS_IE | FPS_DE | FPS_ZE | FPS_OE | FPS_UE | FPS_PE));
        fpxlog(EXC_I386_EXTERR, ifps->fx_status, ifps->fx_control, xcpt);
-       /*
-        * Raise FPU exception.
-        * Locking not needed on pcb->ifps,
-        * since thread is running.
-        */
-       i386_exception(EXC_ARITHMETIC,
-           EXC_I386_EXTERR,
-           ifps->fx_status);
-
-       /*NOTREACHED*/
 }
 
 /*
@@ -1473,11 +1457,6 @@ fpSSEexterrflt(void)
        const uint32_t xcpt = ~mask & (ifps->fx_MXCSR &
            (FPS_IE | FPS_DE | FPS_ZE | FPS_OE | FPS_UE | FPS_PE));
        fpxlog(EXC_I386_SSEEXTERR, ifps->fx_MXCSR, ifps->fx_MXCSR, xcpt);
-
-       i386_exception(EXC_ARITHMETIC,
-           EXC_I386_SSEEXTERR,
-           ifps->fx_MXCSR);
-       /*NOTREACHED*/
 }
 
 
@@ -1592,8 +1571,8 @@ fpu_thread_promote_avx512(thread_t thread)
  * return directly via thread_exception_return().
  * Otherwise simply return.
  */
-#define MAX_X86_INSN_LENGTH (16)
-void
+#define MAX_X86_INSN_LENGTH (15)
+int
 fpUDflt(user_addr_t rip)
 {
        uint8_t         instruction_prefix;
@@ -1605,7 +1584,7 @@ fpUDflt(user_addr_t rip)
                 * rather than issue multiple copyins
                 */
                if (copyin(rip, (char *) &instruction_prefix, 1)) {
-                       return;
+                       return 1;
                }
                DBG("fpUDflt(0x%016llx) prefix: 0x%x\n",
                    rip, instruction_prefix);
@@ -1624,7 +1603,7 @@ fpUDflt(user_addr_t rip)
                        /* Skip optional prefixes */
                        rip++;
                        if ((rip - original_rip) > MAX_X86_INSN_LENGTH) {
-                               return;
+                               return 1;
                        }
                        break;
                case 0x62:      /* EVEX */
@@ -1633,7 +1612,7 @@ fpUDflt(user_addr_t rip)
                        is_AVX512_instruction = TRUE;
                        break;
                default:
-                       return;
+                       return 1;
                }
        } while (!is_AVX512_instruction);
 
@@ -1643,7 +1622,7 @@ fpUDflt(user_addr_t rip)
         * Fail if this machine doesn't support AVX512
         */
        if (fpu_capability != AVX512) {
-               return;
+               return 1;
        }
 
        assert(xgetbv(XCR0) == AVX_XMASK);
@@ -1651,8 +1630,7 @@ fpUDflt(user_addr_t rip)
        DBG("fpUDflt() switching xstate to AVX512\n");
        (void) fpu_thread_promote_avx512(current_thread());
 
-       thread_exception_return();
-       /* NOT REACHED */
+       return 0;
 }
 #endif /* !defined(RC_HIDE_XNU_J137) */
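
fpUDflt() used to return to user space itself via thread_exception_return(); it now reports an int so the trap handler decides: 0 means the #UD was an AVX512 instruction and the thread's xstate was promoted (re-run the instruction), 1 means a genuine invalid opcode. The hunk also corrects MAX_X86_INSN_LENGTH to 15, the architectural limit on x86 instruction length. A sketch of the resulting caller contract, matching the user_trap() change later in this diff (exception constants as defined in the Mach headers):

#include <stdbool.h>
#include <stdint.h>

#define EXC_BAD_INSTRUCTION 2   /* <mach/exception_types.h> */
#define EXC_I386_INVOP      1   /* <mach/i386/exception.h>  */

extern int fpUDflt(uint64_t rip);   /* 0: promoted to AVX512, 1: real #UD */

/* Returns true when a Mach exception should be delivered. */
static bool
handle_invalid_opcode(uint64_t rip, int *exc, int *code)
{
	if (fpUDflt(rip) == 1) {
		*exc  = EXC_BAD_INSTRUCTION;
		*code = EXC_I386_INVOP;
		return true;
	}
	return false;   /* xstate promoted; retry the faulting instruction */
}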
 
index 7042cea10b9cafcbd720d7491b54b159bd640e4e..542de23ebcd1d8b5f680a56cddda52f80998f917 100644 (file)
@@ -143,7 +143,7 @@ extern void             fpu_switch_addrmode(
 extern xstate_t         fpu_default;
 extern xstate_t         fpu_capability;
 extern xstate_t         current_xstate(void);
-extern void             fpUDflt(user_addr_t rip);
+extern int              fpUDflt(user_addr_t rip);
 #ifdef MACH_KERNEL_PRIVATE
 extern uint32_t thread_fpsimd_hash(thread_t);
 extern void vzeroall(void);
index f1b9f1e1204e8a468346354eec85fb45b787de33..aef78bcbd7b4bcbc347cfdf7b169907c3acf0be3 100644 (file)
@@ -854,9 +854,6 @@ do_init_slave(boolean_t fast_restart)
 #endif
                /* update CPU microcode */
                ucode_update_wake();
-
-               /* Do CPU workarounds after the microcode update */
-               cpuid_do_was();
        } else {
                init_param = FAST_SLAVE_INIT;
        }
index cf5c384e485e23d6dae9b78ec69582cc5a0dece9..022491a8972d7219683f9df088e82f0566691fdc 100644 (file)
@@ -568,6 +568,7 @@ __END_DECLS
 
 #define MSR_IA32_BBL_CR_CTL                     0x119
 
+
 #define MSR_IA32_SYSENTER_CS                    0x174
 #define MSR_IA32_SYSENTER_ESP                   0x175
 #define MSR_IA32_SYSENTER_EIP                   0x176
index bfc24c4aa93e45faf1235cf34669a3eabfacc287..4231b1f26ea8a33e40c25dcc353eb9dffd52cedd 100644 (file)
@@ -116,7 +116,6 @@ extern void kprint_state(x86_saved_state64_t *saved_state);
 /*
  * Forward declarations
  */
-static void user_page_fault_continue(kern_return_t kret);
 static void panic_trap(x86_saved_state64_t *saved_state, uint32_t pl, kern_return_t fault_result) __dead2;
 static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip);
 
@@ -206,36 +205,6 @@ thread_syscall_return(
        /*NOTREACHED*/
 }
 
-
-static inline void
-user_page_fault_continue(
-       kern_return_t  kr)
-{
-       thread_t        thread = current_thread();
-       user_addr_t     vaddr;
-
-       if (thread_is_64bit_addr(thread)) {
-               x86_saved_state64_t     *uregs;
-
-               uregs = USER_REGS64(thread);
-
-               vaddr = (user_addr_t)uregs->cr2;
-       } else {
-               x86_saved_state32_t     *uregs;
-
-               uregs = USER_REGS32(thread);
-
-               vaddr = uregs->cr2;
-       }
-
-
-       /* PAL debug hook */
-       pal_dbg_page_fault( thread, vaddr, kr );
-
-       i386_exception(EXC_BAD_ACCESS, kr, vaddr);
-       /*NOTREACHED*/
-}
-
 /*
  * Fault recovery in copyin/copyout routines.
  */
@@ -374,6 +343,11 @@ interrupt(x86_saved_state_t *state)
                user_mode = TRUE;
        }
 
+#if DEVELOPMENT || DEBUG
+       uint64_t frameptr = is_saved_state64(state) ? state64->rbp : saved_state32(state)->ebp;
+       uint32_t traptrace_index = traptrace_start(interrupt_num, rip, mach_absolute_time(), frameptr);
+#endif
+
        if (cpu_data_ptr[cnum]->lcpu.package->num_idle == topoParms.nLThreadsPerPackage) {
                cpu_data_ptr[cnum]->cpu_hwIntpexits[interrupt_num]++;
        }
@@ -492,6 +466,12 @@ interrupt(x86_saved_state_t *state)
            interrupt_num);
 
        assert(ml_get_interrupts_enabled() == FALSE);
+
+#if DEVELOPMENT || DEBUG
+       if (traptrace_index != TRAPTRACE_INVALID_INDEX) {
+               traptrace_end(traptrace_index, mach_absolute_time());
+       }
+#endif
 }
 
 static inline void
@@ -553,6 +533,10 @@ kernel_trap(
 
        is_user = (vaddr < VM_MAX_USER_PAGE_ADDRESS);
 
+#if DEVELOPMENT || DEBUG
+       uint32_t traptrace_index = traptrace_start(type, kern_ip, mach_absolute_time(), saved_state->rbp);
+#endif
+
 #if CONFIG_DTRACE
        /*
         * Is there a DTrace hook?
@@ -562,7 +546,7 @@ kernel_trap(
                        /*
                         * If it succeeds, we are done...
                         */
-                       return;
+                       goto common_return;
                }
        }
 #endif /* CONFIG_DTRACE */
@@ -578,7 +562,8 @@ kernel_trap(
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                    (MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
                    0, 0, 0, VM_KERNEL_UNSLIDE(kern_ip), 0);
-               return;
+
+               goto common_return;
        }
 
        user_addr_t     kd_vaddr = is_user ? vaddr : VM_KERNEL_UNSLIDE(vaddr);
@@ -675,19 +660,19 @@ kernel_trap(
        switch (type) {
        case T_NO_FPU:
                fpnoextflt();
-               return;
+               goto common_return;
 
        case T_FPU_FAULT:
                fpextovrflt();
-               return;
+               goto common_return;
 
        case T_FLOATING_POINT_ERROR:
                fpexterrflt();
-               return;
+               goto common_return;
 
        case T_SSE_FLOAT_ERROR:
                fpSSEexterrflt();
-               return;
+               goto common_return;
 
        case T_INVALID_OPCODE:
                fpUDflt(kern_ip);
@@ -701,7 +686,7 @@ kernel_trap(
                         * This isn't supposed to happen.
                         */
                        reset_dr7();
-                       return;
+                       goto common_return;
                }
                goto debugger_entry;
        case T_INT3:
@@ -745,7 +730,7 @@ kernel_trap(
                                (void) ml_set_interrupts_enabled(intr);
                        }
 #endif /* NCOPY_WINDOWS > 0 */
-                       return;
+                       goto common_return;
                }
                /*
                 * fall through
@@ -762,7 +747,7 @@ FALL_THROUGH:
                for (rp = recover_table; rp < recover_table_end; rp++) {
                        if (kern_ip == rp->fault_addr) {
                                set_recovery_ip(saved_state, rp->recover_addr);
-                               return;
+                               goto common_return;
                        }
                }
 
@@ -772,7 +757,7 @@ FALL_THROUGH:
                if (thread != THREAD_NULL && thread->recover) {
                        set_recovery_ip(saved_state, thread->recover);
                        thread->recover = 0;
-                       return;
+                       goto common_return;
                }
        /*
         * Unanticipated page-fault errors in kernel
@@ -787,7 +772,7 @@ FALL_THROUGH:
                 */
                if (type == 15) {
                        kprintf("kernel_trap() ignoring spurious trap 15\n");
-                       return;
+                       goto common_return;
                }
 debugger_entry:
                /* Ensure that the i386_kernel_state at the base of the
@@ -798,7 +783,7 @@ debugger_entry:
                sync_iss_to_iks(state);
 #if  MACH_KDP
                if (kdp_i386_trap(type, saved_state, result, (vm_offset_t)vaddr)) {
-                       return;
+                       goto common_return;
                }
 #endif
        }
@@ -807,6 +792,14 @@ debugger_entry:
        /*
         * NO RETURN
         */
+
+common_return:
+#if DEVELOPMENT || DEBUG
+       if (traptrace_index != TRAPTRACE_INVALID_INDEX) {
+               traptrace_end(traptrace_index, mach_absolute_time());
+       }
+#endif
+       return;
 }
 
 static void
@@ -907,7 +900,9 @@ user_trap(
        kern_return_t           kret;
        user_addr_t             rip;
        unsigned long           dr6 = 0; /* 32 bit for i386, 64 bit for x86_64 */
-
+#if DEVELOPMENT || DEBUG
+       uint32_t                traptrace_index;
+#endif
        assert((is_saved_state32(saved_state) && !thread_is_64bit_addr(thread)) ||
            (is_saved_state64(saved_state) && thread_is_64bit_addr(thread)));
 
@@ -923,6 +918,9 @@ user_trap(
                err  = (int)regs->isf.err & 0xffff;
                vaddr = (user_addr_t)regs->cr2;
                rip   = (user_addr_t)regs->isf.rip;
+#if DEVELOPMENT || DEBUG
+               traptrace_index = traptrace_start(type, rip, mach_absolute_time(), regs->rbp);
+#endif
        } else {
                x86_saved_state32_t     *regs;
 
@@ -935,8 +933,12 @@ user_trap(
                err   = regs->err & 0xffff;
                vaddr = (user_addr_t)regs->cr2;
                rip   = (user_addr_t)regs->eip;
+#if DEVELOPMENT || DEBUG
+               traptrace_index = traptrace_start(type, rip, mach_absolute_time(), regs->ebp);
+#endif
        }
 
+
        if ((type == T_DEBUG) && thread->machine.ids) {
                unsigned long clear = 0;
                /* Stash and clear this processor's DR6 value, in the event
@@ -1023,20 +1025,25 @@ user_trap(
                break;
 
        case T_INVALID_OPCODE:
-#if !defined(RC_HIDE_XNU_J137)
-               fpUDflt(rip);   /* May return from exception directly */
-#endif
-               exc = EXC_BAD_INSTRUCTION;
-               code = EXC_I386_INVOP;
+               if (fpUDflt(rip) == 1) {
+                       exc = EXC_BAD_INSTRUCTION;
+                       code = EXC_I386_INVOP;
+               }
                break;
 
        case T_NO_FPU:
                fpnoextflt();
-               return;
+               break;
 
        case T_FPU_FAULT:
-               fpextovrflt(); /* Propagates exception directly, doesn't return */
-               return;
+               fpextovrflt();
+               /*
+                * Raise exception.
+                */
+               exc = EXC_BAD_ACCESS;
+               code = VM_PROT_READ | VM_PROT_EXECUTE;
+               subcode = 0;
+               break;
 
        case T_INVALID_TSS:     /* invalid TSS == iret with NT flag set */
                exc = EXC_BAD_INSTRUCTION;
@@ -1114,30 +1121,37 @@ user_trap(
                }
 #endif
                if (__probable((kret == KERN_SUCCESS) || (kret == KERN_ABORTED))) {
-                       thread_exception_return();
-                       /*NOTREACHED*/
-               }
-
-               /*
-                * For a user trap, vm_fault() should never return KERN_FAILURE.
-                * If it does, we're leaking preemption disables somewhere in the kernel.
-                */
-               if (__improbable(kret == KERN_FAILURE)) {
+                       break;
+               } else if (__improbable(kret == KERN_FAILURE)) {
+                       /*
+                        * For a user trap, vm_fault() should never return KERN_FAILURE.
+                        * If it does, we're leaking preemption disables somewhere in the kernel.
+                        */
                        panic("vm_fault() KERN_FAILURE from user fault on thread %p", thread);
                }
 
-               user_page_fault_continue(kret);
-       }       /* NOTREACHED */
+               /* PAL debug hook (empty on x86) */
+               pal_dbg_page_fault(thread, vaddr, kret);
+               exc = EXC_BAD_ACCESS;
+               code = kret;
+               subcode = vaddr;
+       }
        break;
 
        case T_SSE_FLOAT_ERROR:
-               fpSSEexterrflt(); /* Propagates exception directly, doesn't return */
-               return;
+               fpSSEexterrflt();
+               exc = EXC_ARITHMETIC;
+               code = EXC_I386_SSEEXTERR;
+               subcode = ((struct x86_fx_thread_state *)thread->machine.ifps)->fx_MXCSR;
+               break;
 
 
        case T_FLOATING_POINT_ERROR:
-               fpexterrflt(); /* Propagates exception directly, doesn't return */
-               return;
+               fpexterrflt();
+               exc = EXC_ARITHMETIC;
+               code = EXC_I386_EXTERR;
+               subcode = ((struct x86_fx_thread_state *)thread->machine.ifps)->fx_status;
+               break;
 
        case T_DTRACE_RET:
 #if CONFIG_DTRACE
@@ -1156,11 +1170,21 @@ user_trap(
        default:
                panic("Unexpected user trap, type %d", type);
        }
-       /* Note: Codepaths that directly return from user_trap() have pending
-        * ASTs processed in locore
-        */
-       i386_exception(exc, code, subcode);
-       /* NOTREACHED */
+
+#if DEVELOPMENT || DEBUG
+       if (traptrace_index != TRAPTRACE_INVALID_INDEX) {
+               traptrace_end(traptrace_index, mach_absolute_time());
+       }
+#endif
+
+       if (exc != 0) {
+               /*
+                * Note: Codepaths that directly return from user_trap() have pending
+                * ASTs processed in locore
+                */
+               i386_exception(exc, code, subcode);
+               /* NOTREACHED */
+       }
 }
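
The hunks above all follow one pattern: the trap entry records a trace slot on DEVELOPMENT/DEBUG kernels, the handler cases set exc/code/subcode and fall through instead of returning early, and the common exit closes the trace before raising any Mach exception. A minimal sketch of that shape, assembled from the hunks in this diff (the function name is hypothetical, types are approximated, and the dispatch body is elided):

	static void
	user_trap_sketch(int type, user_addr_t rip, uint64_t fp)
	{
		int exc = 0, code = 0;
		long subcode = 0;
	#if DEVELOPMENT || DEBUG
		int traptrace_index = traptrace_start(type, rip, mach_absolute_time(), fp);
	#endif
		/* ... switch (type): cases set exc/code/subcode and break ... */
	#if DEVELOPMENT || DEBUG
		if (traptrace_index != TRAPTRACE_INVALID_INDEX) {
			traptrace_end(traptrace_index, mach_absolute_time());
		}
	#endif
		if (exc != 0) {
			i386_exception(exc, code, subcode);
			/* NOTREACHED */
		}
	}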
 
 /*
index b5613be392f826c0318a547bda4d2924d3a83c47..da7e55e201812c53861b9ac7bff797465f125ec5 100644 (file)
@@ -171,11 +171,19 @@ panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boole
 void
 panic_double_fault64(x86_saved_state_t *sp)
 {
+#if DEVELOPMENT || DEBUG
+       uint64_t frameptr = is_saved_state64(sp) ? saved_state64(sp)->rbp : saved_state32(sp)->ebp;
+       (void) traptrace_start(T_DOUBLE_FAULT, saved_state64(sp)->isf.rip, mach_absolute_time(), frameptr);
+#endif
        (void)OSCompareAndSwap((UInt32) - 1, (UInt32) cpu_number(), (volatile UInt32 *)&panic_double_fault_cpu);
        panic_64(sp, PANIC_DOUBLE_FAULT, "Double fault", FALSE);
 }
 void
 panic_machine_check64(x86_saved_state_t *sp)
 {
+#if DEVELOPMENT || DEBUG
+       uint64_t frameptr = is_saved_state64(sp) ? saved_state64(sp)->rbp : saved_state32(sp)->ebp;
+       (void) traptrace_start(T_MACHINE_CHECK, saved_state64(sp)->isf.rip, mach_absolute_time(), frameptr);
+#endif
        panic_64(sp, PANIC_MACHINE_CHECK, "Machine Check", TRUE);
 }
index a9e9a12f5b5571425883482b4f6d0315b4fda0e9..13925061727b24b95c675e226f4f2cd971e88e70 100644 (file)
@@ -252,8 +252,6 @@ xcpu_update(void)
        cpu_apply_microcode();
        /* Update the cpuid info */
        ucode_cpuid_set_info();
-       /* Now apply workarounds */
-       cpuid_do_was();
        mp_enable_preemption();
 
        /* Get all other CPUs to perform the update */
index 4bec21084b2c88a77d49f5edc94f72c2342e4a4c..360879748d76c9db446377ebad1bd6cdc0c907e2 100644 (file)
@@ -120,15 +120,19 @@ static void ipc_mqueue_peek_on_thread(
  */
 void
 ipc_mqueue_init(
-       ipc_mqueue_t    mqueue,
-       boolean_t       is_set)
+       ipc_mqueue_t            mqueue,
+       ipc_mqueue_kind_t       kind)
 {
-       if (is_set) {
+       switch (kind) {
+       case IPC_MQUEUE_KIND_SET:
                waitq_set_init(&mqueue->imq_set_queue,
                    SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST,
                    NULL, NULL);
-       } else {
-               waitq_init(&mqueue->imq_wait_queue, SYNC_POLICY_FIFO | SYNC_POLICY_PORT);
+               break;
+       case IPC_MQUEUE_KIND_NONE: /* cheat: we really should have "no" mqueue */
+       case IPC_MQUEUE_KIND_PORT:
+               waitq_init(&mqueue->imq_wait_queue,
+                   SYNC_POLICY_FIFO | SYNC_POLICY_TURNSTILE_PROXY);
                ipc_kmsg_queue_init(&mqueue->imq_messages);
                mqueue->imq_seqno = 0;
                mqueue->imq_msgcount = 0;
@@ -138,6 +142,7 @@ ipc_mqueue_init(
 #if MACH_FLIPC
                mqueue->imq_fport = FPORT_NULL;
 #endif
+               break;
        }
        klist_init(&mqueue->imq_klist);
 }
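
For reference, the updated call sites later in this diff pass the kind explicitly instead of a boolean; a summary sketch drawn from the ipc_pset.c and ipc_port.c hunks below:

	ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);   /* portset */
	ipc_mqueue_init(&port->ip_messages, IPC_MQUEUE_KIND_PORT);   /* port with a real queue */
	ipc_mqueue_init(&port->ip_messages, IPC_MQUEUE_KIND_NONE);   /* port without a queue */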
@@ -1147,7 +1152,7 @@ ipc_mqueue_receive_on_thread(
                        imq_unlock(port_mq);
                        return THREAD_NOT_WAITING;
                }
-       } else if (imq_is_queue(mqueue)) {
+       } else if (imq_is_queue(mqueue) || imq_is_turnstile_proxy(mqueue)) {
                ipc_kmsg_queue_t kmsgs;
 
                /*
@@ -1199,8 +1204,7 @@ ipc_mqueue_receive_on_thread(
        }
 
        /*
-        * Threads waiting on a special reply port
-        * (not portset or regular ports)
+        * Threads waiting on a reply port (not portset)
         * will wait on its receive turnstile.
         *
         * Donate waiting thread's turnstile and
@@ -1217,7 +1221,7 @@ ipc_mqueue_receive_on_thread(
         * will be converted to the turnstile waitq
         * in waitq_assert_wait instead of global waitqs.
         */
-       if (imq_is_queue(mqueue) && ip_from_mq(mqueue)->ip_specialreply) {
+       if (imq_is_turnstile_proxy(mqueue)) {
                ipc_port_t port = ip_from_mq(mqueue);
                rcv_turnstile = turnstile_prepare((uintptr_t)port,
                    port_rcv_turnstile_address(port),
index 4e6fb324031b7392dfec4f43669ae2ee847c16b3..f982ba6771e9706e8e26e2846a6bbca5e8ecc2f9 100644 (file)
@@ -159,6 +159,8 @@ typedef struct ipc_mqueue {
 #define imq_set_queue           data.pset.setq
 #define imq_is_set(mq)          waitqs_is_set(&(mq)->imq_set_queue)
 #define imq_is_queue(mq)        waitq_is_queue(&(mq)->imq_wait_queue)
+#define imq_is_turnstile_proxy(mq) \
+               waitq_is_turnstile_proxy(&(mq)->imq_wait_queue)
 #define imq_is_valid(mq)        waitq_is_valid(&(mq)->imq_wait_queue)
 
 #define imq_unlock(mq)          waitq_unlock(&(mq)->imq_wait_queue)
@@ -199,10 +201,16 @@ extern int ipc_mqueue_full;
  * Exported interfaces
  */
 
+__enum_closed_decl(ipc_mqueue_kind_t, int, {
+       IPC_MQUEUE_KIND_NONE,   /* this mqueue really isn't used */
+       IPC_MQUEUE_KIND_PORT,   /* this queue is a regular port queue */
+       IPC_MQUEUE_KIND_SET,    /* this queue is a portset queue */
+});
+
 /* Initialize a newly-allocated message queue */
 extern void ipc_mqueue_init(
        ipc_mqueue_t            mqueue,
-       boolean_t               is_set);
+       ipc_mqueue_kind_t       kind);
 
 /* de-initialize / cleanup an mqueue (specifically waitq resources) */
 extern void ipc_mqueue_deinit(
index f677c6e28c498b6004a7a692464e4dd02855204b..9744b2b627c8fbcfce225e89d24508602c8e7cbb 100644 (file)
@@ -158,7 +158,7 @@ void
 ipc_notify_send_once(
        ipc_port_t      port)
 {
-       ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN, FALSE);
+       ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN);
 
        (void)mach_notify_send_once(port);
        /* send-once right consumed */
index cd8c04b817db9f067c89650b18f7f37ca8d325b6..b8cddf28ae5d75998d6242b1592076b04a510291 100644 (file)
@@ -638,6 +638,7 @@ void
 ipc_port_init(
        ipc_port_t              port,
        ipc_space_t             space,
+       ipc_port_init_flags_t   flags,
        mach_port_name_t        name)
 {
        /* port->ip_kobject doesn't have to be initialized */
@@ -648,6 +649,10 @@ ipc_port_init(
        port->ip_mscount = 0;
        port->ip_srights = 0;
        port->ip_sorights = 0;
+       if (flags & IPC_PORT_INIT_MAKE_SEND_RIGHT) {
+               port->ip_srights = 1;
+               port->ip_mscount = 1;
+       }
 
        port->ip_nsrequest = IP_NULL;
        port->ip_pdrequest = IP_NULL;
@@ -669,17 +674,19 @@ ipc_port_init(
        port->ip_immovable_send = 0;
        port->ip_impcount    = 0;
 
-       port->ip_specialreply = 0;
+       port->ip_specialreply = (flags & IPC_PORT_INIT_SPECIAL_REPLY) != 0;
        port->ip_sync_link_state = PORT_SYNC_LINK_ANY;
        port->ip_sync_bootstrap_checkin = 0;
-       port->ip_watchport_elem = NULL;
 
        ipc_special_reply_port_bits_reset(port);
 
        port->ip_send_turnstile = TURNSTILE_NULL;
 
-       ipc_mqueue_init(&port->ip_messages,
-           FALSE /* !set */);
+       ipc_mqueue_kind_t kind = IPC_MQUEUE_KIND_NONE;
+       if (flags & IPC_PORT_INIT_MESSAGE_QUEUE) {
+               kind = IPC_MQUEUE_KIND_PORT;
+       }
+       ipc_mqueue_init(&port->ip_messages, kind);
 }
 
 /*
@@ -699,7 +706,7 @@ ipc_port_init(
 kern_return_t
 ipc_port_alloc(
        ipc_space_t             space,
-       bool                    make_send_right,
+       ipc_port_init_flags_t   flags,
        mach_port_name_t        *namep,
        ipc_port_t              *portp)
 {
@@ -714,7 +721,7 @@ ipc_port_alloc(
        ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX);
 #endif /* MACH_ASSERT */
 
-       if (make_send_right) {
+       if (flags & IPC_PORT_INIT_MAKE_SEND_RIGHT) {
                type |= MACH_PORT_TYPE_SEND;
                urefs = 1;
        }
@@ -725,13 +732,7 @@ ipc_port_alloc(
        }
 
        /* port and space are locked */
-       ipc_port_init(port, space, name);
-
-       if (make_send_right) {
-               /* ipc_object_alloc() already made the entry reference */
-               port->ip_srights++;
-               port->ip_mscount++;
-       }
+       ipc_port_init(port, space, flags, name);
 
 #if     MACH_ASSERT
        ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX);
@@ -763,19 +764,25 @@ ipc_port_alloc(
 kern_return_t
 ipc_port_alloc_name(
        ipc_space_t             space,
+       ipc_port_init_flags_t   flags,
        mach_port_name_t        name,
        ipc_port_t              *portp)
 {
        ipc_port_t port;
        kern_return_t kr;
+       mach_port_type_t type = MACH_PORT_TYPE_RECEIVE;
+       mach_port_urefs_t urefs = 0;
 
 #if     MACH_ASSERT
        uintptr_t buf[IP_CALLSTACK_MAX];
        ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX);
 #endif /* MACH_ASSERT */
 
-       kr = ipc_object_alloc_name(space, IOT_PORT,
-           MACH_PORT_TYPE_RECEIVE, 0,
+       if (flags & IPC_PORT_INIT_MAKE_SEND_RIGHT) {
+               type |= MACH_PORT_TYPE_SEND;
+               urefs = 1;
+       }
+       kr = ipc_object_alloc_name(space, IOT_PORT, type, urefs,
            name, (ipc_object_t *) &port);
        if (kr != KERN_SUCCESS) {
                return kr;
@@ -783,7 +790,7 @@ ipc_port_alloc_name(
 
        /* port is locked */
 
-       ipc_port_init(port, space, name);
+       ipc_port_init(port, space, flags, name);
 
 #if     MACH_ASSERT
        ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX);
@@ -985,7 +992,7 @@ ipc_port_destroy(ipc_port_t port)
 
                if (special_reply) {
                        ipc_port_adjust_special_reply_port(port,
-                           IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE, FALSE);
+                           IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE);
                }
 
                if (watchport_elem) {
@@ -1058,7 +1065,7 @@ ipc_port_destroy(ipc_port_t port)
        /* unlink the kmsg from special reply port */
        if (special_reply) {
                ipc_port_adjust_special_reply_port(port,
-                   IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE, FALSE);
+                   IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE);
        }
 
        /* throw away no-senders request */
@@ -1310,6 +1317,36 @@ not_circular:
 #endif /* !IMPORTANCE_INHERITANCE */
 }
 
+/*
+ *     Routine:        ipc_port_watchport_elem
+ *     Purpose:
+ *             Get the port's watchport elem field
+ *
+ *     Conditions:
+ *             mqueue locked
+ */
+static struct task_watchport_elem *
+ipc_port_watchport_elem(ipc_port_t port)
+{
+       return port->ip_messages.imq_wait_queue.waitq_tspriv;
+}
+
+/*
+ *     Routine:        ipc_port_update_watchport_elem
+ *     Purpose:
+ *             Set the port's watchport elem field
+ *
+ *     Conditions:
+ *             mqueue locked
+ */
+static inline struct task_watchport_elem *
+ipc_port_update_watchport_elem(ipc_port_t port, struct task_watchport_elem *we)
+{
+       struct task_watchport_elem *old_we = ipc_port_watchport_elem(port);
+       port->ip_messages.imq_wait_queue.waitq_tspriv = we;
+       return old_we;
+}
+
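
These accessors replace the dedicated ip_watchport_elem field (removed from struct ipc_port in the header hunk below) with the mqueue waitq's private pointer, which is why both require the mqueue lock. A hedged caller sketch, where new_elem is a hypothetical element supplied by the caller:

	imq_lock(&port->ip_messages);
	struct task_watchport_elem *old_elem =
	    ipc_port_update_watchport_elem(port, new_elem);  /* swap under mqueue lock */
	imq_unlock(&port->ip_messages);
	/* caller disposes of old_elem, if any, after dropping the lock */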
 /*
  * Update the recv turnstile inheritor for a port.
  *
@@ -1414,7 +1451,7 @@ ipc_port_send_update_inheritor(
            port->ip_destination != NULL) {
                /* Case 2. */
                inheritor = port_send_turnstile(port->ip_destination);
-       } else if (port->ip_watchport_elem != NULL) {
+       } else if (ipc_port_watchport_elem(port) != NULL) {
                /* Case 3. */
                if (prioritize_launch) {
                        assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY);
@@ -1539,35 +1576,7 @@ ipc_port_send_turnstile_complete(ipc_port_t port)
 static struct turnstile *
 ipc_port_rcv_turnstile(ipc_port_t port)
 {
-       return turnstile_lookup_by_proprietor((uintptr_t)port, TURNSTILE_SYNC_IPC);
-}
-
-
-/*
- *     Routine:        ipc_port_rcv_turnstile_waitq
- *     Purpose:
- *             Given the mqueue's waitq, find the port's
- *              rcv turnstile and return its waitq.
- *
- *     Conditions:
- *             mqueue locked or thread waiting on turnstile is locked.
- */
-struct waitq *
-ipc_port_rcv_turnstile_waitq(struct waitq *waitq)
-{
-       struct waitq *safeq;
-
-       ipc_mqueue_t mqueue = imq_from_waitq(waitq);
-       ipc_port_t port = ip_from_mq(mqueue);
-       struct turnstile *rcv_turnstile = ipc_port_rcv_turnstile(port);
-
-       /* Check if the port has a rcv turnstile */
-       if (rcv_turnstile != TURNSTILE_NULL) {
-               safeq = &rcv_turnstile->ts_waitq;
-       } else {
-               safeq = global_eventq(waitq);
-       }
-       return safeq;
+       return *port_rcv_turnstile_address(port);
 }
 
 
@@ -1702,11 +1711,15 @@ ipc_port_adjust_special_reply_port_locked(
        turnstile_inheritor_t inheritor = TURNSTILE_INHERITOR_NULL;
        struct turnstile *ts = TURNSTILE_NULL;
 
-       assert(special_reply_port->ip_specialreply);
-
        ip_lock_held(special_reply_port); // ip_sync_link_state is touched
        imq_lock(&special_reply_port->ip_messages);
 
+       if (!special_reply_port->ip_specialreply) {
+               // only mach_msg_receive_results_complete() calls this with any port
+               assert(get_turnstile);
+               goto not_special;
+       }
+
        if (flags & IPC_PORT_ADJUST_SR_RECEIVED_MSG) {
                ipc_special_reply_port_msg_sent_reset(special_reply_port);
        }
@@ -1721,6 +1734,7 @@ ipc_port_adjust_special_reply_port_locked(
 
        /* Check if the special reply port is marked non-special */
        if (special_reply_port->ip_sync_link_state == PORT_SYNC_LINK_ANY) {
+not_special:
                if (get_turnstile) {
                        turnstile_complete((uintptr_t)special_reply_port,
                            port_rcv_turnstile_address(special_reply_port), NULL, TURNSTILE_SYNC_IPC);
@@ -1822,18 +1836,12 @@ ipc_port_adjust_special_reply_port_locked(
  */
 void
 ipc_port_adjust_special_reply_port(
-       ipc_port_t special_reply_port,
-       uint8_t flags,
-       boolean_t get_turnstile)
+       ipc_port_t port,
+       uint8_t flags)
 {
-       if (special_reply_port->ip_specialreply) {
-               ip_lock(special_reply_port);
-               ipc_port_adjust_special_reply_port_locked(special_reply_port, NULL,
-                   flags, get_turnstile);
-               /* special_reply_port unlocked */
-       }
-       if (get_turnstile) {
-               assert(current_thread()->turnstile != TURNSTILE_NULL);
+       if (port->ip_specialreply) {
+               ip_lock(port);
+               ipc_port_adjust_special_reply_port_locked(port, NULL, flags, FALSE);
        }
 }
 
@@ -1988,8 +1996,7 @@ ipc_port_add_watchport_elem_locked(
                ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
        }
 
-       *old_elem = port->ip_watchport_elem;
-       port->ip_watchport_elem = watchport_elem;
+       *old_elem = ipc_port_update_watchport_elem(port, watchport_elem);
 
        ipc_port_send_turnstile_recompute_push_locked(port);
        /* port and mqueue unlocked */
@@ -2015,7 +2022,7 @@ ipc_port_clear_watchport_elem_internal_conditional_locked(
        ip_lock_held(port);
        imq_held(&port->ip_messages);
 
-       if (port->ip_watchport_elem != watchport_elem) {
+       if (ipc_port_watchport_elem(port) != watchport_elem) {
                imq_unlock(&port->ip_messages);
                ip_unlock(port);
                return KERN_FAILURE;
@@ -2047,13 +2054,13 @@ ipc_port_replace_watchport_elem_conditional_locked(
        ip_lock_held(port);
        imq_held(&port->ip_messages);
 
-       if (port->ip_watchport_elem != old_watchport_elem) {
+       if (ipc_port_watchport_elem(port) != old_watchport_elem) {
                imq_unlock(&port->ip_messages);
                ip_unlock(port);
                return KERN_FAILURE;
        }
 
-       port->ip_watchport_elem = new_watchport_elem;
+       ipc_port_update_watchport_elem(port, new_watchport_elem);
        ipc_port_send_turnstile_recompute_push_locked(port);
        /* port and mqueue unlocked */
        return KERN_SUCCESS;
@@ -2073,15 +2080,10 @@ struct task_watchport_elem *
 ipc_port_clear_watchport_elem_internal(
        ipc_port_t                 port)
 {
-       struct task_watchport_elem *watchport_elem;
-
        ip_lock_held(port);
        imq_held(&port->ip_messages);
 
-       watchport_elem = port->ip_watchport_elem;
-       port->ip_watchport_elem = NULL;
-
-       return watchport_elem;
+       return ipc_port_update_watchport_elem(port, NULL);
 }
 
 /*
@@ -2129,7 +2131,7 @@ ipc_port_get_watchport_inheritor(
        ipc_port_t port)
 {
        imq_held(&port->ip_messages);
-       return port->ip_watchport_elem->twe_task->watchports->tw_thread;
+       return ipc_port_watchport_elem(port)->twe_task->watchports->tw_thread;
 }
 
 /*
@@ -2638,7 +2640,7 @@ ipc_port_release_sonce(
                return;
        }
 
-       ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN, FALSE);
+       ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN);
 
        ip_lock(port);
 
@@ -2697,7 +2699,8 @@ ipc_port_release_receive(
 
 ipc_port_t
 ipc_port_alloc_special(
-       ipc_space_t     space)
+       ipc_space_t             space,
+       ipc_port_init_flags_t   flags)
 {
        ipc_port_t port;
 
@@ -2716,7 +2719,7 @@ ipc_port_alloc_special(
        port->ip_references = 1;
        port->ip_object.io_bits = io_makebits(TRUE, IOT_PORT, 0);
 
-       ipc_port_init(port, space, 1);
+       ipc_port_init(port, space, flags, 1);
 
 #if     MACH_ASSERT
        ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX);
@@ -2777,7 +2780,9 @@ ipc_port_finalize(
        ipc_port_request_t requests = port->ip_requests;
 
        assert(port_send_turnstile(port) == TURNSTILE_NULL);
-       assert(ipc_port_rcv_turnstile(port) == TURNSTILE_NULL);
+       if (imq_is_turnstile_proxy(&port->ip_messages)) {
+               assert(ipc_port_rcv_turnstile(port) == TURNSTILE_NULL);
+       }
 
        if (ip_active(port)) {
                panic("Trying to free an active port. port %p", port);
index 3fccd24605f414e654703ff330dbc5c0ab96bbe1..413139c0e86f9de65a06983d5354bc8c7ab740f2 100644 (file)
  *  taken when the port was destroyed.
  */
 
+struct task_watchport_elem;
+
 typedef unsigned int ipc_port_timestamp_t;
 
 struct ipc_port {
@@ -141,8 +143,6 @@ struct ipc_port {
                struct turnstile *send_turnstile;
        } kdata2;
 
-       struct task_watchport_elem *ip_watchport_elem;
-
        mach_vm_address_t ip_context;
 
        natural_t ip_sprequests:1,      /* send-possible requests outstanding */
@@ -213,7 +213,7 @@ MACRO_END
 (IP_PREALLOC(port) ? &((port)->ip_premsg->ikm_turnstile) : &((port)->ip_send_turnstile))
 
 #define port_rcv_turnstile_address(port) \
-       (NULL)
+       &(port)->ip_messages.imq_wait_queue.waitq_ts
 
 
 /*
@@ -472,22 +472,31 @@ extern boolean_t ipc_port_clear_receiver(
        ipc_port_t              port,
        boolean_t               should_destroy);
 
+__options_decl(ipc_port_init_flags_t, uint32_t, {
+       IPC_PORT_INIT_NONE            = 0x00000000,
+       IPC_PORT_INIT_MAKE_SEND_RIGHT = 0x00000001,
+       IPC_PORT_INIT_MESSAGE_QUEUE   = 0x00000002,
+       IPC_PORT_INIT_SPECIAL_REPLY   = 0x00000004,
+});
+
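
__options_decl declares these as or-able option bits; for example, thread_get_special_reply_port() in the ipc_tt.c hunk below combines all three when allocating a special reply port:

	ipc_port_init_flags_t flags = IPC_PORT_INIT_MESSAGE_QUEUE |
	    IPC_PORT_INIT_MAKE_SEND_RIGHT | IPC_PORT_INIT_SPECIAL_REPLY;
	kr = ipc_port_alloc(current_task()->itk_space, flags, &name, &port);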
 /* Initialize a newly-allocated port */
 extern void ipc_port_init(
        ipc_port_t              port,
        ipc_space_t             space,
+       ipc_port_init_flags_t   flags,
        mach_port_name_t        name);
 
 /* Allocate a port */
 extern kern_return_t ipc_port_alloc(
        ipc_space_t             space,
-       bool                    make_send_right,
+       ipc_port_init_flags_t   flags,
        mach_port_name_t        *namep,
        ipc_port_t              *portp);
 
 /* Allocate a port, with a specific name */
 extern kern_return_t ipc_port_alloc_name(
        ipc_space_t             space,
+       ipc_port_init_flags_t   flags,
        mach_port_name_t        name,
        ipc_port_t              *portp);
 
@@ -559,8 +568,7 @@ ipc_port_adjust_sync_link_state_locked(
 void
 ipc_port_adjust_special_reply_port(
        ipc_port_t special_reply_port,
-       uint8_t flags,
-       boolean_t get_turnstile);
+       uint8_t flags);
 
 void
 ipc_port_adjust_port_locked(
@@ -686,7 +694,8 @@ extern void ipc_port_finalize(
 
 /* Allocate a port in a special space */
 extern ipc_port_t ipc_port_alloc_special(
-       ipc_space_t     space);
+       ipc_space_t             space,
+       ipc_port_init_flags_t   flags);
 
 /* Deallocate a port in a special space */
 extern void ipc_port_dealloc_special(
@@ -711,12 +720,12 @@ extern void ipc_port_send_update_inheritor(ipc_port_t port,
     turnstile_update_flags_t flags);
 
 #define ipc_port_alloc_kernel()         \
-               ipc_port_alloc_special(ipc_space_kernel)
+               ipc_port_alloc_special(ipc_space_kernel, IPC_PORT_INIT_NONE)
 #define ipc_port_dealloc_kernel(port)   \
                ipc_port_dealloc_special((port), ipc_space_kernel)
 
 #define ipc_port_alloc_reply()          \
-               ipc_port_alloc_special(ipc_space_reply)
+               ipc_port_alloc_special(ipc_space_reply, IPC_PORT_INIT_MESSAGE_QUEUE)
 #define ipc_port_dealloc_reply(port)    \
                ipc_port_dealloc_special((port), ipc_space_reply)
 
index 523c496600894c3a89d45013e8b6b53a129bb5e2..e73364b485e85e3d0060636a476d3a7804e2e0d5 100644 (file)
@@ -110,7 +110,7 @@ ipc_pset_alloc(
        }
        /* pset and space are locked */
 
-       ipc_mqueue_init(&pset->ips_messages, TRUE /* set */);
+       ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);
        is_write_unlock(space);
 
        *namep = name;
@@ -149,7 +149,7 @@ ipc_pset_alloc_name(
        }
        /* pset is locked */
 
-       ipc_mqueue_init(&pset->ips_messages, TRUE /* set */);
+       ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);
 
        *psetp = pset;
        return KERN_SUCCESS;
@@ -186,7 +186,7 @@ ipc_pset_alloc_special(
        pset->ips_references = 1;
        pset->ips_object.io_bits = io_makebits(TRUE, IOT_PORT_SET, 0);
 
-       ipc_mqueue_init(&pset->ips_messages, TRUE /* set */);
+       ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);
 
        return pset;
 }
index e4a901230a1b3f5bef49d3332e783842e920f9a7..f367c8e00c826b2189bcdbd64d2b1b6780b15489 100644 (file)
@@ -667,7 +667,7 @@ mach_msg_receive_results_complete(ipc_object_t object)
 {
        thread_t self = current_thread();
        ipc_port_t port = IPC_PORT_NULL;
-       boolean_t get_turnstile = self->turnstile ? FALSE : TRUE;
+       boolean_t get_turnstile = (self->turnstile == TURNSTILE_NULL);
 
        if (io_otype(object) == IOT_PORT) {
                port = ip_object_to_port(object);
@@ -689,8 +689,12 @@ mach_msg_receive_results_complete(ipc_object_t object)
                flags |= IPC_PORT_ADJUST_SR_RECEIVED_MSG;
        }
 
-       ipc_port_adjust_special_reply_port(port,
-           flags, get_turnstile);
+       if (port->ip_specialreply || get_turnstile) {
+               ip_lock(port);
+               ipc_port_adjust_special_reply_port_locked(port, NULL,
+                   flags, get_turnstile);
+       }
+       assert(self->turnstile != TURNSTILE_NULL);
        /* thread now has a turnstile */
 }
 
index 9f4d8b677246bfa3c8584d257936f7a35b800e08..af47faa37eb5da33dc403780044ce19c4a8869c8 100644 (file)
@@ -725,9 +725,11 @@ mach_port_allocate_internal(
                }
 
                if (qosp->name) {
-                       kr = ipc_port_alloc_name(space, *namep, &port);
+                       kr = ipc_port_alloc_name(space, IPC_PORT_INIT_MESSAGE_QUEUE,
+                           *namep, &port);
                } else {
-                       kr = ipc_port_alloc(space, FALSE, namep, &port);
+                       kr = ipc_port_alloc(space, IPC_PORT_INIT_MESSAGE_QUEUE,
+                           namep, &port);
                }
                if (kr == KERN_SUCCESS) {
                        if (kmsg != IKM_NULL) {
@@ -2499,14 +2501,18 @@ mach_port_construct(
 {
        kern_return_t           kr;
        ipc_port_t              port;
+       ipc_port_init_flags_t   init_flags = IPC_PORT_INIT_MESSAGE_QUEUE;
 
        if (space == IS_NULL) {
                return KERN_INVALID_TASK;
        }
 
+       if (options->flags & MPO_INSERT_SEND_RIGHT) {
+               init_flags |= IPC_PORT_INIT_MAKE_SEND_RIGHT;
+       }
+
        /* Allocate a new port in the IPC space */
-       kr = ipc_port_alloc(space, (options->flags & MPO_INSERT_SEND_RIGHT),
-           name, &port);
+       kr = ipc_port_alloc(space, init_flags, name, &port);
        if (kr != KERN_SUCCESS) {
                return kr;
        }
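
From user space this path is reached through mach_port_construct(); a minimal usage sketch (standard Mach API, not itself part of this diff):

	#include <mach/mach.h>

	static mach_port_t
	make_port_with_send_right(void)
	{
		mach_port_options_t opts = {
			.flags = MPO_INSERT_SEND_RIGHT, /* becomes IPC_PORT_INIT_MAKE_SEND_RIGHT */
		};
		mach_port_t port = MACH_PORT_NULL;
		kern_return_t kr = mach_port_construct(mach_task_self(), &opts, 0, &port);
		return (kr == KERN_SUCCESS) ? port : MACH_PORT_NULL;
	}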
index 82daadce69ac429ec5bdac663b793653e76029ec..59667c828222f44c7f6a472a00d8dce8849a4817 100644 (file)
@@ -329,7 +329,7 @@ backtrace_thread_user(void *thread, uintptr_t *bt, unsigned int max_frames,
 
        assert(ml_get_interrupts_enabled() == TRUE);
        if (!ml_get_interrupts_enabled()) {
-               return EINVAL;
+               goto out;
        }
 
        union {
@@ -349,7 +349,7 @@ backtrace_thread_user(void *thread, uintptr_t *bt, unsigned int max_frames,
        if (thread != current_thread()) {
                map = get_task_map_reference(get_threadtask(thread));
                if (map == NULL) {
-                       return EINVAL;
+                       goto out;
                }
                old_map = vm_map_switch(map);
        } else {
index 57effae1ccecd86795c68527a8d2a48c28b26dba..981eb3619d41f86e34cb0c41ebe299e8b053090e 100644 (file)
@@ -271,6 +271,8 @@ enum {
 #define KF_STACKSHOT_OVRD (0x10)
 #define KF_COMPRSV_OVRD (0x20)
 #define KF_INTERRUPT_MASKED_DEBUG_OVRD (0x40)
+#define KF_TRAPTRACE_OVRD (0x80)
+#define KF_IOTRACE_OVRD (0x100)
 
 boolean_t kern_feature_override(uint32_t fmask);
 
index e336fcc09f1f6edaa183ede2bb0e087ce4c9d439..a4a617d32c7d55e046b1088332d3aae01c77670b 100644 (file)
@@ -1278,6 +1278,10 @@ kernel_set_special_port(host_priv_t host_priv, int id, ipc_port_t port)
 
        host_lock(host_priv);
        old_port = host_priv->special[id];
+       if ((id == HOST_AMFID_PORT) && (task_pid(current_task()) != 1)) {
+               host_unlock(host_priv);
+               return KERN_NO_ACCESS;
+       }
        host_priv->special[id] = port;
        host_unlock(host_priv);
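
A hedged illustration of the effect: after this change, only launchd can replace the amfid host special port.

	kr = kernel_set_special_port(host_priv, HOST_AMFID_PORT, port);
	/* returns KERN_NO_ACCESS unless task_pid(current_task()) == 1, i.e. launchd */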
 
index 7d0384cf272ea27fed789306480749e068b19e43..8af9d9cb20d09eb1541f90a14d130efb1773e02a 100644 (file)
@@ -868,7 +868,8 @@ mach_reply_port(
        mach_port_name_t name;
        kern_return_t kr;
 
-       kr = ipc_port_alloc(current_task()->itk_space, FALSE, &name, &port);
+       kr = ipc_port_alloc(current_task()->itk_space, IPC_PORT_INIT_MESSAGE_QUEUE,
+           &name, &port);
        if (kr == KERN_SUCCESS) {
                ip_unlock(port);
        } else {
@@ -897,6 +898,8 @@ thread_get_special_reply_port(
        mach_port_name_t name;
        kern_return_t kr;
        thread_t thread = current_thread();
+       ipc_port_init_flags_t flags = IPC_PORT_INIT_MESSAGE_QUEUE |
+           IPC_PORT_INIT_MAKE_SEND_RIGHT | IPC_PORT_INIT_SPECIAL_REPLY;
 
        /* unbind the thread special reply port */
        if (IP_VALID(thread->ith_special_reply_port)) {
@@ -906,7 +909,7 @@ thread_get_special_reply_port(
                }
        }
 
-       kr = ipc_port_alloc(current_task()->itk_space, TRUE, &name, &port);
+       kr = ipc_port_alloc(current_task()->itk_space, flags, &name, &port);
        if (kr == KERN_SUCCESS) {
                ipc_port_bind_special_reply_port_locked(port);
                ip_unlock(port);
@@ -932,11 +935,11 @@ ipc_port_bind_special_reply_port_locked(
 {
        thread_t thread = current_thread();
        assert(thread->ith_special_reply_port == NULL);
+       assert(port->ip_specialreply);
+       assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY);
 
        ip_reference(port);
        thread->ith_special_reply_port = port;
-       port->ip_specialreply = 1;
-       port->ip_sync_link_state = PORT_SYNC_LINK_ANY;
        port->ip_messages.imq_srp_owner_thread = thread;
 
        ipc_special_reply_port_bits_reset(port);
@@ -1386,6 +1389,8 @@ mach_ports_lookup(
        return KERN_SUCCESS;
 }
 
+extern zone_t task_zone;
+
 kern_return_t
 task_conversion_eval(task_t caller, task_t victim)
 {
@@ -1409,6 +1414,8 @@ task_conversion_eval(task_t caller, task_t victim)
                return KERN_INVALID_SECURITY;
        }
 
+       zone_require(victim, task_zone);
+
 #if CONFIG_EMBEDDED
        /*
         * On embedded platforms, only a platform binary can resolve the task port
index ffe8d7658ae754edfaa683d4330d3d5151a98fd7..26176bc3615cb853ab69bfc51c3801efa25fe2d7 100644 (file)
@@ -192,8 +192,8 @@ KALLOC_ZINFO_SFREE(vm_size_t bytes)
  * 6144       N                    N                   N
  * 8192       Y                    N                   N
  * 12288      N                    X                   X
- * 16384      N                    N                   N
- * 32768      X                    N                   N
+ * 16384      N                    X                   N
+ * 32768      X                    X                   N
  *
  */
 static const struct kalloc_zone_config {
@@ -266,8 +266,15 @@ static const struct kalloc_zone_config {
        KZC_ENTRY(4096, false),
        KZC_ENTRY(6144, false),
        KZC_ENTRY(8192, false),
+       /* To limit internal fragmentation, only add the following zones if the
+        * page size is greater than 4K.
+        * Note that we use ARM_PGBYTES here (instead of one of the VM macros)
+        * since it's guaranteed to be a compile time constant.
+        */
+#if ARM_PGBYTES > 4096
        KZC_ENTRY(16384, false),
        KZC_ENTRY(32768, false),
+#endif /* ARM_PGBYTES > 4096 */
 
 #else
 #error missing or invalid zone size parameters for kalloc
index aa8edd434046d0612568f7119ea6ea2021076589..a2f1fe6a0d759369d124312ec04387da636c1ab0 100644 (file)
@@ -116,7 +116,7 @@ kpc_set_thread_counting(uint32_t classes)
 
                /* and schedule an AST for this thread... */
                if (!current_thread()->kpc_buf) {
-                       current_thread()->kperf_flags |= T_KPC_ALLOC;
+                       current_thread()->kperf_ast |= T_KPC_ALLOC;
                        act_set_kperf(current_thread());
                }
        }
@@ -150,7 +150,7 @@ kpc_update_thread_counters( thread_t thread )
 
        /* schedule any necessary allocations */
        if (!current_thread()->kpc_buf) {
-               current_thread()->kperf_flags |= T_KPC_ALLOC;
+               current_thread()->kperf_ast |= T_KPC_ALLOC;
                act_set_kperf(current_thread());
        }
 
@@ -234,12 +234,10 @@ kpc_thread_destroy(thread_t thread)
        kpc_counterbuf_free(buf);
 }
 
-/* ast callback on a thread */
 void
-kpc_thread_ast_handler( thread_t thread )
+kpc_thread_ast_handler(thread_t thread)
 {
-       /* see if we want an alloc */
-       if (thread->kperf_flags & T_KPC_ALLOC) {
+       if (thread->kperf_ast & T_KPC_ALLOC) {
                thread->kpc_buf = kpc_counterbuf_alloc();
        }
 }
index a0d9258723e56353e20ea92345d272f593fa152e..e905ee666a1862db6fb2c6e3d12ab29bb1feaf5d 100644 (file)
@@ -448,7 +448,7 @@ ledger_instantiate(ledger_template_t template, int entry_type)
                le->le_credit        = 0;
                le->le_debit         = 0;
                le->le_limit         = LEDGER_LIMIT_INFINITY;
-               le->le_warn_level    = LEDGER_LIMIT_INFINITY;
+               le->le_warn_percent  = LEDGER_PERCENT_NONE;
                le->_le.le_refill.le_refill_period = 0;
                le->_le.le_refill.le_last_refill   = 0;
        }
@@ -521,7 +521,8 @@ warn_level_exceeded(struct ledger_entry *le)
         * use positive limits.
         */
        balance = le->le_credit - le->le_debit;
-       if ((le->le_warn_level != LEDGER_LIMIT_INFINITY) && (balance > le->le_warn_level)) {
+       if (le->le_warn_percent != LEDGER_PERCENT_NONE &&
+           ((balance > (le->le_limit * le->le_warn_percent) >> 16))) {
                return 1;
        }
        return 0;
@@ -987,9 +988,9 @@ ledger_set_limit(ledger_t ledger, int entry, ledger_amount_t limit,
                assert(warn_level_percentage <= 100);
                assert(limit > 0); /* no negative limit support for warnings */
                assert(limit != LEDGER_LIMIT_INFINITY); /* warn % without limit makes no sense */
-               le->le_warn_level = (le->le_limit * warn_level_percentage) / 100;
+               le->le_warn_percent = warn_level_percentage * (1u << 16) / 100;
        } else {
-               le->le_warn_level = LEDGER_LIMIT_INFINITY;
+               le->le_warn_percent = LEDGER_PERCENT_NONE;
        }
 
        return KERN_SUCCESS;
@@ -1145,12 +1146,12 @@ ledger_disable_callback(ledger_t ledger, int entry)
        }
 
        /*
-        * le_warn_level is used to indicate *if* this ledger has a warning configured,
+        * le_warn_percent is used to indicate *if* this ledger has a warning configured,
         * in addition to what that warning level is set to.
         * This means a side-effect of ledger_disable_callback() is that the
         * warning level is forgotten.
         */
-       ledger->l_entries[entry].le_warn_level = LEDGER_LIMIT_INFINITY;
+       ledger->l_entries[entry].le_warn_percent = LEDGER_PERCENT_NONE;
        flag_clear(&ledger->l_entries[entry].le_flags, LEDGER_ACTION_CALLBACK);
        return KERN_SUCCESS;
 }
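
The warning level is now stored as a 16.16 fixed-point fraction of the limit rather than as an absolute ledger amount (shrinking struct ledger_entry, see the ledger.h hunk below). A worked example for a 100 MB limit with a 75% warning level:

	uint16_t warn_percent = 75 * (1u << 16) / 100;            /* 49152, i.e. 0.75 in 16.16 */
	ledger_amount_t limit = 100 * 1024 * 1024;                /* 100 MB */
	ledger_amount_t threshold = (limit * warn_percent) >> 16; /* 78643200, i.e. 75 MB */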
index 9be77bb0c282192605e3396fef0b7cbc5b14d2e1..e3a2ec2e631b826e43fef8fbb618ff8bf5c1a0c3 100644 (file)
@@ -72,8 +72,9 @@ struct ledger_template_info {
  */
 struct ledger_entry {
        volatile uint32_t        le_flags;
+#define LEDGER_PERCENT_NONE  UINT16_MAX
+       uint16_t                 le_warn_percent;
        ledger_amount_t          le_limit;
-       ledger_amount_t          le_warn_level;
        volatile ledger_amount_t le_credit __attribute__((aligned(8)));
        volatile ledger_amount_t le_debit  __attribute__((aligned(8)));
        union {
index c4e8347f1a2b3d615490907162d5ecca2fc52b1e..d0d03cf62d783a088480b5d778cc3a6d45f2f853 100644 (file)
@@ -302,14 +302,14 @@ mach_node_register(mach_node_t  node)
        proxy_space->is_node_id = nid;
 
        /* Create the bootstrap proxy port for this remote node */
-       bs_port = ipc_port_alloc_special(proxy_space);
+       bs_port = ipc_port_alloc_special(proxy_space, IPC_PORT_INIT_MESSAGE_QUEUE);
        if (bs_port == MACH_PORT_NULL) {
                kr = KERN_RESOURCE_SHORTAGE;
                goto out;
        }
 
        /* Create the control (ack) port for this remote node */
-       ack_port = ipc_port_alloc_special(proxy_space);
+       ack_port = ipc_port_alloc_special(proxy_space, IPC_PORT_INIT_MESSAGE_QUEUE);
        if (ack_port == MACH_PORT_NULL) {
                kr = KERN_RESOURCE_SHORTAGE;
                goto out;
index c59175da0edf2cceab9a7d32bd5c4ce73b7f3cb5..c312e0b4e04371d566984e16266c35102fe97a06 100644 (file)
@@ -6267,3 +6267,10 @@ sysctl_task_get_no_smt(void)
        return '0';
 }
 #endif /* DEVELOPMENT || DEBUG */
+
+
+__private_extern__ void
+thread_bind_cluster_type(char cluster_type)
+{
+       (void)cluster_type;
+}
index 2f806bdd0ce1c01b4cc78a2431efa3bc47548f4c..880e849605f091339a7878dac1771549882c5579 100644 (file)
@@ -454,6 +454,8 @@ __BEGIN_DECLS
 
 #ifdef  XNU_KERNEL_PRIVATE
 
+extern void thread_bind_cluster_type(char cluster_type);
+
 /* Toggles a global override to turn off CPU Throttling */
 extern void     sys_override_cpu_throttle(boolean_t enable_override);
 
index d01037df8ba9f2e4794892bb49fa0e91f5f215ca..917ae6db5c3823e5500820dad3d931080fd5741b 100644 (file)
@@ -214,7 +214,6 @@ extern int serverperfmode;
 unsigned int new_nkdbufs = 0;
 unsigned int wake_nkdbufs = 0;
 unsigned int write_trace_on_panic = 0;
-static char trace_typefilter[64] = { 0 };
 unsigned int trace_wrap = 0;
 boolean_t trace_serial = FALSE;
 boolean_t early_boot_complete = FALSE;
@@ -269,7 +268,6 @@ kernel_bootstrap(void)
        PE_parse_boot_argn("trace", &new_nkdbufs, sizeof(new_nkdbufs));
        PE_parse_boot_argn("trace_wake", &wake_nkdbufs, sizeof(wake_nkdbufs));
        PE_parse_boot_argn("trace_panic", &write_trace_on_panic, sizeof(write_trace_on_panic));
-       PE_parse_boot_arg_str("trace_typefilter", trace_typefilter, sizeof(trace_typefilter));
        PE_parse_boot_argn("trace_wrap", &trace_wrap, sizeof(trace_wrap));
 
        scale_setup();
@@ -556,6 +554,9 @@ kernel_bootstrap_thread(void)
        kernel_bootstrap_thread_log("ktrace_init");
        ktrace_init();
 
+       char trace_typefilter[256] = {};
+       PE_parse_boot_arg_str("trace_typefilter", trace_typefilter,
+           sizeof(trace_typefilter));
        kdebug_init(new_nkdbufs, trace_typefilter, trace_wrap);
 
 #ifdef  MACH_BSD
index 0374456e1090a163788a54542d0993dd1650b755..ebd6c2bb2ec5eb8fdee16c935cd542462d93b58b 100644 (file)
@@ -1640,6 +1640,7 @@ task_create_internal(
 #if __arm64__
        new_task->task_legacy_footprint = FALSE;
        new_task->task_extra_footprint_limit = FALSE;
+       new_task->task_ios13extended_footprint_limit = FALSE;
 #endif /* __arm64__ */
        new_task->task_region_footprint = FALSE;
        new_task->task_has_crossed_thread_limit = FALSE;
@@ -7312,6 +7313,7 @@ task_set_exc_guard_behavior(
 #if __arm64__
 extern int legacy_footprint_entitlement_mode;
 extern void memorystatus_act_on_legacy_footprint_entitlement(proc_t, boolean_t);
+extern void memorystatus_act_on_ios13extended_footprint_entitlement(proc_t);
 
 void
 task_set_legacy_footprint(
@@ -7330,11 +7332,30 @@ task_set_extra_footprint_limit(
                return;
        }
        task_lock(task);
-       if (!task->task_extra_footprint_limit) {
-               memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
-               task->task_extra_footprint_limit = TRUE;
+       if (task->task_extra_footprint_limit) {
+               task_unlock(task);
+               return;
+       }
+       task->task_extra_footprint_limit = TRUE;
+       task_unlock(task);
+       memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
+}
+
+void
+task_set_ios13extended_footprint_limit(
+       task_t task)
+{
+       if (task->task_ios13extended_footprint_limit) {
+               return;
+       }
+       task_lock(task);
+       if (task->task_ios13extended_footprint_limit) {
+               task_unlock(task);
+               return;
        }
+       task->task_ios13extended_footprint_limit = TRUE;
        task_unlock(task);
+       memorystatus_act_on_ios13extended_footprint_entitlement(task->bsd_info);
 }
 #endif /* __arm64__ */
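
Both setters now share the same shape: latch the once-only flag under the task lock, drop the lock, and only then invoke the memorystatus callout, so the callout never runs with task_lock held. A hedged sketch with a hypothetical flag and callout:

	task_lock(task);
	if (task->some_once_flag) {                     /* hypothetical flag */
		task_unlock(task);
		return;
	}
	task->some_once_flag = TRUE;
	task_unlock(task);
	some_memorystatus_callout(task->bsd_info);      /* hypothetical callout */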
 
index 1db8fb09d4bf01fd4767d227312532f933c44098..e47bb217c0aa9dc1f3d7d8ef9c9ba2871afb6e46 100644 (file)
@@ -427,6 +427,7 @@ struct task {
 #if __arm64__
        unsigned int    task_legacy_footprint:1;
        unsigned int    task_extra_footprint_limit:1;
+       unsigned int    task_ios13extended_footprint_limit:1;
 #endif /* __arm64__ */
        unsigned int    task_region_footprint:1;
        unsigned int    task_has_crossed_thread_limit:1;
@@ -1004,6 +1005,7 @@ extern boolean_t task_get_darkwake_mode(task_t);
 #if __arm64__
 extern void task_set_legacy_footprint(task_t task);
 extern void task_set_extra_footprint_limit(task_t task);
+extern void task_set_ios13extended_footprint_limit(task_t task);
 #endif /* __arm64__ */
 
 #if CONFIG_MACF
index f554222b137269053ffc505e5dfddfa2d3cbae52..e26f4dced01dce5ccd0295f5203003bc489967f5 100644 (file)
@@ -356,7 +356,7 @@ thread_bootstrap(void)
 #endif /* CONFIG_DTRACE */
 
 #if KPERF
-       thread_template.kperf_flags = 0;
+       thread_template.kperf_ast = 0;
        thread_template.kperf_pet_gen = 0;
        thread_template.kperf_c_switch = 0;
        thread_template.kperf_pet_cnt = 0;
index 83346637674f16dcdce945d6f1b05f8499c45be4..7242faac767d8dcf2c26cf449b635c63b0668034 100644 (file)
@@ -178,9 +178,8 @@ struct thread {
                struct priority_queue_entry     wait_prioq_links;       /* priority ordered waitq links */
        };
 
-       processor_t             runq;           /* run queue assignment */
-
        event64_t               wait_event;     /* wait queue event */
+       processor_t             runq;           /* run queue assignment */
        struct waitq           *waitq;          /* wait queue this thread is enqueued on */
        struct turnstile       *turnstile;      /* thread's turnstile, protected by primitives interlock */
        void                   *inheritor;      /* inheritor of the primitive the thread will block on */
@@ -204,7 +203,7 @@ struct thread {
        decl_simple_lock_data(, sched_lock);     /* scheduling lock (thread_lock()) */
        decl_simple_lock_data(, wake_lock);      /* for thread stop / wait (wake_lock()) */
 #endif
-       integer_t               options;                        /* options set by thread itself */
+       uint16_t                options;                        /* options set by thread itself */
 #define TH_OPT_INTMASK          0x0003          /* interrupt / abort level */
 #define TH_OPT_VMPRIV           0x0004          /* may allocate reserved memory */
 #define TH_OPT_SYSTEM_CRITICAL  0x0010          /* Thread must always be allowed to run - even under heavy load */
@@ -217,8 +216,8 @@ struct thread {
 #define TH_OPT_SEND_IMPORTANCE  0x0800          /* Thread will allow importance donation from kernel rpc */
 #define TH_OPT_ZONE_GC          0x1000          /* zone_gc() called on this thread */
 
-       boolean_t                       wake_active;    /* wake event on stop */
-       int                                     at_safe_point;  /* thread_abort_safely allowed */
+       bool                            wake_active;    /* wake event on stop */
+       bool                            at_safe_point;  /* thread_abort_safely allowed */
        ast_t                           reason;                 /* why we blocked */
        uint32_t                        quantum_remaining;
        wait_result_t                   wait_result;    /* outcome of wait -
@@ -349,9 +348,8 @@ struct thread {
        uint64_t                        safe_release;   /* when to release fail-safe */
 
        /* Call out from scheduler */
-       void                            (*sched_call)(
-               int                     type,
-               thread_t        thread);
+       void                            (*sched_call)(int type, thread_t thread);
+
 #if defined(CONFIG_SCHED_PROTO)
        uint32_t                        runqueue_generation;    /* last time runqueue was drained */
 #endif
@@ -388,18 +386,16 @@ struct thread {
        uint64_t                wait_sfi_begin_time;    /* start time for thread waiting in SFI */
 #endif
 
-       /* Timed wait expiration */
-       timer_call_data_t       wait_timer;
-       integer_t                       wait_timer_active;
-       boolean_t                       wait_timer_is_set;
-
-
        /*
         * Processor/cache affinity
         * - affinity_threads links task threads with the same affinity set
         */
-       affinity_set_t                  affinity_set;
        queue_chain_t                   affinity_threads;
+       affinity_set_t                  affinity_set;
+
+#if CONFIG_EMBEDDED
+       task_watch_t *  taskwatch;              /* task watch */
+#endif /* CONFIG_EMBEDDED */
 
        /* Various bits of state to stash across a continuation, exclusive to the current thread block point */
        union {
@@ -407,7 +403,7 @@ struct thread {
                        mach_msg_return_t       state;          /* receive state */
                        mach_port_seqno_t       seqno;          /* seqno of recvd message */
                        ipc_object_t            object;         /* object received on */
-                       mach_vm_address_t       msg_addr;       /* receive buffer pointer */
+                       vm_address_t            msg_addr;       /* receive buffer pointer */
                        mach_msg_size_t         rsize;          /* max size for recvd msg */
                        mach_msg_size_t         msize;          /* actual size for recvd msg */
                        mach_msg_option_t       option;         /* options for receive */
@@ -463,26 +459,28 @@ struct thread {
        struct ipc_kmsg_queue ith_messages;             /* messages to reap */
        mach_port_t ith_rpc_reply;                      /* reply port for kernel RPCs */
 
+       /* Pending thread ast(s) */
+       ast_t                                   ast;
+
        /* Ast/Halt data structures */
-       vm_offset_t                                     recover;                /* page fault recover(copyin/out) */
+       vm_offset_t                             recover;                /* page fault recover(copyin/out) */
 
        queue_chain_t                           threads;                /* global list of all threads */
 
        /* Activation */
-       queue_chain_t                   task_threads;
+       queue_chain_t                           task_threads;
 
        /* Task membership */
        struct task                             *task;
        vm_map_t                                map;
 #if DEVELOPMENT || DEBUG
-       boolean_t pmap_footprint_suspended;
+       bool      pmap_footprint_suspended;
 #endif /* DEVELOPMENT || DEBUG */
 
-       decl_lck_mtx_data(, mutex);
-
-
-       /* Pending thread ast(s) */
-       ast_t                                   ast;
+       /* Timed wait expiration */
+       timer_call_data_t       wait_timer;
+       uint16_t                wait_timer_active;
+       bool                    wait_timer_is_set;
 
        /* Miscellaneous bits guarded by mutex */
        uint32_t
@@ -495,6 +493,8 @@ struct thread {
            corpse_dup:1,                               /* TRUE when thread is an inactive duplicate in a corpse */
        :0;
 
+       decl_lck_mtx_data(, mutex);
+
        /* Ports associated with this thread */
        struct ipc_port                 *ith_self;                      /* not a right, doesn't hold ref */
        struct ipc_port                 *ith_sself;                     /* a send right */
@@ -528,15 +528,21 @@ struct thread {
 #define T_KPERF_CALLSTACK_DEPTH_OFFSET     (24)
 #define T_KPERF_SET_CALLSTACK_DEPTH(DEPTH) (((uint32_t)(DEPTH)) << T_KPERF_CALLSTACK_DEPTH_OFFSET)
 #define T_KPERF_GET_CALLSTACK_DEPTH(FLAGS) ((FLAGS) >> T_KPERF_CALLSTACK_DEPTH_OFFSET)
+#define T_KPERF_ACTIONID_OFFSET            (18)
+#define T_KPERF_SET_ACTIONID(AID)          (((uint32_t)(AID)) << T_KPERF_ACTIONID_OFFSET)
+#define T_KPERF_GET_ACTIONID(FLAGS)        ((FLAGS) >> T_KPERF_ACTIONID_OFFSET)
 #endif
 
-#define T_KPERF_AST_CALLSTACK (1U << 0) /* dump a callstack on thread's next AST */
-#define T_KPERF_AST_DISPATCH  (1U << 1) /* dump a name on thread's next AST */
-#define T_KPC_ALLOC           (1U << 2) /* thread needs a kpc_buf allocated */
-/* only go up to T_KPERF_CALLSTACK_DEPTH_OFFSET - 1 */
+#define T_KPERF_AST_CALLSTACK 0x1 /* dump a callstack on thread's next AST */
+#define T_KPERF_AST_DISPATCH  0x2 /* dump a name on thread's next AST */
+#define T_KPC_ALLOC           0x4 /* thread needs a kpc_buf allocated */
+
+#define T_KPERF_AST_ALL \
+    (T_KPERF_AST_CALLSTACK | T_KPERF_AST_DISPATCH | T_KPC_ALLOC)
+/* only go up to T_KPERF_ACTIONID_OFFSET - 1 */
 
 #ifdef KPERF
-       uint32_t kperf_flags;
+       uint32_t kperf_ast;
        uint32_t kperf_pet_gen;  /* last generation of PET that sampled this thread */
        uint32_t kperf_c_switch; /* last dispatch detection */
        uint32_t kperf_pet_cnt;  /* how many times a thread has been sampled by PET */
@@ -552,8 +558,6 @@ struct thread {
        void *hv_thread_target;
 #endif /* HYPERVISOR */
 
-       uint64_t thread_id;             /*system wide unique thread-id*/
-
        /* Statistics accumulated per-thread and aggregated per-task */
        uint32_t                syscalls_unix;
        uint32_t                syscalls_mach;
@@ -563,6 +567,8 @@ struct thread {
        uint64_t                t_deduct_bank_ledger_time;   /* cpu time to be deducted from bank ledger */
        uint64_t                t_deduct_bank_ledger_energy; /* energy to be deducted from bank ledger */
 
+       uint64_t thread_id;             /*system wide unique thread-id*/
+
 #if MONOTONIC
        struct mt_thread t_monotonic;
 #endif /* MONOTONIC */
@@ -584,16 +590,12 @@ struct thread {
        } *overrides;
 
        uint32_t        kevent_overrides;
-       uint16_t        user_promotion_basepri;
-       uint16_t         kern_promotion_schedpri;
+       uint8_t         user_promotion_basepri;
+       uint8_t         kern_promotion_schedpri;
        _Atomic uint16_t kevent_ast_bits;
 
        io_stat_info_t                  thread_io_stats; /* per-thread I/O statistics */
 
-#if CONFIG_EMBEDDED
-       task_watch_t *  taskwatch;              /* task watch */
-#endif /* CONFIG_EMBEDDED */
-
        uint32_t                        thread_callout_interrupt_wakeups;
        uint32_t                        thread_callout_platform_idle_wakeups;
        uint32_t                        thread_timer_wakeups_bin_1;
index 2ac35e41415d19e5c4789d05637f28ad1f27cfcc..eeb8eebdb5bcee21fdc976ae4d4eacc8db0a96cb 100644 (file)
@@ -86,6 +86,12 @@ extern int precise_user_kernel_time;
  * Definitions for high resolution timers.
  */
 
+#if __LP64__
+#define TIMER_ALIGNMENT
+#else
+#define TIMER_ALIGNMENT __attribute__((packed, aligned(4)))
+#endif
+
 struct timer {
        uint64_t tstamp;
 #if defined(__LP64__)
@@ -96,7 +102,7 @@ struct timer {
        uint32_t high_bits;
        uint32_t high_bits_check;
 #endif /* !defined(__LP64__) */
-};
+} TIMER_ALIGNMENT;
 
 typedef struct timer timer_data_t, *timer_t;
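
A hedged compile-time check of what TIMER_ALIGNMENT buys: on ILP32 the packed/aligned(4) attribute keeps the 64-bit fields from imposing 8-byte alignment (and padding) on every embedding structure:

	#if !defined(__LP64__)
	_Static_assert(_Alignof(struct timer) == 4,
	    "TIMER_ALIGNMENT packs timers to 4-byte alignment on ILP32");
	#endif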
 
index 6375a3704da93dc0209f16689d87a0fa6588aa7c..e2b189dcb103207b98c9c256cdbd90abce582486 100644 (file)
@@ -1896,9 +1896,8 @@ thread_get_waiting_turnstile(thread_t thread)
                return turnstile;
        }
 
-       /* Get the safeq if the waitq is a port queue */
-       if (waitq_is_port_queue(waitq)) {
-               waitq = waitq_get_safeq(waitq);
+       if (waitq_is_turnstile_proxy(waitq)) {
+               return waitq->waitq_ts;
        }
 
        /* Check if the waitq is a turnstile queue */
@@ -1952,8 +1951,11 @@ thread_get_update_flags_for_turnstile_propagation_stoppage(thread_t thread)
        }
 
        /* Get the safeq if the waitq is a port queue */
-       if (waitq_is_port_queue(waitq)) {
-               waitq = waitq_get_safeq(waitq);
+       if (waitq_is_turnstile_proxy(waitq)) {
+               if (waitq->waitq_ts) {
+                       return TSU_NO_PRI_CHANGE_NEEDED;
+               }
+               return TSU_NO_TURNSTILE;
        }
 
        /* Check if the waitq is a turnstile queue */
index 2348ef57209aa03d5f16ebe6649bccca03be47b3..e408f029b1606dc7346b91c5061f5390a33a9afb 100644 (file)
@@ -1809,19 +1809,29 @@ waitq_irq_safe(struct waitq *waitq)
        return waitq->waitq_irq;
 }
 
-struct waitq *
-waitq_get_safeq(struct waitq *waitq)
+static inline bool
+waitq_empty(struct waitq *wq)
 {
-       struct waitq *safeq;
+       if (waitq_is_turnstile_queue(wq)) {
+               return priority_queue_empty(&wq->waitq_prio_queue);
+       } else if (waitq_is_turnstile_proxy(wq)) {
+               struct turnstile *ts = wq->waitq_ts;
+               return ts == TURNSTILE_NULL ||
+                      priority_queue_empty(&ts->ts_waitq.waitq_prio_queue);
+       } else {
+               return queue_empty(&wq->waitq_queue);
+       }
+}
 
+static struct waitq *
+waitq_get_safeq(struct waitq *waitq)
+{
        /* Check if it's a port waitq */
-       if (waitq_is_port_queue(waitq)) {
-               assert(!waitq_irq_safe(waitq));
-               safeq = ipc_port_rcv_turnstile_waitq(waitq);
-       } else {
-               safeq = global_eventq(waitq);
+       if (waitq_is_turnstile_proxy(waitq)) {
+               struct turnstile *ts = waitq->waitq_ts;
+               return ts ? &ts->ts_waitq : NULL;
        }
-       return safeq;
+       return global_eventq(waitq);
 }
 
 static uint32_t
@@ -2387,6 +2397,15 @@ do_waitq_select_n_locked(struct waitq_select_args *args)
                /* JMM - add flag to waitq to avoid global lookup if no waiters */
                eventmask = _CAST_TO_EVENT_MASK(waitq);
                safeq = waitq_get_safeq(waitq);
+               if (safeq == NULL) {
+                       /*
+                        * in the WQT_TSPROXY case, if there's no turnstile,
+                        * there's no queue and no waiters, so we can move straight
+                        * to the waitq set recursion
+                        */
+                       goto handle_waitq_set;
+               }
+
                if (*nthreads == 0) {
                        spl = splsched();
                }
@@ -2464,6 +2483,7 @@ do_waitq_select_n_locked(struct waitq_select_args *args)
                return;
        }
 
+handle_waitq_set:
        /*
         * wait queues that are not in any sets
         * are the bottom of the recursion
@@ -2678,13 +2698,22 @@ waitq_select_thread_locked(struct waitq *waitq,
        kern_return_t kr;
        spl_t s;
 
-       s = splsched();
-
        /* Find and lock the interrupts disabled queue the thread is actually on */
        if (!waitq_irq_safe(waitq)) {
                safeq = waitq_get_safeq(waitq);
+               if (safeq == NULL) {
+                       /*
+                        * in the WQT_TSPROXY case, if there's no turnstile,
+                        * there's no queue and no waiters, so we can move straight
+                        * to the waitq set recursion
+                        */
+                       goto handle_waitq_set;
+               }
+
+               s = splsched();
                waitq_lock(safeq);
        } else {
+               s = splsched();
                safeq = waitq;
        }
 
@@ -2709,6 +2738,7 @@ waitq_select_thread_locked(struct waitq *waitq,
 
        splx(s);
 
+handle_waitq_set:
        if (!waitq->waitq_set_id) {
                return KERN_NOT_WAITING;
        }
@@ -2819,6 +2849,10 @@ waitq_assert_wait64_locked(struct waitq *waitq,
         */
        if (!waitq_irq_safe(waitq)) {
                safeq = waitq_get_safeq(waitq);
+               if (__improbable(safeq == NULL)) {
+                       panic("Trying to assert_wait on a turnstile proxy "
+                           "that hasn't been donated one (waitq: %p)", waitq);
+               }
                eventmask = _CAST_TO_EVENT_MASK(waitq);
                waitq_lock(safeq);
        } else {
@@ -2922,6 +2956,10 @@ waitq_pull_thread_locked(struct waitq *waitq, thread_t thread)
        /* Find the interrupts disabled queue thread is waiting on */
        if (!waitq_irq_safe(waitq)) {
                safeq = waitq_get_safeq(waitq);
+               if (__improbable(safeq == NULL)) {
+                       panic("Trying to clear_wait on a turnstile proxy "
+                           "that hasn't been donated one (waitq: %p)", waitq);
+               }
        } else {
                safeq = waitq;
        }
@@ -3246,8 +3284,12 @@ waitq_init(struct waitq *waitq, int policy)
        waitq->waitq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0);
        waitq->waitq_irq = !!(policy & SYNC_POLICY_DISABLE_IRQ);
        waitq->waitq_prepost = 0;
-       waitq->waitq_type = WQT_QUEUE;
-       waitq->waitq_turnstile_or_port = !!(policy & SYNC_POLICY_TURNSTILE);
+       if (policy & SYNC_POLICY_TURNSTILE_PROXY) {
+               waitq->waitq_type = WQT_TSPROXY;
+       } else {
+               waitq->waitq_type = WQT_QUEUE;
+       }
+       waitq->waitq_turnstile = !!(policy & SYNC_POLICY_TURNSTILE);
        waitq->waitq_eventmask = 0;
 
        waitq->waitq_set_id = 0;
@@ -3259,6 +3301,9 @@ waitq_init(struct waitq *waitq, int policy)
                priority_queue_init(&waitq->waitq_prio_queue,
                    PRIORITY_QUEUE_BUILTIN_MAX_HEAP);
                assert(waitq->waitq_fifo == 0);
+       } else if (policy & SYNC_POLICY_TURNSTILE_PROXY) {
+               waitq->waitq_ts = TURNSTILE_NULL;
+               waitq->waitq_tspriv = NULL;
        } else {
                queue_init(&waitq->waitq_queue);
        }
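[Note: with the new policy bit, a turnstile-proxy waitq starts life with no turnstile and relies on one being donated later. A minimal client-side sketch, for kernel-private callers (the flag is only defined under KERNEL_PRIVATE; see the SYNC_POLICY hunk further below):

	struct waitq wq;
	waitq_init(&wq, SYNC_POLICY_TURNSTILE_PROXY);
	assert(waitq_is_turnstile_proxy(&wq));
	assert(wq.waitq_ts == TURNSTILE_NULL);  /* donated later, not at init */
]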
@@ -3343,7 +3388,12 @@ waitq_deinit(struct waitq *waitq)
 {
        spl_t s;
 
-       if (!waitq || !waitq_is_queue(waitq)) {
+       assert(waitq);
+       if (!waitq_is_valid(waitq)) {
+               return;
+       }
+
+       if (!waitq_is_queue(waitq) && !waitq_is_turnstile_proxy(waitq)) {
                return;
        }
 
@@ -3351,25 +3401,33 @@ waitq_deinit(struct waitq *waitq)
                s = splsched();
        }
        waitq_lock(waitq);
-       if (!waitq_valid(waitq)) {
-               waitq_unlock(waitq);
-               if (waitq_irq_safe(waitq)) {
-                       splx(s);
+
+       if (waitq_valid(waitq)) {
+               waitq->waitq_isvalid = 0;
+               if (!waitq_irq_safe(waitq)) {
+                       waitq_unlink_all_unlock(waitq);
+                       /* waitq unlocked and set links deallocated */
+                       goto out;
                }
-               return;
        }
 
-       waitq->waitq_isvalid = 0;
-
-       if (!waitq_irq_safe(waitq)) {
-               waitq_unlink_all_unlock(waitq);
-               /* waitq unlocked and set links deallocated */
-       } else {
-               waitq_unlock(waitq);
+       waitq_unlock(waitq);
+       if (waitq_irq_safe(waitq)) {
                splx(s);
        }
 
-       assert(waitq_empty(waitq));
+out:
+#if MACH_ASSERT
+       if (waitq_is_turnstile_queue(waitq)) {
+               assert(priority_queue_empty(&waitq->waitq_prio_queue));
+       } else if (waitq_is_turnstile_proxy(waitq)) {
+               assert(waitq->waitq_ts == TURNSTILE_NULL);
+       } else {
+               assert(queue_empty(&waitq->waitq_queue));
+       }
+#else
+       (void)0;
+#endif // MACH_ASSERT
 }
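[Note: emptiness is now a per-type question, which is why the generic waitq_empty() inline is removed from waitq.h below and the checks are spelled out per type under MACH_ASSERT above. A type-aware replacement would look roughly like this sketch (the TSPROXY arm is an assumption; an undonated proxy has no waiters by definition):

static inline boolean_t
waitq_empty(struct waitq *wq)
{
	if (waitq_is_turnstile_queue(wq)) {
		return priority_queue_empty(&wq->waitq_prio_queue);
	} else if (waitq_is_turnstile_proxy(wq)) {
		struct turnstile *ts = wq->waitq_ts;
		return ts == TURNSTILE_NULL || waitq_empty(&ts->ts_waitq);
	} else {
		return queue_empty(&wq->waitq_queue);
	}
}
]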
 
 void
index 2d897573373730e0ca0ef4b116dd1e04ea68b890..efbdcc8830710f5f87aac23e47abee29b5ce64b5 100644 (file)
@@ -101,6 +101,7 @@ typedef enum e_waitq_lock_state {
 
 enum waitq_type {
        WQT_INVALID = 0,
+       WQT_TSPROXY = 0x1,
        WQT_QUEUE   = 0x2,
        WQT_SET     = 0x3,
 };
@@ -141,7 +142,7 @@ struct waitq {
            waitq_prepost:1,     /* waitq supports prepost? */
            waitq_irq:1,         /* waitq requires interrupts disabled */
            waitq_isvalid:1,     /* waitq structure is valid */
-           waitq_turnstile_or_port:1,     /* waitq is embedded in a turnstile (if irq safe), or port (if not irq safe) */
+           waitq_turnstile:1,   /* waitq is embedded in a turnstile */
            waitq_eventmask:_EVENT_MASK_BITS;
        /* the wait queue set (set-of-sets) to which this queue belongs */
 #if __arm64__
@@ -153,8 +154,12 @@ struct waitq {
        uint64_t waitq_set_id;
        uint64_t waitq_prepost_id;
        union {
-               queue_head_t            waitq_queue;            /* queue of elements */
-               struct priority_queue   waitq_prio_queue;       /* priority ordered queue of elements */
+               queue_head_t            waitq_queue;        /* queue of elements */
+               struct priority_queue   waitq_prio_queue;   /* priority ordered queue of elements */
+               struct {
+                       struct turnstile   *waitq_ts;           /* turnstile for WQT_TSPROXY */
+                       void               *waitq_tspriv;       /* private field for clients' use */
+               };
        };
 };
 
@@ -184,11 +189,11 @@ extern void waitq_bootstrap(void);
 #define waitq_is_queue(wq) \
        ((wq)->waitq_type == WQT_QUEUE)
 
-#define waitq_is_turnstile_queue(wq) \
-       (((wq)->waitq_irq) && (wq)->waitq_turnstile_or_port)
+#define waitq_is_turnstile_proxy(wq) \
+       ((wq)->waitq_type == WQT_TSPROXY)
 
-#define waitq_is_port_queue(wq) \
-       (!((wq)->waitq_irq) && (wq)->waitq_turnstile_or_port)
+#define waitq_is_turnstile_queue(wq) \
+       (((wq)->waitq_irq) && (wq)->waitq_turnstile)
 
 #define waitq_is_set(wq) \
        ((wq)->waitq_type == WQT_SET && ((struct waitq_set *)(wq))->wqset_id != 0)
@@ -209,16 +214,6 @@ extern void waitq_bootstrap(void);
  */
 extern void waitq_invalidate_locked(struct waitq *wq);
 
-static inline boolean_t
-waitq_empty(struct waitq *wq)
-{
-       if (waitq_is_turnstile_queue(wq)) {
-               return priority_queue_empty(&(wq->waitq_prio_queue));
-       } else {
-               return queue_empty(&(wq->waitq_queue));
-       }
-}
-
 extern lck_grp_t waitq_lck_grp;
 
 #if __arm64__
@@ -466,8 +461,6 @@ extern int waitq_is_global(struct waitq *waitq);
 
 extern int waitq_irq_safe(struct waitq *waitq);
 
-extern struct waitq * waitq_get_safeq(struct waitq *waitq);
-
 #if CONFIG_WAITQ_STATS
 /*
  * waitq statistics
index 90d8e341f36f56242dcc28190150d12d5e4707c6..ae3951156951e11bb39eb91fe10f70579d0598fe 100644 (file)
@@ -120,6 +120,80 @@ kperf_system_memory_log(void)
            (uintptr_t)VM_PAGE_COMPRESSOR_COUNT);
 }
 
+static void
+kperf_sample_user_internal(struct kperf_usample *sbuf,
+    struct kperf_context *context, unsigned int actionid,
+    unsigned int sample_what)
+{
+       if (sample_what & SAMPLER_USTACK) {
+               kperf_ucallstack_sample(&sbuf->ucallstack, context);
+       }
+       if (sample_what & SAMPLER_TH_DISPATCH) {
+               kperf_thread_dispatch_sample(&sbuf->th_dispatch, context);
+       }
+       if (sample_what & SAMPLER_TH_INFO) {
+               kperf_thread_info_sample(&sbuf->th_info, context);
+       }
+
+       boolean_t intren = ml_set_interrupts_enabled(FALSE);
+
+       /*
+        * No userdata or sample_flags for this one.
+        */
+       BUF_DATA(PERF_GEN_EVENT | DBG_FUNC_START, sample_what, actionid);
+
+       if (sample_what & SAMPLER_USTACK) {
+               kperf_ucallstack_log(&sbuf->ucallstack);
+       }
+       if (sample_what & SAMPLER_TH_DISPATCH) {
+               kperf_thread_dispatch_log(&sbuf->th_dispatch);
+       }
+       if (sample_what & SAMPLER_TH_INFO) {
+               kperf_thread_info_log(&sbuf->th_info);
+       }
+
+       BUF_DATA(PERF_GEN_EVENT | DBG_FUNC_END, sample_what);
+
+       ml_set_interrupts_enabled(intren);
+}
+
+void
+kperf_sample_user(struct kperf_usample *sbuf, struct kperf_context *context,
+    unsigned int actionid, unsigned int sample_flags)
+{
+       if (actionid == 0 || actionid > actionc) {
+               return;
+       }
+
+       unsigned int sample_what = actionv[actionid - 1].sample;
+       unsigned int ucallstack_depth = actionv[actionid - 1].ucallstack_depth;
+
+       /* callstacks should be explicitly ignored */
+       if (sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK) {
+               sample_what &= ~(SAMPLER_KSTACK | SAMPLER_USTACK);
+       }
+       if (sample_flags & SAMPLE_FLAG_ONLY_SYSTEM) {
+               sample_what &= SAMPLER_SYS_MEM;
+       }
+       assert((sample_flags & (SAMPLE_FLAG_THREAD_ONLY | SAMPLE_FLAG_TASK_ONLY))
+           != (SAMPLE_FLAG_THREAD_ONLY | SAMPLE_FLAG_TASK_ONLY));
+       if (sample_flags & SAMPLE_FLAG_THREAD_ONLY) {
+               sample_what &= SAMPLER_THREAD_MASK;
+       }
+       if (sample_flags & SAMPLE_FLAG_TASK_ONLY) {
+               sample_what &= SAMPLER_TASK_MASK;
+       }
+
+       if (sample_what == 0) {
+               return;
+       }
+
+       sbuf->ucallstack.kpuc_nframes = ucallstack_depth ?:
+           MAX_UCALLSTACK_FRAMES;
+
+       kperf_sample_user_internal(sbuf, context, actionid, sample_what);
+}
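[Note: the GNU ?: in the nframes assignment above makes a configured depth of 0 fall back to MAX_UCALLSTACK_FRAMES. The flag handling is plain mask arithmetic; a worked example with hypothetical inputs:

	/* action configured for user stacks plus thread info, but the
	 * caller asked for callstacks to be suppressed */
	unsigned int sample_what = SAMPLER_USTACK | SAMPLER_TH_INFO;
	unsigned int sample_flags = SAMPLE_FLAG_EMPTY_CALLSTACK;

	if (sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK) {
		sample_what &= ~(SAMPLER_KSTACK | SAMPLER_USTACK);
	}
	/* sample_what is now just SAMPLER_TH_INFO; had it gone to 0,
	 * the function would return before cutting any tracepoints */
]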
+
 static kern_return_t
 kperf_sample_internal(struct kperf_sample *sbuf,
     struct kperf_context *context,
@@ -132,9 +206,6 @@ kperf_sample_internal(struct kperf_sample *sbuf,
        uint32_t userdata = actionid;
        bool task_only = false;
 
-       /* not much point continuing here, but what to do ? return
-        * Shutdown? cut a tracepoint and continue?
-        */
        if (sample_what == 0) {
                return SAMPLE_CONTINUE;
        }
@@ -170,14 +241,9 @@ kperf_sample_internal(struct kperf_sample *sbuf,
                sbuf->kcallstack.kpkc_nframes = MAX_KCALLSTACK_FRAMES;
        }
 
-       if (ucallstack_depth) {
-               sbuf->ucallstack.kpuc_nframes = ucallstack_depth;
-       } else {
-               sbuf->ucallstack.kpuc_nframes = MAX_UCALLSTACK_FRAMES;
-       }
-
+       ucallstack_depth = ucallstack_depth ?: MAX_UCALLSTACK_FRAMES;
        sbuf->kcallstack.kpkc_flags = 0;
-       sbuf->ucallstack.kpuc_flags = 0;
+       sbuf->usample.ucallstack.kpuc_flags = 0;
 
        if (sample_what & SAMPLER_TH_INFO) {
                kperf_thread_info_sample(&sbuf->th_info, context);
@@ -199,8 +265,8 @@ kperf_sample_internal(struct kperf_sample *sbuf,
        if (sample_what & SAMPLER_KSTACK) {
                if (sample_flags & SAMPLE_FLAG_CONTINUATION) {
                        kperf_continuation_sample(&(sbuf->kcallstack), context);
-                       /* outside of interrupt context, backtrace the current thread */
                } else if (sample_flags & SAMPLE_FLAG_NON_INTERRUPT) {
+                       /* outside of interrupt context, backtrace the current thread */
                        kperf_backtrace_sample(&(sbuf->kcallstack), context);
                } else {
                        kperf_kcallstack_sample(&(sbuf->kcallstack), context);
@@ -210,7 +276,6 @@ kperf_sample_internal(struct kperf_sample *sbuf,
                kperf_task_snapshot_sample(context->cur_task, &(sbuf->tk_snapshot));
        }
 
-       /* sensitive ones */
        if (!is_kernel) {
                if (sample_what & SAMPLER_MEMINFO) {
                        kperf_meminfo_sample(context->cur_task, &(sbuf->meminfo));
@@ -218,19 +283,13 @@ kperf_sample_internal(struct kperf_sample *sbuf,
 
                if (sample_flags & SAMPLE_FLAG_PEND_USER) {
                        if (sample_what & SAMPLER_USTACK) {
-                               pended_ucallstack = kperf_ucallstack_pend(context, sbuf->ucallstack.kpuc_nframes);
-                       }
-
-                       if (sample_what & SAMPLER_TH_DISPATCH) {
-                               pended_th_dispatch = kperf_thread_dispatch_pend(context);
-                       }
-               } else {
-                       if (sample_what & SAMPLER_USTACK) {
-                               kperf_ucallstack_sample(&(sbuf->ucallstack), context);
+                               pended_ucallstack = kperf_ucallstack_pend(context,
+                                   ucallstack_depth, actionid);
                        }
 
                        if (sample_what & SAMPLER_TH_DISPATCH) {
-                               kperf_thread_dispatch_sample(&(sbuf->th_dispatch), context);
+                               pended_th_dispatch =
+                                   kperf_thread_dispatch_pend(context, actionid);
                        }
                }
        }
@@ -307,14 +366,6 @@ log_sample:
                        if (pended_th_dispatch) {
                                BUF_INFO(PERF_TI_DISPPEND);
                        }
-               } else {
-                       if (sample_what & SAMPLER_USTACK) {
-                               kperf_ucallstack_log(&(sbuf->ucallstack));
-                       }
-
-                       if (sample_what & SAMPLER_TH_DISPATCH) {
-                               kperf_thread_dispatch_log(&(sbuf->th_dispatch));
-                       }
                }
        }
 
@@ -357,11 +408,11 @@ kperf_sample(struct kperf_sample *sbuf,
 
        /* the samplers to run */
        unsigned int sample_what = actionv[actionid - 1].sample;
+       unsigned int ucallstack_depth = actionv[actionid - 1].ucallstack_depth;
 
        /* do the actual sample operation */
        return kperf_sample_internal(sbuf, context, sample_what,
-                  sample_flags, actionid,
-                  actionv[actionid - 1].ucallstack_depth);
+                  sample_flags, actionid, ucallstack_depth);
 }
 
 void
@@ -412,11 +463,11 @@ __attribute__((noinline))
 void
 kperf_thread_ast_handler(thread_t thread)
 {
-       BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_START, thread, kperf_get_thread_flags(thread));
+       uint32_t ast = thread->kperf_ast;
 
-       /* ~2KB of the stack for the sample since this is called from AST */
-       struct kperf_sample sbuf;
-       memset(&sbuf, 0, sizeof(struct kperf_sample));
+       BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_START, thread, ast);
+
+       struct kperf_usample sbuf = {};
 
        task_t task = get_threadtask(thread);
 
@@ -425,49 +476,46 @@ kperf_thread_ast_handler(thread_t thread)
                return;
        }
 
-       /* make a context, take a sample */
        struct kperf_context ctx = {
                .cur_thread = thread,
                .cur_task = task,
                .cur_pid = task_pid(task),
        };
 
-       /* decode the flags to determine what to sample */
        unsigned int sample_what = 0;
-       uint32_t flags = kperf_get_thread_flags(thread);
-
-       if (flags & T_KPERF_AST_DISPATCH) {
+       if (ast & T_KPERF_AST_DISPATCH) {
                sample_what |= SAMPLER_TH_DISPATCH;
        }
-       if (flags & T_KPERF_AST_CALLSTACK) {
-               sample_what |= SAMPLER_USTACK;
-               sample_what |= SAMPLER_TH_INFO;
+       if (ast & T_KPERF_AST_CALLSTACK) {
+               /* TH_INFO for backwards compatibility */
+               sample_what |= SAMPLER_USTACK | SAMPLER_TH_INFO;
        }
 
-       uint32_t ucallstack_depth = T_KPERF_GET_CALLSTACK_DEPTH(flags);
-
-       int r = kperf_sample_internal(&sbuf, &ctx, sample_what, 0, 0, ucallstack_depth);
+       sbuf.ucallstack.kpuc_nframes =
+           T_KPERF_GET_CALLSTACK_DEPTH(ast) ?: MAX_UCALLSTACK_FRAMES;
+       unsigned int actionid = T_KPERF_GET_ACTIONID(ast);
+       kperf_sample_user_internal(&sbuf, &ctx, actionid, sample_what);
 
-       BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_END, r);
+       BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_END);
 }
 
-/* register AST bits */
 int
-kperf_ast_pend(thread_t thread, uint32_t set_flags)
+kperf_ast_pend(thread_t thread, uint32_t set_flags, unsigned int set_actionid)
 {
-       /* can only pend on the current thread */
        if (thread != current_thread()) {
-               panic("pending to non-current thread");
+               panic("kperf: pending AST to non-current thread");
        }
 
-       /* get our current bits */
-       uint32_t flags = kperf_get_thread_flags(thread);
+       uint32_t ast = thread->kperf_ast;
+       unsigned int actionid = T_KPERF_GET_ACTIONID(ast);
+       uint32_t flags = ast & T_KPERF_AST_ALL;
 
-       /* see if it's already been done or pended */
-       if (!(flags & set_flags)) {
-               /* set the bit on the thread */
-               flags |= set_flags;
-               kperf_set_thread_flags(thread, flags);
+       if ((flags | set_flags) != flags || actionid != set_actionid) {
+               ast &= ~T_KPERF_SET_ACTIONID(actionid);
+               ast |= T_KPERF_SET_ACTIONID(set_actionid);
+               ast |= set_flags;
+
+               thread->kperf_ast = ast;
 
                /* set the actual AST */
                act_set_kperf(thread);
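[Note: the action id now rides in the same per-thread word as the AST flags, packed via the T_KPERF_SET/GET_ACTIONID macros (defined in thread.h, not shown in this diff; treat the field widths as opaque). A walk-through of the repack above, with hypothetical values:

	/* thread already has a callstack AST pending for action 2;
	 * a dispatch sampler now pends with action 5 */
	uint32_t ast = T_KPERF_AST_CALLSTACK | T_KPERF_SET_ACTIONID(2);

	ast &= ~T_KPERF_SET_ACTIONID(2);   /* clear the old id's bits */
	ast |= T_KPERF_SET_ACTIONID(5);    /* pack the new action id */
	ast |= T_KPERF_AST_DISPATCH;       /* merge the new flag */
	/* T_KPERF_GET_ACTIONID(ast) == 5; both AST flags remain set */
]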
@@ -480,14 +528,12 @@ kperf_ast_pend(thread_t thread, uint32_t set_flags)
 void
 kperf_ast_set_callstack_depth(thread_t thread, uint32_t depth)
 {
-       uint32_t ast_flags = kperf_get_thread_flags(thread);
-       uint32_t existing_callstack_depth =
-           T_KPERF_GET_CALLSTACK_DEPTH(ast_flags);
-
-       if (existing_callstack_depth < depth) {
-               ast_flags &= ~T_KPERF_SET_CALLSTACK_DEPTH(depth);
-               ast_flags |= T_KPERF_SET_CALLSTACK_DEPTH(depth);
-               kperf_set_thread_flags(thread, ast_flags);
+       uint32_t ast = thread->kperf_ast;
+       uint32_t existing_depth = T_KPERF_GET_CALLSTACK_DEPTH(ast);
+       if (existing_depth < depth) {
+               ast &= ~T_KPERF_SET_CALLSTACK_DEPTH(existing_depth);
+               ast |= T_KPERF_SET_CALLSTACK_DEPTH(depth);
+               thread->kperf_ast = ast;
        }
 }
 
@@ -689,6 +735,9 @@ kperf_action_set_ucallstack_depth(unsigned action_id, uint32_t depth)
        if (depth > MAX_UCALLSTACK_FRAMES) {
                return EINVAL;
        }
+       if (depth < 2) {
+               return EINVAL;
+       }
 
        actionv[action_id - 1].ucallstack_depth = depth;
 
@@ -705,6 +754,9 @@ kperf_action_set_kcallstack_depth(unsigned action_id, uint32_t depth)
        if (depth > MAX_KCALLSTACK_FRAMES) {
                return EINVAL;
        }
+       if (depth < 1) {
+               return EINVAL;
+       }
 
        actionv[action_id - 1].kcallstack_depth = depth;
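[Note: the new lower bounds encode sampler invariants: a user callstack needs at least the PC and one frame (hence 2, matching the check in kperf_ucallstack_pend below), while a kernel callstack only needs the PC. Boundary behavior, assuming action 1 exists and the usual 0 on success:

	assert(kperf_action_set_ucallstack_depth(1, 1) == EINVAL);  /* too shallow */
	assert(kperf_action_set_ucallstack_depth(1, 2) == 0);       /* minimum ok */
	assert(kperf_action_set_kcallstack_depth(1, 0) == EINVAL);  /* too shallow */
]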
 
index 420720b86ab6f2104fbc4889d1f9ad5f368edadc..f37d5dccd85d6bac8319268508adcf44c117b4c7 100644 (file)
@@ -33,8 +33,8 @@
 #include <stdint.h>
 #include <stdbool.h>
 
-/* fwd decl */
 struct kperf_sample;
+struct kperf_usample;
 struct kperf_context;
 
 /* bits for defining what to do on an action */
@@ -86,6 +86,12 @@ kern_return_t kperf_sample(struct kperf_sample *sbuf,
     unsigned actionid,
     unsigned sample_flags);
 
+/*
+ * Sample user space.
+ */
+void kperf_sample_user(struct kperf_usample *sbuf, struct kperf_context *ctx,
+    unsigned int actionid, unsigned int sample_flags);
+
 /* Whether the action provided samples non-system values. */
 bool kperf_action_has_non_system(unsigned actionid);
 bool kperf_action_has_thread(unsigned int actionid);
index d43ce88b4a3048a9db0294b493e7cca05fe709b3..f5b19d50825f9d03ea4039a01a5291e2e50df9d1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2011-2019 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-/* pend ast bits on a thread */
-extern int kperf_ast_pend(thread_t thread, uint32_t flags);
-extern void kperf_ast_set_callstack_depth(thread_t thread, uint32_t depth);
+/*
+ * Ensure that kperf is informed the next time this thread goes back to user
+ * space, to handle an action.
+ */
+int kperf_ast_pend(thread_t thread, uint32_t flags, unsigned int actionid);
+
+/*
+ * Set the depth for the user callstack sample.
+ */
+void kperf_ast_set_callstack_depth(thread_t thread, uint32_t depth);
index d6f0fb9a9deaab402ea4904e78de49581a32453d..4a38dd7c581523f94daa101d14397c91246ddaab 100644 (file)
@@ -254,7 +254,7 @@ kperf_backtrace_sample(struct kp_kcallstack *cs, struct kperf_context *context)
                cs->kpkc_nframes += 1;
        }
        if (trunc) {
-               cs->kpkc_nframes |= CALLSTACK_TRUNCATED;
+               cs->kpkc_flags |= CALLSTACK_TRUNCATED;
        }
 
        BUF_VERB(PERF_CS_BACKTRACE | DBG_FUNC_END, cs->kpkc_nframes);
@@ -437,12 +437,15 @@ kperf_ucallstack_log(struct kp_ucallstack *cs)
 }
 
 int
-kperf_ucallstack_pend(struct kperf_context * context, uint32_t depth)
+kperf_ucallstack_pend(struct kperf_context * context, uint32_t depth,
+    unsigned int actionid)
 {
-       int did_pend = kperf_ast_pend(context->cur_thread, T_KPERF_AST_CALLSTACK);
+       if (depth < 2) {
+               panic("HUH");
+       }
        kperf_ast_set_callstack_depth(context->cur_thread, depth);
-
-       return did_pend;
+       return kperf_ast_pend(context->cur_thread, T_KPERF_AST_CALLSTACK,
+           actionid);
 }
 
 static kern_return_t
index a144a8b952bcfa1cd0890ae19370f67bc0f88cd4..e4a0cd5e0273de98b0251bf32ddddf29264b9bbd 100644 (file)
@@ -70,7 +70,8 @@ void kperf_continuation_sample(struct kp_kcallstack *cs, struct kperf_context *)
 void kperf_backtrace_sample(struct kp_kcallstack *cs, struct kperf_context *context);
 
 void kperf_ucallstack_sample(struct kp_ucallstack *cs, struct kperf_context *);
-int kperf_ucallstack_pend(struct kperf_context *, uint32_t depth);
+int kperf_ucallstack_pend(struct kperf_context *, uint32_t depth,
+    unsigned int actionid);
 void kperf_ucallstack_log(struct kp_ucallstack *cs);
 
 #endif /* !defined(KPERF_CALLSTACK_H) */
index 17a94be8edb844bc5abeb78f4720514ff0ab6b33..3bffd178c32c3514b5549c010381d2e9a7c69da8 100644 (file)
@@ -250,19 +250,6 @@ kperf_on_cpu_update(void)
            kperf_lazy_wait_action != 0;
 }
 
-/* random misc-ish functions */
-uint32_t
-kperf_get_thread_flags(thread_t thread)
-{
-       return thread->kperf_flags;
-}
-
-void
-kperf_set_thread_flags(thread_t thread, uint32_t flags)
-{
-       thread->kperf_flags = flags;
-}
-
 unsigned int
 kperf_sampling_status(void)
 {
index 63434af8f37017e46a1c432abaa82bfd0f6a39f1..31f87f6a6239f28b9925d2ed12f151c1b9ea9c05 100644 (file)
@@ -41,9 +41,8 @@ extern lck_grp_t kperf_lck_grp;
 #define TRIGGER_TYPE_LAZY_WAIT (3)
 #define TRIGGER_TYPE_LAZY_CPU  (3)
 
-/* helpers to get and set AST flags on a thread */
-uint32_t kperf_get_thread_flags(thread_t thread);
-void kperf_set_thread_flags(thread_t thread, uint32_t flags);
+uint32_t kperf_get_thread_ast(thread_t thread);
+void kperf_set_thread_ast(thread_t thread, uint32_t flags);
 
 /*
  * Get and set dirtiness of thread, so kperf can track whether the thread
index 43df937a282071449f217d04c792e2fdf33bcd0b..3d3fcb0bfcec01d2c5ecf555b2faa3e7119a395a 100644 (file)
@@ -42,8 +42,7 @@ kperf_kpc_thread_ast(thread_t thread)
 {
        kpc_thread_ast_handler(thread);
        kperf_thread_ast_handler(thread);
-
-       thread->kperf_flags = 0;
+       thread->kperf_ast = 0;
 }
 
 void
index 0db17185b01b1d2155db83eece9c0329b7695700..09e73dc9ab1306e30adb0590f8e9a689171f2c5f 100644 (file)
@@ -363,7 +363,8 @@ pet_sample_thread(int pid, task_t task, thread_t thread, uint32_t idle_rate)
 {
        lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
 
-       uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS | SAMPLE_FLAG_THREAD_ONLY;
+       uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS |
+           SAMPLE_FLAG_THREAD_ONLY;
 
        BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START);
 
@@ -388,6 +389,8 @@ pet_sample_thread(int pid, task_t task, thread_t thread, uint32_t idle_rate)
        thread->kperf_pet_cnt++;
 
        kperf_sample(pet_sample, &ctx, pet_action_id, sample_flags);
+       kperf_sample_user(&pet_sample->usample, &ctx, pet_action_id,
+           sample_flags);
 
        BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
 }
index 9af5ba5b55d67255e0d8d365a680a7b3bab64b10..941ae53e8746f1e87d34e3deaa521dcd46947d9b 100644 (file)
 #include "kperf_kpc.h"
 #include "meminfo.h"
 
+/*
+ * For data that must be sampled in a fault-able context.
+ */
+struct kperf_usample {
+       struct kperf_thread_dispatch th_dispatch;
+       struct kp_ucallstack ucallstack;
+       struct kperf_thread_info th_info;
+};
+
 struct kperf_sample {
        struct kperf_thread_info       th_info;
        struct kperf_thread_scheduling th_scheduling;
        struct kperf_thread_snapshot   th_snapshot;
-       struct kperf_thread_dispatch   th_dispatch;
 
        struct kperf_task_snapshot tk_snapshot;
 
        struct kp_kcallstack kcallstack;
-       struct kp_ucallstack ucallstack;
        struct meminfo     meminfo;
 
+       struct kperf_usample usample;
+
 #if KPC
        struct kpcdata    kpcdata;
 #endif /* KPC */
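[Note: splitting struct kperf_usample out, and nesting one inside struct kperf_sample, lets the AST handler stack-allocate only the fault-able samplers instead of the full ~2KB kperf_sample it used to memset. A sketch of the two access paths, using the names above:

	struct kperf_sample s;                 /* interrupt-context buffer */
	struct kperf_usample *u = &s.usample;  /* user half nested inside */
	u->ucallstack.kpuc_flags = 0;          /* same field kperf_sample_internal resets */

	struct kperf_usample ast_buf = {};     /* AST handler's smaller buffer */
]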
index 901e500f750dd8b775e1c45e95970f08db7ff551..ca2d0c67d39eb707d8cc2cc17cd4b8ad19a199b0 100644 (file)
@@ -321,9 +321,11 @@ out:
 }
 
 int
-kperf_thread_dispatch_pend(struct kperf_context *context)
+kperf_thread_dispatch_pend(struct kperf_context *context,
+    unsigned int actionid)
 {
-       return kperf_ast_pend(context->cur_thread, T_KPERF_AST_DISPATCH);
+       return kperf_ast_pend(context->cur_thread, T_KPERF_AST_DISPATCH,
+                  actionid);
 }
 
 void
index 09a188554d59e2d2b62ca790c0793bab4f549f1c..fcd932608173b286a580a84146d7ba9b99e9fcff 100644 (file)
@@ -86,7 +86,7 @@ struct kperf_thread_dispatch {
 
 void kperf_thread_dispatch_sample(struct kperf_thread_dispatch *,
     struct kperf_context *);
-int kperf_thread_dispatch_pend(struct kperf_context *);
+int kperf_thread_dispatch_pend(struct kperf_context *, unsigned int actionid);
 void kperf_thread_dispatch_log(struct kperf_thread_dispatch *);
 
 void kperf_thread_inscyc_log(struct kperf_context *);
index b12c02b5bbfbcb05ab99655489e7ae3ad1450000..13fbdad05bb3cbe19e939b8c469ab80fabbd3d68 100644 (file)
@@ -35,6 +35,7 @@
 
 #include <mach/machine/_structs.h>
 #include <mach/message.h>
+#include <mach/vm_types.h>
 #include <mach/arm/thread_state.h>
 
 /*
@@ -277,6 +278,21 @@ const_thread_state64(const arm_unified_thread_state_t *its)
 
 #define ARM_SAVED_STATE (THREAD_STATE_NONE + 1)
 
+#if __ARM_VFP__
+#define VFPSAVE_ALIGN  16
+#define VFPSAVE_ATTRIB __attribute__((aligned (VFPSAVE_ALIGN)))
+#define THREAD_ALIGN   VFPSAVE_ALIGN
+
+/*
+ * vector floating point saved state
+ */
+struct arm_vfpsaved_state {
+       uint32_t r[64];
+       uint32_t fpscr;
+       uint32_t fpexc;
+};
+#endif
+
 struct arm_saved_state {
        uint32_t r[13];     /* General purpose register r0-r12 */
        uint32_t sp;        /* Stack pointer r13 */
@@ -286,6 +302,15 @@ struct arm_saved_state {
        uint32_t fsr;       /* Fault status */
        uint32_t far;       /* Virtual Fault Address */
        uint32_t exception; /* exception number */
+
+#if __ARM_VFP__
+       /* VFP state */
+       struct arm_vfpsaved_state VFPdata VFPSAVE_ATTRIB;
	// for packing reasons cthread_self and DebugData
	// are inside the PcbData when __ARM_VFP__ is set
+       arm_debug_state_t        *VFPpadding_DebugData;
+       vm_address_t              VFPpadding_cthread_self;
+#endif
 };
 typedef struct arm_saved_state arm_saved_state_t;
 
index 0faf73ee82b4ef539bac4abc9179737189a46b4b..f6efdcbc169100fbc1e05e0a31b0677a1576a112 100644 (file)
 #define SHARED_REGION_NESTING_MIN_PPC64         0x0000000010000000ULL
 #define SHARED_REGION_NESTING_MAX_PPC64         0x0000000010000000ULL
 
-#define SHARED_REGION_BASE_ARM                  0x1A000000ULL
-#define SHARED_REGION_SIZE_ARM                  0x26000000ULL
-#define SHARED_REGION_NESTING_BASE_ARM          0x1A000000ULL
-#define SHARED_REGION_NESTING_SIZE_ARM          0x26000000ULL
+#define SHARED_REGION_BASE_ARM                  0x40000000ULL
+#define SHARED_REGION_SIZE_ARM                  0x40000000ULL
+#define SHARED_REGION_NESTING_BASE_ARM          0x40000000ULL
+#define SHARED_REGION_NESTING_SIZE_ARM          0x40000000ULL
 #define SHARED_REGION_NESTING_MIN_ARM           ?
 #define SHARED_REGION_NESTING_MAX_ARM           ?
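[Note, worked out from the constants above: the old 32-bit ARM shared region spanned 0x1A000000..0x40000000 (0x26000000 bytes, about 608 MB); the new one spans 0x40000000..0x80000000, a full 1 GB window.]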
 
index 605388fcc71f222ecdc2f2b0bed6c5c041e1d686..648c6e16b3e120abbe9f72f80293045227fbd2f8 100644 (file)
@@ -51,13 +51,8 @@ typedef int sync_policy_t;
 
 #define SYNC_POLICY_PREPOST             0x4
 #define SYNC_POLICY_DISABLE_IRQ         0x8
-
-/*
- * If the waitq is IRQ safe, 0x10 suggests it's a waitq embedded in turnstile.
- * If the waitq is not IRQ safe, 0x10 suggests it's a waitq of a port and should use it's turnstile safeq.
- */
 #define SYNC_POLICY_TURNSTILE           0x10
-#define SYNC_POLICY_PORT                0x10
+#define SYNC_POLICY_TURNSTILE_PROXY     0x20
 
 #endif  /* KERNEL_PRIVATE */
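[Note: the overloaded 0x10 bit is retired. SYNC_POLICY_TURNSTILE keeps 0x10 for waitqs embedded in turnstiles, and the former SYNC_POLICY_PORT use moves to the distinct SYNC_POLICY_TURNSTILE_PROXY bit (0x20). A hypothetical port-side call site (field name illustrative), composing flags by OR with SYNC_POLICY_FIFO (0x0 in the same header):

	waitq_init(&mqueue->imq_wait_queue,
	    SYNC_POLICY_FIFO | SYNC_POLICY_TURNSTILE_PROXY);
]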
 
diff --git a/osfmk/man/index.html b/osfmk/man/index.html
deleted file mode 100644 (file)
index 2a9d0ff..0000000
+++ /dev/null
@@ -1,448 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-<head>
-  <title>Mach Kernel Interface Reference Manual</title>
-</head>
-<body>
-<h3>Mach IPC Interface</h3>
-<blockquote>
-<p>
-Mach IPC presents itself in a few forms: message queues, lock-sets, 
-and semaphores (more may be added in the future). &nbsp;All share one common 
-charateristic: the capabilities presented by each are represented through 
-a handle known as a Mach port. &nbsp;Specific rights represented in these 
-Mach port capability handles allow the underlying IPC object to be used and 
-manipulated in consistent ways.</p>
-
-<h4>Mach Message Queue Interface</h4>
-<blockquote>
-<p>
-<a href="mach_msg.html">mach_msg</a> - Send and/or receive a message from the target port.<br>
-<a href="mach_msg.html">mach_msg_overwrite</a> - Send and/or receive messages with possible overwrite.<br>
-</p>
-Mach Message Queue Data Structures
-<p>
-<a href="mach_msg_descriptor.html">mach_msg_descriptor</a> - Specifies an element of a complex IPC message.<br>
-<a href="mach_msg_header.html">mach_msg_header</a> - Specifies the content of an IPC message header.<br>
-</p>
-</blockquote>
-
-<h4>Mach Lock-Set Interface</h4>
-<blockquote>
-<p>
-<a href="lock_acquire.html">lock_acquire</a> - Acquire ownership a lock<br>     
-<a href="lock_handoff.html">lock_handoff</a> - Hand-off ownership of a lock.<br>      
-<a href="lock_handoff_accept.html">lock_handoff_accept</a> - Accept lock ownership from a handoff.<br>      
-<a href="lock_make_stable.html">lock_make_stable</a> - Stabilize the state of the specified lock.<br>
-<a href="lock_release.html">lock_release</a> - Release ownership of a lock.<br>
-<a href="lock_set_create.html">lock_set_create</a> - Create a new lock set.<br>
-<a href="lock_set_destroy.html">lock_set_destroy</a> - Destroy a lock set and its associated locks.<br>
-<a href="lock_try.html">lock_try</a> - Attempt to acquire access rights to a lock.<br>
-</p>
-</blockquote>
-
-<h4>Mach Semaphore Interface</h4>
-<blockquote>
-<p>
-<a href="semaphore_create.html">semaphore_create</a> - Create a new semaphore.<br>
-<a href="semaphore_destroy.html">semaphore_destroy</a> - Destroy a semaphore.<br>
-<a href="semaphore_signal.html">semaphore_signal</a> - Increments the semaphore count.<br>
-<a href="semaphore_signal_all.html">semaphore_signal_all</a> - Wake up all threads blocked on a semaphore.<br>
-<a href="semaphore_wait.html">semaphore_wait</a> - Wait on the specified semaphore.<br>
-</p>
-</blockquote>
-
-<h4>Mach Port Management Interface</h4>
-<blockquote>
-<p>
-<a href="mach_port_allocate.html">mach_port_allocate</a> - Create caller-specified type of port right.<br>
-<a href="mach_port_allocate_full.html">mach_port_allocate_full</a> - Create a port right with full Mach port semantics.<br>
-<a href="mach_port_allocate_name.html">mach_port_allocate_name</a> - Create a port right with the caller-specified name.<br>
-<a href="mach_port_allocate_qos.html">mach_port_allocate_qos</a> - Allocate a port with specified "quality of service".<br>
-<a href="MP_allocate_subsystem.html">mach_port_allocate_subsystem</a> - Create a port right associated with the caller-specified subsystem.<br>
-<a href="mach_port_deallocate.html">mach_port_deallocate</a> - Decrement the target port right's user reference count.<br>
-<a href="mach_port_destroy.html">mach_port_destroy</a> - Deallocate all port rights associated with specified name.<br>
-<a href="mach_port_extract_right.html">mach_port_extract_right</a> - Remove the specified right from the target task and return it to the caller.<br>
-<a href="mach_port_get_attributes.html">mach_port_get_attributes</a> - Return information about target port as specified by the caller.<br>
-<a href="mach_port_get_refs.html">mach_port_get_refs</a> - Return the current count of user references on the target port right.<br>
-<a href="mach_port_get_set_status.html">mach_port_get_set_status</a> - Return the port right names contained in the target port set.<br>
-<a href="mach_port_insert_right.html">mach_port_insert_right</a> - Insert the specified port right into the target task.<br>
-<a href="mach_port_mod_refs.html">mach_port_mod_refs</a> - Modify the specified port right's count of user references.<br>
-<a href="mach_port_move_member.html">mach_port_move_member</a> - Move the specified receive right into or out of the specified port set.<br>
-<a href="mach_port_names.html">mach_port_names</a> - Return information about a task's port name space.<br>
-<a href="MP_request_notification.html">mach_port_request_notification</a> - Request notification of the specified port event type.<br>
-<a href="mach_port_set_attributes.html">mach_port_set_attributes</a> - Set the target port's attributes.<br>
-<a href="mach_port_set_mscount.html">mach_port_set_mscount</a> - Change the target port's make-send count.<br>
-<a href="mach_port_set_seqno.html">mach_port_set_seqno</a> - Change the current value of the target port's sequence number.<br>
-<a href="mach_port_type.html">mach_port_type</a> - Return the characteristics of the target port name.<br>
-<a href="mach_reply_port.html">mach_reply_port</a> - Allocate a new port and insert corresponding receive right in the calling task.<br>
-<a href="mach_subsystem_create.html"> mach_subsystem_create</a> - Used by a server to register information about an RPC subsystem with the kernel.<br>
-</p>
-Mach Port Data Structures
-<p>
-<a href="mach_port_limits.html">mach_port_limits</a> - Specifies a port's resource and message queue limits.<br>
-<a href="mach_port_qos.html">mach_port_qos</a> - Specifies a port's attributes with respect to "Quality Of Service."<br>
-<a href="mach_port_status.html">mach_port_status</a> - Used to present a port's current status with respect to various important attributes.<br>
-</p>
-Mach Port Notification Callbacks
-<p>
-<a href="do_mach_notify_dead_name.html">do_mach_notify_dead_name</a> - Handle the current instance of a dead-name notification.<br>
-<a href="do_mach_notify_no_senders.html">do_mach_notify_no_senders</a> - Handle the current instance of a no-more-senders notification.<br>
-<a href="DMN_port_deleted.html">do_mach_notify_port_deleted</a> - Handle the current instance of a port-deleted notification.<br>
-<a href="DMN_port_destroyed.html">do_mach_notify_port_destroyed</a> - Handle the current instance of a port-destroyed notification.<br>
-<a href="do_mach_notify_send_once.html">do_mach_notify_send_once</a> - Handle the current instance of a send-once notification.<br>
-</p>
-Mach Port Notification Callback Server Helpers
-<p>
-<a href="notify_server.html">notify_server</a> - Detect and handle a kernel-generated IPC notification.<br>
-</p>
-</blockquote>
-
-</blockquote>
-
-<h3>Mach Virtual Memory Interface</h3>
-<blockquote>
-<h4>Mach Virtual Memory Address Space Manipulation Interface</h4>
-<blockquote>
-<p>
-<a href="host_page_size.html">host_page_size</a> - Provide the system's virtual page size.<br>
-<a href="vm_allocate.html">vm_allocate</a> - Allocate a region of virtual memory.<br>
-<a href="vm_behavior_set.html">vm_behavior_set</a> - Specify expected access patterns for the target VM region.<br>
-<a href="vm_copy.html">vm_copy</a> - Copy a region of virtual memory.<br>
-<a href="vm_deallocate.html">vm_deallocate</a> - Deallocate a region of virtual memory.<br>
-<a href="vm_inherit.html">vm_inherit</a> - Set a VM region's inheritance attribute.<br>
-<a href="vm_machine_attribute.html">vm_machine_attribute</a> - Get/set the target memory region's special attributes.<br>
-<a href="vm_map.html">vm_map</a> - Map the specified memory object to a region of virtual memory.<br>
-<a href="vm_msync.html">vm_msync</a> - Synchronize the specified region of virtual memory.<br>
-<a href="vm_protect.html">vm_protect</a> - Set access privilege attribute for a region of virtual memory.<br>
-<a href="vm_read.html">vm_read</a> - Read the specified range of target task's address space.<br>
-<a href="vm_region.html">vm_region</a> - Return description of a virtual memory region.<br>
-<a href="vm_remap.html">vm_remap</a> - Map memory objects in one address space to that of another's.<br>
-<a href="vm_wire.html"> vm_wire</a> - Modify the target region's paging characteristics.<br>
-<a href="vm_write.html">vm_write</a> - Write data to the specified address in the target address space.<br>
-</p>
-Data Structures
-<p>
-<a href="vm_region_basic_info.html">vm_region_basic_info</a> - Defines the attributes of a task's memory region.<br>
-<a href="vm_statistics.html">vm_statistics</a> - Defines statistics for the kernel's use of virtual memory.<br>
-</p>
-</blockquote>
-
-<h4>External Memory Management Interface</h4>
-<blockquote>
-The External Memory Management Interface (EMMI) is undergoing significant change in the Darwin system.
-For this reason, the interface is not currently available to user-level programs.  Even for kernel
-extensions, use of these interfaces in not supported.  Instead, the BSD filesystem's Universal Buffer Cache (UBC)
-mechanism should be used.<br>
-<p>
-<a href="MO_change_attributes.html">memory_object_change_attributes</a> - Modify subset of memory object attributes.<br>
-<a href="memory_object_destroy.html">memory_object_destroy</a> - Shut down a memory object.<br>
-<a href="MO_get_attributes.html">memory_object_get_attributes</a> - Return current attributes for a memory object.<br>
-<a href="memory_object_lock_request.html">memory_object_lock_request</a> - Restrict access to memory object data.<br>
-<a href="MO_SY_completed.html">memory_object_synchronize_completed</a> - Synchronized data has been processed.<br>
-</p>
-Data Structures
-<p>
-<a href="memory_object_attr_info.html">memory_object_attr_info</a> - Defines memory object attributes.<br>
-<a href="memory_object_perf_info.html">memory_object_perf_info</a>- Specifies performance-related memory object attributes.<br>
-</p>
-External Memory Manager Interface Callbacks
-<p>
-<a href="memory_object_create.html">memory_object_create</a> - Assign a new memory object to the default memory manager.<br>
-<a href="MO_data_initialize.html">memory_object_data_initialize</a> - Provide initial data for a new memory object.<br>
-<a href="memory_object_data_request.html">memory_object_data_request</a> - Request that memory manager page-in specified data.<br>
-<a href="memory_object_data_return.html">memory_object_data_return</a> - Return memory object data to the appropriate memory manager.<br>
-<a href="memory_object_data_unlock.html">memory_object_data_unlock</a> - Request a memory manager release the lock on specific data.<br>
-<a href="memory_object_init.html">memory_object_init</a> - Inform a memory manager on first use of a memory object.<br>
-<a href="memory_object_synchronize.html">memory_object_synchronize</a> - Request synchronization of data with backing store.<br>
-<a href="memory_object_terminate.html">memory_object_terminate</a> - Relinquish access to a memory object.<br>
-</p>
-EMMI Callback Server Helpers
-<p>
-<a href="MO_default_server.html">memory_object_default_server</a> - Handle kernel operation request targeted for the default pager.<br>
-<a href="memory_object_server.html">memory_object_server</a> - Handle kernel operation request aimed at a given memory manager.<br>
-</p>
-</blockquote>
-
-<h4>Default Memory Management Interface</h4>
-<blockquote>
-<p>
-<a href="default_pager_add_segment.html">default_pager_add_segment</a> - Add additional backing storage for a default pager.<br>
-<a href="DP_backing_store_create.html">default_pager_backing_store_create</a> - Create a backing storage object.<br>
-<a href="DP_backing_store_delete.html"> default_pager_backing_store_delete</a> - Delete a backing storage object.<br>
-<a href="DP_backing_store_info.html">default_pager_backing_store_info</a> - Return information about a backing storage object.<br>
-<a href="default_pager_info.html">default_pager_info</a> - Furnish caller with information about the default pager.<br>
-<a href="DP_object_create.html">default_pager_object_create</a> - Initialize a non-persistent memory object.<br>
-<a href="HD_memory_manager.html">host_default_memory_manager</a> - Register/Lookup the host's default pager.<br>
-</p>
-</blockquote>
-
-</blockquote>
-
-<h3>Process Management Interface</h3>
-<blockquote>
-
-<h4>Task Interface</h4>
-<blockquote>
-<p>
-<a href="mach_ports_lookup.html">mach_ports_lookup</a> - Provide caller with an array of the target task's well-known ports.<br>
-<a href="mach_ports_register.html">mach_ports_register</a> - Register an array of well-known ports on behalf of the target task.<br>
-<a href="mach_task_self.html">mach_task_self</a> - Return a send right to the caller's task_self port.<br>
-<a href="task_create.html">task_create</a> - Create a new task.<br>
-<a href="task_get_emulation_vector.html">task_get_emulation_vector</a> - Return an array identifying the target task's user-level system call handlers.<br>
-<a href="task_get_exception_ports.html">task_get_exception_ports</a> - Return send rights to the target task's exception ports.<br>
-<a href="task_get_special_port.html">task_get_special_port</a> - Return a send write to the indicated special port.<br>
-<a href="task_info.html">task_info</a> - Return per-task information according to specified flavor.<br>
-<a href="task_resume.html">task_resume</a> - Decrement the target task's suspend count.<br>
-<a href="task_sample.html">task_sample</a> - Sample the target task's thread program counters periodically.<br>
-<a href="task_set_emulation.html">task_set_emulation</a> - Establish a user-level handler for a system call.<br>
-<a href="task_set_emulation_vector.html">task_set_emulation_vector</a> - Establish the target task's user-level system call handlers.<br>
-<a href="task_set_exception_ports.html">task_set_exception_ports</a> - Set target task's exception ports.<br>
-<a href="task_set_info.html">task_set_info</a> - Set task-specific information state.<br>
-<a href="task_set_port_space.html">task_set_port_space</a> - Set the size of the target task's port name space table.<br>
-<a href="task_set_special_port.html">task_set_special_port</a> - Set the indicated special port.<br>
-<a href="task_suspend.html">task_suspend</a> - Suspend the target task.<br>
-<a href="task_swap_exception_ports.html">task_swap_exception_ports</a> - Set target task's exception ports, returning the previous exception ports.<br>
-<a href="task_terminate.html">task_terminate</a> - Terminate the target task and deallocate its resources.<br>
-<a href="task_threads.html">task_threads</a> - Return the target task's list of threads.<br>
-</p>
-Task Data Structures
-<p>
-<a href="task_basic_info.html">task_basic_info</a> - Defines basic information for a task.<br>
-<a href="task_thread_times_info.html">task_thread_times_info</a> - Defines thread execution times information for tasks.<br>
-</p>
-</blockquote>
-
-<h4>Thread Interface</h4>
-<blockquote>
-<p>
-<a href="mach_thread_self.html">mach_thread_self</a> - Returns the thread self port.<br>
-<a href="thread_abort.html">thread_abort</a> - Abort a thread.<br>
-<a href="thread_abort_safely.html">thread_abort_safely</a> - Abort a thread, restartably.<br>
-<a href="thread_create.html">thread_create</a> - Create a thread within a task.<br>
-<a href="thread_create_running.html">thread_create_running</a> - Optimized creation of a running thread.<br>
-<a href="thread_depress_abort.html">thread_depress_abort</a> - Cancel thread scheduling depression.<br>
-<a href="thread_get_exception_ports.html">thread_get_exception_ports</a> - Return a send right to an exception port.<br>
-<a href="thread_get_special_port.html">thread_get_special_port</a> - Return a send right to the caller-specified special port.<br>
-<a href="thread_get_state.html">thread_get_state</a> - Return the execution state for a thread.<br>
-<a href="thread_info.html">thread_info</a> - Return information about a thread.<br>
-<a href="thread_resume.html">thread_resume</a> - Resume a thread.<br>
-<a href="thread_sample.html">thread_sample</a> - Perform periodic PC sampling for a thread.<br>
-<a href="thread_set_exception_ports.html">thread_set_exception_ports</a> - Set exception ports for a thread.<br>
-<a href="thread_set_special_port.html">thread_set_special_port</a> - Set caller-specified special port belonging to the target thread.<br>
-<a href="thread_set_state.html">thread_set_state</a> - Set the target thread's user-mode execution state.<br>
-<a href="thread_suspend.html">thread_suspend</a> - Suspend a thread.<br>
-<a href="TS_exception_ports.html">thread_swap_exception_ports</a> - Swap exception ports for a thread.<br>
-<a href="thread_terminate.html">thread_terminate</a> - Destroy a thread.<br>
-<a href="thread_wire.html">thread_wire</a> - Mark the thread as privileged with respect to kernel resources.<br>
-</p>
-Thread Data Structures
-<p>
-<a href="thread_basic_info.html">thread_basic_info</a> - Defines basic information for a thread.<br>
-</p>
-Thread Exception Callbacks
-<p>
-<a href="catch_exception_raise.html">catch_exception_raise</a> - Handles the occurrence of an exception within a thread.<br>
-</p>
-Thread Exception Callback Server Helpers
-<p>
-<a href="exc_server.html">exc_server</a> - Handle kernel-reported thread exception.<br>
-</p>
-</blockquote>
-
-<h4>Scheduling Interface</h4>
-<blockquote>
-<p>
-<a href="task_policy.html">task_policy</a> - Set target task's default scheduling policy state.<br>
-<a href="task_set_policy.html">task_set_policy</a> - Set target task's default scheduling policy state.<br>
-<a href="thread_policy.html">thread_policy</a> - Set target thread's scheduling policy state.<br>
-<a href="thread_set_policy.html">thread_set_policy</a> - Set target thread's scheduling policy state.<br>
-<a href="thread_switch.html">thread_switch</a> - Cause context switch with options.<br>
-</p>
-Scheduling Data Structures
-<p>
-<a href="policy_fifo_info.html">policy_fifo_info</a> - Specifies information associated with the system's First-In-First-Out scheduling policy.<br>
-<a href="policy_rr_info.html">policy_rr_info</a> - Specifies information associated with the system's Round Robin scheduling policy.<br>
-<a href="policy_timeshare_info.html">policy_timeshare_info</a> - Specifies information associated with the system's Timeshare scheduling policy.<br>
-</p>
-</blockquote>
-</blockquote>
-
-<h3>System Management Interface</h3>
-<blockquote>
-
-<h4>Host Interface</h4>
-<blockquote>
-<p>
-<a href="host_get_clock_service.html">host_get_clock_service</a> - Return a send right to a kernel clock's service port.<br>
-<a href="host_get_time.html">host_get_time</a> - Returns the current time as seen by that host.<br>
-<a href="host_info.html">host_info</a> - Return information about a host.<br>
-<a href="host_kernel_version.html">host_kernel_version</a> - Return kernel version information for a host.<br>
-<a href="host_statistics.html">host_statistics</a> - Return statistics for a host.<br>
-<a href="mach_host_self.html">mach_host_self</a> - Returns send rights to the task's host self port.<br>
-</p>
-Data Structures
-<p>
-<a href="host_basic_info.html">host_basic_info</a> - Used to present basic information about a host.<br>
-<a href="host_load_info.html">host_load_info</a> - Used to present a host's processor load information.<br>
-<a href="host_sched_info.html">host_sched_info</a> -  - Used to present the set of scheduler limits associated with the host.<br>
-<a href="kernel_resource_sizes.html">kernel_resource_sizes</a> - Used to present the sizes of kernel's major structures.<br>
-</p>
-</blockquote>
-
-<h4>Host Control Interface</h4>
-<blockquote>
-<p>
-<a href="host_adjust_time.html">host_adjust_time</a> - Arranges for the time on a specified host to be gradually changed by an adjustment value.<br>
-<a href="HD_memory_manager.html">host_default_memory_manager</a> - Set the default memory manager.<br>
-<a href="host_get_boot_info.html">host_get_boot_info</a> - Return operator boot information.<br>
-<a href="host_get_clock_control.html">host_get_clock_control</a> - Return a send right to a kernel clock's control port.<br>
-<a href="host_processor_slots.html">host_processor_slots</a> - Return a list of numbers that map processor slots to active processors.<br>
-<a href="host_processors.html">host_processors</a> - Return a list of send rights representing all processor ports.<br>
-<a href="host_reboot.html">host_reboot</a> - Reboot this host.<br>
-<a href="host_set_time.html">host_set_time</a> - Establishes the time on the specified host.<br>
-</p>
-</blockquote>
-
-<h4>Host Security Interface</h4>
-<blockquote>
-<p>
-<a href="host_security_create_task_token.html">host_security_create_task_token</a> - Create a new task with an explicit security token.<br>
-<a href="host_security_set_task_token.html">host_security_set_task_token</a> - Change the target task's security token.<br>
-</p>
-</blockquote>
-
-<h4>Resource Accounting Interface</h4>
-<blockquote>
-<i>
-The Mach resource accounting mechanism is not functional in the current Mac OS X/Darwin system.  It will become functional in a future release.
-</i>
-<p>
-<a href="ledger_create.html">ledger_create</a> - Create a subordinate ledger.<br>
-<a href="ledger_read.html">ledger_read</a> - Return the ledger limit and balance.<br>
-<a href="ledger_terminate.html">ledger_terminate</a> - Destroy a ledger.<br>
-<a href="ledger_transfer.html">ledger_transfer</a> - Transfer resources from a parent ledger to a child.<br>
-</p>
-</blockquote>
-
-<h4>Processor Management Interface</h4>
-<blockquote>
-<p>
-<a href="processor_control.html">processor_control</a> - Perform caller-specified operation on target processor.<br>
-<a href="processor_exit.html">processor_exit</a> - Exit a processor.<br>
-<a href="processor_info.html">processor_info</a> - Return information about a processor.<br>
-<a href="processor_start.html">processor_start</a> - Start a processor.<br>
-</p>
-Processor Data Structures
-<p>
-<a href="processor_basic_info.html">processor_basic_info</a> - Defines the basic information about a processor.<br>
-</p>
-</blockquote>
-
-<h4>Processor Set Interface</h4>
-<blockquote>
-<i>
-The processor set interface allows for the grouping of tasks and
-processors for the purpose of exclusive scheduling.  These interface
-are <b>deprecated</b> and should not be used in code that isn't tied
-to a particular release of Mac OS X/Darwin.  These will likely change
-or disappear in a future release.
-</i>
-<p>
-<a href="host_processor_sets.html">host_processor_sets</a> - Return a list of send rights representing all processor set name ports.<br>
-<a href="host_processor_set_priv.html">host_processor_set_priv</a> - Translate a processor set name port into a processor set control port.<br>
-<a href="processor_assign.html">processor_assign</a> - Assign a processor to a processor set.<br>
-<a href="processor_get_assignment.html">processor_get_assignment</a> - Get current assignment for a processor.<br>
-<a href="processor_set_create.html">processor_set_create</a> - Create a new processor set.<br>
-<a href="processor_set_default.html">processor_set_default</a> - Return the default processor set.<br>
-<a href="processor_set_destroy.html">processor_set_destroy</a> - Destroy the target processor set.<br>
-<a href="processor_set_info.html">processor_set_info</a> - Return processor set state according to caller-specified flavor.<br>
-<a href="processor_set_max_priority.html">processor_set_max_priority</a> - Sets the maximum scheduling priority for a processor set.<br>
-<a href="P_set_policy_control.html">processor_set_policy_control</a> - Set target processor set's scheduling policy state.<br>
-<a href="P_set_policy_disable.html">processor_set_policy_disable</a> - Enables a scheduling policy for a processor set.<br>
-<a href="P_set_policy_enable.html">processor_set_policy_enable</a> - Enables a scheduling policy for a processor set.<br>
-<a href="processor_set_statistics.html">processor_set_statistics</a> - Return scheduling statistics for a processor set.<br>
-<a href="processor_set_tasks.html">processor_set_tasks</a> - Return all tasks currently assigned to the target processor set.<br>
-<a href="processor_set_threads.html">processor_set_threads</a> - Return all threads currently assigned to the target processor set.<br>
-<a href="task_assign.html">task_assign</a> - Assign a task to a processor set.<br>
-<a href="task_assign_default.html">task_assign_default</a> -  Assign a task to the default processor set.<br>
-<a href="task_get_assignment.html">task_get_assignment</a> - Create a new task with an explicit security token.<br>
-<a href="thread_assign.html">thread_assign</a> - Assign a thread to a processor set.<br>
-<a href="thread_assign_default.html">thread_assign_default</a> - Assign a thread to the default processor set.<br>
-<a href="thread_get_assignment.html">thread_get_assignment</a> - Return the processor set to which a thread is assigned.<br>
-</p>
-Processor Set Data Structures
-<p>
-<a href="processor_set_basic_info.html">processor_set_basic_info</a> - Defines the basic information about a processor set.<br>
-<a href="processor_set_load_info.html">processor_set_load_info</a> - Defines the scheduling statistics for a processor set.<br>
-</p>
-</blockquote>
-
-<h4>Clock Interface</h4>
-<blockquote>
-<p>
-<a href="clock_alarm.html">clock_alarm</a> - Set up an alarm.<br>
-<a href="clock_get_attributes.html">clock_get_attributes</a> - Return attributes of a clock.<br>
-<a href="clock_get_time.html">clock_get_time</a> - Return the current time.<br>
-<a href="clock_map_time.html">clock_map_time</a> - Return a memory object that maps a clock.<br>
-<a href="clock_set_attributes.html">clock_set_attributes</a> - Set a particular clock's attributes.<br>
-<a href="clock_set_time.html">clock_set_time</a> - Set the current time.<br>
-<a href="clock_sleep.html">clock_sleep</a> - Delay the invoking thread until a specified time.<br>
-</p>
-Clock Data Structures
-<p>
-<a href="mapped_tvalspec.html">mapped_tvalspec</a> - Specifies the format the kernel uses to maintain a mapped clock's time.<br>
-<a href="tvalspec.html">tvalspec</a> - Defines format of system time values.<br>
-</p>
-Clock Interface Callbacks
-<p>
-<a href="clock_alarm_reply.html">clock_alarm_reply</a> - Ring a preset alarm.<br>
-</p>
-Clock Callback Server Helpers
-<p>
-<a href="clock_reply_server.html"> clock_reply_server</a> - Handle kernel-generated alarm.<br>
-</p>
-</blockquote>
-
-<h4>Multi-Computer Support Interface</h4>
-<blockquote>
-<i>
-These multi-computer support interfaces are no longer supported by
-the Mac OS X/Darwin kernel.  If and when multi-computer support is
-added back in, something like these will likely be added.
-</i>
-<p>
-<a href="host_page_size.html">host_page_size</a> - Returns the page size for the given host.<br>
-<a href="ledger_get_remote.html">ledger_get_remote</a> - Return send right to specified host's remote ledger port.<br>
-<a href="ledger_set_remote.html">ledger_set_remote</a> - Set this host's remote ledger port.<br>
-</p>
-</blockquote>
-
-</blockquote>
-
-<h3>Machine Specific Interface</h3>
-<blockquote>
-
-<h4>Intel 386 Support</h4>
-<blockquote>
-<p>
-<a href="i386_get_ldt.html">i386_get_ldt</a> - Returns per-thread segment descriptors from the local descriptor table (LDT).<br>
-<a href="i386_io_port_add.html">i386_io_port_add</a> - Adds a device to the I/O permission bitmap for a thread. <br>
-<a href="i386_io_port_list.html">i386_io_port_list</a> - Returns a list of the devices named in the thread's I/O permission bitmap.<br>
-<a href="i386_io_port_remove.html">i386_io_port_remove</a> - Removes the specified device from the thread's I/O permission bitmap.<br>
-<a href="i386_set_ldt.html">i386_set_ldt</a> - Allows a thread to have a private local descriptor table (LDT).<br>
-</p>
-</blockquote>
-
-<h4>PowerPC Support</h4>
-<blockquote>
-<p>
-</p>
-</blockquote>
-
-</blockquote>
-
-</BODY>
-
-</HTML>
-
index 6c5a42214b4e350824cd78201d2ee2f384b12980..c79a03e57b471e7190b20946d361eeba8749229c 100644 (file)
@@ -598,12 +598,7 @@ vm_compressor_init(void)
        PE_parse_boot_argn("vm_compression_limit", &vm_compression_limit, sizeof(vm_compression_limit));
 
 #ifdef CONFIG_EMBEDDED
-#if XNU_TARGET_OS_WATCH
-       // rdar://problem/51012698
-       vm_compressor_minorcompact_threshold_divisor = 40;
-#else
        vm_compressor_minorcompact_threshold_divisor = 20;
-#endif
        vm_compressor_majorcompact_threshold_divisor = 30;
        vm_compressor_unthrottle_threshold_divisor = 40;
        vm_compressor_catchup_threshold_divisor = 60;
index 277c964870313d0ec8237db8d6335d8742c8f23a..e3956937b4c80a5960048cead8e03ea0041cd2f6 100644 (file)
@@ -4088,7 +4088,9 @@ FastPmapEnter:
                                        }
 
                                        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, event_code, trace_real_vaddr, (fault_info.user_tag << 16) | (caller_prot << 8) | type_of_fault, m->vmp_offset, get_current_unique_pid(), 0);
-
+                                       if (need_retry == FALSE) {
+                                               KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_FAST), get_current_unique_pid(), 0, 0, 0, 0);
+                                       }
                                        DTRACE_VM6(real_fault, vm_map_offset_t, real_vaddr, vm_map_offset_t, m->vmp_offset, int, event_code, int, caller_prot, int, type_of_fault, int, fault_info.user_tag);
                                }
                                if (kr == KERN_SUCCESS &&
@@ -5087,6 +5089,7 @@ handle_copy_delay:
                        }
 
                        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, event_code, trace_real_vaddr, (fault_info.user_tag << 16) | (caller_prot << 8) | type_of_fault, m->vmp_offset, get_current_unique_pid(), 0);
+                       KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_SLOW), get_current_unique_pid(), 0, 0, 0, 0);
 
                        DTRACE_VM6(real_fault, vm_map_offset_t, real_vaddr, vm_map_offset_t, m->vmp_offset, int, event_code, int, caller_prot, int, type_of_fault, int, fault_info.user_tag);
                }
index 031cb82981cd4a1633ca656efc20d5d9f0896625..d130132c30bd3b34ab6251047e6b1e0ce9a688b9 100644 (file)
@@ -13246,6 +13246,7 @@ protection_failure:
        *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
        *object = VME_OBJECT(entry);
        *out_prot = prot;
+       KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), 0, 0, 0, 0);
 
        if (fault_info) {
                fault_info->interruptible = THREAD_UNINT; /* for now... */
@@ -17672,6 +17673,7 @@ vm_map_msync(
 
                        local_map = VME_SUBMAP(entry);
                        local_offset = VME_OFFSET(entry);
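+                       /* take a reference so the submap cannot be torn down while the parent map is unlocked below */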
+                       vm_map_reference(local_map);
                        vm_map_unlock(map);
                        if (vm_map_msync(
                                    local_map,
@@ -17680,6 +17682,7 @@ vm_map_msync(
                                    sync_flags) == KERN_INVALID_ADDRESS) {
                                had_hole = TRUE;
                        }
+                       vm_map_deallocate(local_map);
                        continue;
                }
                object = VME_OBJECT(entry);
index eedfb09e4fae6cb5bcbaecacf20b1a7f7ad4ba93..c4fde23b120a512c4ef8548be6f9400ef293bf15 100644 (file)
@@ -184,6 +184,7 @@ struct vm_object {
                                                 * copy_call.
                                                 */
        struct vm_object        *shadow;        /* My shadow */
+       memory_object_t         pager;          /* Where to get data */
 
        union {
                vm_object_offset_t vou_shadow_offset;   /* Offset into shadow */
@@ -196,7 +197,6 @@ struct vm_object {
                                                 */
        } vo_un2;
 
-       memory_object_t         pager;          /* Where to get data */
        vm_object_offset_t      paging_offset;  /* Offset into memory object */
        memory_object_control_t pager_control;  /* Where data comes back */
 
@@ -328,12 +328,12 @@ struct vm_object {
         * they are updated via atomic compare and swap
         */
        vm_object_offset_t      last_alloc;     /* last allocation offset */
+       vm_offset_t             cow_hint;       /* last page present in     */
+                                               /* shadow but not in object */
        int                     sequential;     /* sequential access size */
 
        uint32_t                pages_created;
        uint32_t                pages_used;
-       vm_offset_t             cow_hint;       /* last page present in     */
-                                               /* shadow but not in object */
        /* hold object lock when altering */
        unsigned        int
            wimg_bits:8,                /* cache WIMG bits         */
@@ -373,8 +373,8 @@ struct vm_object {
 #endif /* VM_OBJECT_ACCESS_TRACKING */
 
        uint8_t                 scan_collisions;
+       uint8_t                 __object4_unused_bits[1];
        vm_tag_t                wire_tag;
-       uint8_t                 __object4_unused_bits[2];
 
 #if CONFIG_PHANTOM_CACHE
        uint32_t                phantom_object_id;
index 378c4765ca12fbf7d74e53b270b22f4826cd2ca8..d9e16eab31e529d02ab1e5b818457230f883f16d 100644 (file)
@@ -113,6 +113,9 @@ extern boolean_t vm_pressure_events_enabled;
 #define VM_REAL_FAULT_ADDR_PURGABLE             0x03
 #define VM_REAL_FAULT_ADDR_EXTERNAL             0x04
 #define VM_REAL_FAULT_ADDR_SHAREDCACHE          0x05
+#define VM_REAL_FAULT_FAST                      0x06
+#define VM_REAL_FAULT_SLOW                      0x07
+#define VM_MAP_LOOKUP_OBJECT                    0x08
 
 
 
index ce27db8dd3fce41a7cb3a883fc11a1f4ae4a95b9..08fd380f261e776e2154931824e395e99cc69b65 100644 (file)
@@ -43,6 +43,8 @@
 #include <kperf/context.h>
 #include <kperf/action.h>
 
+#include <kern/monotonic.h>
+
 /* Fixed counter mask -- three counters, each with OS and USER */
 #define IA32_FIXED_CTR_ENABLE_ALL_CTRS_ALL_RINGS (0x333)
 #define IA32_FIXED_CTR_ENABLE_ALL_PMI (0x888)
@@ -67,16 +69,6 @@ IA32_FIXED_CTR_CTRL(void)
        return rdmsr64( MSR_IA32_PERF_FIXED_CTR_CTRL );
 }
 
-static uint64_t
-IA32_FIXED_CTRx(uint32_t ctr)
-{
-#ifdef USE_RDPMC
-       return rdpmc64(RDPMC_FIXED_COUNTER_SELECTOR | ctr);
-#else /* !USE_RDPMC */
-       return rdmsr64(MSR_IA32_PERF_FIXED_CTR0 + ctr);
-#endif /* !USE_RDPMC */
-}
-
 #ifdef FIXED_COUNTER_RELOAD
 static void
 wrIA32_FIXED_CTRx(uint32_t ctr, uint64_t value)
@@ -326,37 +318,13 @@ kpc_set_fixed_config(kpc_config_t *configv)
 int
 kpc_get_fixed_counters(uint64_t *counterv)
 {
-       int i, n = kpc_fixed_count();
-
-#ifdef FIXED_COUNTER_SHADOW
-       uint64_t status;
-
-       /* snap the counters */
-       for (i = 0; i < n; i++) {
-               counterv[i] = FIXED_SHADOW(ctr) +
-                   (IA32_FIXED_CTRx(i) - FIXED_RELOAD(ctr));
-       }
-
-       /* Grab the overflow bits */
-       status = rdmsr64(MSR_IA32_PERF_GLOBAL_STATUS);
-
-       /* If the overflow bit is set for a counter, our previous read may or may not have been
-        * before the counter overflowed. Re-read any counter with it's overflow bit set so
-        * we know for sure that it has overflowed. The reason this matters is that the math
-        * is different for a counter that has overflowed. */
-       for (i = 0; i < n; i++) {
-               if ((1ull << (i + 32)) & status) {
-                       counterv[i] = FIXED_SHADOW(ctr) +
-                           (kpc_fixed_max() - FIXED_RELOAD(ctr) + 1 /* Wrap */) + IA32_FIXED_CTRx(i);
-               }
-       }
-#else
-       for (i = 0; i < n; i++) {
-               counterv[i] = IA32_FIXED_CTRx(i);
-       }
-#endif
-
+#if MONOTONIC
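+       /* the fixed counters are now read through the monotonic subsystem */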
+       mt_fixed_counts(counterv);
        return 0;
+#else /* MONOTONIC */
+#pragma unused(counterv)
+       return ENOTSUP;
+#endif /* !MONOTONIC */
 }
 
 int
index 78b2cfa4f8ba2b4cb822dc35b86d7f522944a528..4c5e65c907222f3f7dd190bfcfdbeb5c169082c1 100644 (file)
@@ -273,16 +273,31 @@ net_tuntests: CODE_SIGN_ENTITLEMENTS = network_entitlements.plist
 
 ifneq (osx,$(TARGET_NAME))
 EXCLUDED_SOURCES += no32exec_35914211.c no32exec_35914211_helper.c
-endif
+else  # target = osx
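+# INVALID_ARCHS keeps the helpers out of the normal per-arch build; the custom
+# rules below compile the 32-bit slices by hand and ad-hoc sign them.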
+CUSTOM_TARGETS += no32exec_35914211_helper no32exec_35914211_helper_binprefs
+
+no32exec_35914211_helper: INVALID_ARCHS = x86_64 i386
+no32exec_35914211_helper:
+       $(CC) $(LDFLAGS) $(CFLAGS) -arch i386  no32exec_35914211_helper.c -o $(SYMROOT)/$@;
+       env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) $(CODESIGN) --force --sign - --timestamp=none $(SYMROOT)/$@;
+
+install-no32exec_35914211_helper:
+       mkdir -p $(INSTALLDIR)
+       cp $(SYMROOT)/no32exec_35914211_helper $(INSTALLDIR)/
 
-no32exec_35914211_helper: INVALID_ARCHS = x86_64
+no32exec_35914211_helper_binprefs: INVALID_ARCHS = x86_64 i386
 no32exec_35914211_helper_binprefs:
-       $(CC) $(OTHER_CFLAGS) $(CFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) -ldarwintest -arch i386 -arch x86_64 \
-               no32exec_35914211_helper_binprefs.c -o $(SYMROOT)/no32exec_35914211_helper_binprefs
+       $(CC) $(OTHER_CFLAGS) $(CFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) -arch i386 -arch x86_64  no32exec_35914211_helper.c -o $(SYMROOT)/$@;
+       env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) $(CODESIGN) --force --sign - --timestamp=none $(SYMROOT)/$@;
+
+install-no32exec_35914211_helper_binprefs:
+       mkdir -p $(INSTALLDIR)
+       cp $(SYMROOT)/no32exec_35914211_helper_binprefs $(INSTALLDIR)/
 
 no32exec_35914211: INVALID_ARCHS = i386
 no32exec_35914211: no32exec_35914211_helper
 no32exec_35914211: no32exec_35914211_helper_binprefs
+endif  # ifneq (osx,$(TARGET_NAME))
 
 MIG:=SDKROOT=$(SDKROOT) $(shell xcrun -sdk "$(TARGETSDK)" -find mig)
 
index b1f87634f9599952a53378cab38a69ade15c165b..3ce06731d1325501e67fb6032052f19af5287eb8 100644 (file)
@@ -7,23 +7,14 @@
 #include <stdlib.h>
 #include <signal.h>
 
-static int binprefs_child_is_64 = 0;
-
-static void
-signal_handler(__unused int sig)
-{
-       binprefs_child_is_64 = 1;
-       return;
-}
-
-T_DECL(no32exec_bootarg_with_spawn, "make sure the no32exec boot-arg is honored, using posix_spawn", T_META_BOOTARGS_SET("-no32exec"))
+T_DECL(no32exec_bootarg_with_spawn, "make sure we can't posix_spawn 32-bit")
 {
        int spawn_ret, pid;
        char path[1024];
        uint32_t size = sizeof(path);
 
        T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL);
-       T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), size, NULL);
+       T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), (unsigned long)size, NULL);
 
        spawn_ret = posix_spawn(&pid, path, NULL, NULL, NULL, NULL);
        if (spawn_ret == 0) {
@@ -34,8 +25,30 @@ T_DECL(no32exec_bootarg_with_spawn, "make sure the no32exec boot-arg is honored,
        T_ASSERT_EQ(spawn_ret, EBADARCH, NULL);
 }
 
-T_DECL(no32exec_bootarg_with_spawn_binprefs, "make sure the no32exec boot-arg is honored, using posix_spawn"
-    "with binprefs on a fat i386/x86_64 Mach-O", T_META_BOOTARGS_SET("-no32exec"))
+T_DECL(no32_exec_bootarg_with_exec, "make sure we can't fork and exec 32-bit")
+{
+       int pid;
+       char path[1024];
+       uint32_t size = sizeof(path);
+
+       T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL);
+       T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), (unsigned long)size, NULL);
+
+       pid = fork();
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(pid, "fork");
+
+       if (pid == 0) { /* child */
+               execve(path, NULL, NULL); /* this should fail, resulting in the call to exit below */
+               exit(errno);
+       } else { /* parent */
+               int wait_ret = 0;
+               waitpid(pid, &wait_ret, 0);
+               T_QUIET; T_ASSERT_TRUE(WIFEXITED(wait_ret), "child should have called exit()");
+               T_ASSERT_EQ(WEXITSTATUS(wait_ret), EBADARCH, "execve should set errno = EBADARCH");
+       }
+}
+
+T_DECL(no32exec_bootarg_with_spawn_binprefs, "make sure we honor no32exec, using posix_spawn with binprefs on a fat i386/x86_64 Mach-O")
 {
        int pid, ret;
        posix_spawnattr_t spawnattr;
@@ -44,9 +57,7 @@ T_DECL(no32exec_bootarg_with_spawn_binprefs, "make sure the no32exec boot-arg is
        char path[1024];
        uint32_t size = sizeof(path);
        T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL);
-       T_QUIET; T_ASSERT_LT(strlcat(path, "_helper_binprefs", size), size, NULL);
-
-       T_QUIET; T_ASSERT_NE(signal(SIGUSR1, signal_handler), SIG_ERR, "signal");
+       T_QUIET; T_ASSERT_LT(strlcat(path, "_helper_binprefs", size), (unsigned long)size, NULL);
 
        ret = posix_spawnattr_init(&spawnattr);
        T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_init");
@@ -57,37 +68,37 @@ T_DECL(no32exec_bootarg_with_spawn_binprefs, "make sure the no32exec boot-arg is
        ret = posix_spawn(&pid, path, NULL, &spawnattr, NULL, NULL);
        T_ASSERT_EQ(ret, 0, "posix_spawn should succeed despite 32-bit binpref appearing first");
 
-       sleep(1);
-       ret = kill(pid, SIGUSR1); // ping helper; helper should ping back if running 64-bit
-       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kill");
-
-       ret = wait(NULL);
+       int wait_ret = 0;
+       ret = waitpid(pid, &wait_ret, 0);
        T_QUIET; T_ASSERT_EQ(ret, pid, "child pid");
 
-       T_ASSERT_EQ(binprefs_child_is_64, 1, "child process should be running in 64-bit mode");
+       T_QUIET; T_ASSERT_EQ(WIFEXITED(wait_ret), 1, "child process should have called exit()");
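+       /* the helper exits with sizeof(void *), so status 8 indicates a 64-bit run */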
+       T_ASSERT_EQ(WEXITSTATUS(wait_ret), 8, "child process should be running in 64-bit mode");
 
        ret = posix_spawnattr_destroy(&spawnattr);
        T_QUIET; T_ASSERT_EQ(ret, 0, "posix_spawnattr_destroy");
 }
 
-T_DECL(no32_exec_bootarg_with_exec, "make sure the no32exec boot-arg is honored, using fork and exec", T_META_BOOTARGS_SET("-no32exec"))
+T_DECL(no32exec_bootarg_with_32only_spawn_binprefs, "make sure we honor no32exec, using posix_spawn with 32-bit only binprefs on a fat i386/x86_64 Mach-O")
 {
-       int pid;
+       int pid, ret, spawn_ret;
+       posix_spawnattr_t spawnattr;
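+       /* prefer only the i386 slice of the fat helper; the spawn should be rejected with EBADARCH */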
+       cpu_type_t cpuprefs[] = { CPU_TYPE_X86 };
+
        char path[1024];
        uint32_t size = sizeof(path);
-
        T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL);
-       T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), size, NULL);
+       T_QUIET; T_ASSERT_LT(strlcat(path, "_helper_binprefs", size), (unsigned long)size, NULL);
 
-       pid = fork();
-       T_QUIET; T_ASSERT_POSIX_SUCCESS(pid, "fork");
+       ret = posix_spawnattr_init(&spawnattr);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_init");
 
-       if (pid == 0) { /* child */
-               execve(path, NULL, NULL); /* this should fail, resulting in the call to exit below */
-               exit(errno);
-       } else { /* parent */
-               int wait_ret = 0;
-               waitpid(pid, &wait_ret, 0);
-               T_ASSERT_EQ(WEXITSTATUS(wait_ret), EBADARCH, "execve should set errno = EBADARCH");
-       }
+       ret = posix_spawnattr_setbinpref_np(&spawnattr, sizeof(cpuprefs) / sizeof(cpuprefs[0]), cpuprefs, NULL);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_setbinpref_np");
+
+       spawn_ret = posix_spawn(&pid, path, NULL, &spawnattr, NULL, NULL);
+       T_ASSERT_EQ(spawn_ret, EBADARCH, "posix_spawn should return EBADARCH since only 32-bit binpref is requested");
+
+       ret = posix_spawnattr_destroy(&spawnattr);
+       T_QUIET; T_ASSERT_EQ(ret, 0, "posix_spawnattr_destroy");
 }
index 99fb6be2fc8cf41b984815a25eb5bd4b9212da21..04069dcdc30fd0fadd6f60251dd70537b4cce564 100644 (file)
@@ -1,6 +1,17 @@
-#include <darwintest.h>
+/* This file is compiled as a 32-bit helper used to test spawning
+ * and exec'ing 32-bit programs, now that 32-bit support has been
+ * deprecated on macOS (though the watchOS simulator still
+ * requires it).
+ */
 
-T_DECL(null_test, "nothing to see here")
+#include <stdio.h>
+#include <unistd.h>
+
+int
+main(int argc __unused, char **argv)
 {
-       T_SKIP("nothing to see here");
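+       /* report the pointer size via the exit status: 4 for a 32-bit run, 8 for 64-bit */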
+       size_t retval = sizeof(void *);
+       printf("%s(%d): sizeof(void *) = %lu\n", argv[0], getpid(), retval);
+       return (int)retval;
 }
diff --git a/tests/no32exec_35914211_helper_binprefs.c b/tests/no32exec_35914211_helper_binprefs.c
deleted file mode 100644 (file)
index 0909633..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <darwintest.h>
-#include <unistd.h>
-#include <signal.h>
-
-int can_signal_parent = 0;
-
-void
-signal_handler(int sig)
-{
-       if (sig == SIGUSR1) {
-               can_signal_parent = 1;
-       }
-       return;
-}
-
-T_DECL(no32exec_bootarg_with_spawn_binprefs_helper, "helper for no32exec_bootarg_with_spawn_binprefs test")
-{
-       unsigned long ptrSize = sizeof(long);
-       int ppid = getppid();
-
-       signal(SIGUSR1, signal_handler);
-       signal(SIGALRM, signal_handler);
-
-       // parent will signal us if they're no32exec_bootarg_with_spawn_binprefs, otherwise timeout
-       alarm(3);
-       pause();
-
-       /* signal to parent process if we are running in 64-bit mode */
-       if (can_signal_parent && ptrSize == 8) {
-               kill(ppid, SIGUSR1);
-       }
-
-       T_SKIP("nothing to see here");
-}
index e58c7752fd4947cb686857d0e41d03ca402d6773..bcebeb495144c6b376cf063076b8b5b951f0dcc9 100755 (executable)
@@ -49,7 +49,10 @@ class value(object):
                 other = long(other)
             return me.__cmp__(other)
         if type(other) is value:
-            return int(self).__cmp__(int(other))
+            try:
+                return int(self).__cmp__(int(other))
+            except TypeError: # Try promoting to long
+                return long(self).__cmp__(long(other))
         raise TypeError("Cannot compare value with type {}".format(type(other)))
     
     def __str__(self):
index 6ac2b4eccbb8887fe95de2cd9614e65ca7f681aa..88ea13b5bbb2fa9050477de7d637602226c130bf 100755 (executable)
@@ -1549,7 +1549,7 @@ def ShowMQueue(cmd_args=None, cmd_options={}):
         pset = unsigned(ArgumentStringToInt(cmd_args[0])) - unsigned(psetoff)
         print PrintPortSetSummary.header
         PrintPortSetSummary(kern.GetValueFromAddress(pset, 'struct ipc_pset *'), space)
-    elif int(wq_type) == 2:
+    elif int(wq_type) in [2, 1]:
         portoff = getfieldoffset('struct ipc_port', 'ip_messages')
         port = unsigned(ArgumentStringToInt(cmd_args[0])) - unsigned(portoff)
         print PrintPortSummary.header
index a73dc5b8a95021b6f840696e15609e3179e7e2c9..963db7bcc604df9de3fe26f521461871c4e29910 100755 (executable)
@@ -2772,12 +2772,13 @@ def ShowTaskVMEntries(task, show_pager_info, show_all_shadows):
         return None
     showmapvme(task.map, 0, 0, show_pager_info, show_all_shadows, False)
 
-@lldb_command("showmapvme", "A:B:PRST")
+@lldb_command("showmapvme", "A:B:F:PRST")
 def ShowMapVME(cmd_args=None, cmd_options={}):
     """Routine to print out info about the specified vm_map and its vm entries
         usage: showmapvme <vm_map> [-A start] [-B end] [-S] [-P]
         Use -A <start> flag to start at virtual address <start>
         Use -B <end> flag to end at virtual address <end>
+        Use -F <virtaddr> flag to find just the VME containing the given VA
         Use -S flag to show VM object shadow chains
         Use -P flag to show pager info (mapped file, compressed pages, ...)
         Use -R flag to reverse order
@@ -2796,6 +2797,9 @@ def ShowMapVME(cmd_args=None, cmd_options={}):
         start_vaddr = unsigned(int(cmd_options['-A'], 16))
     if "-B" in cmd_options:
         end_vaddr = unsigned(int(cmd_options['-B'], 16))
+    if "-F" in cmd_options:
+        start_vaddr = unsigned(int(cmd_options['-F'], 16))
+        end_vaddr = start_vaddr
     if "-P" in cmd_options:
         show_pager_info = True
     if "-S" in cmd_options:
@@ -3117,7 +3121,7 @@ def showvmtags(cmd_args=None, cmd_options={}):
     if "-A" in cmd_options:
         all_tags = True
     page_size = unsigned(kern.globals.page_size)
-    nsites = unsigned(kern.globals.vm_allocation_tag_highest)
+    nsites = unsigned(kern.globals.vm_allocation_tag_highest) + 1
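+    # vm_allocation_tag_highest is the last valid tag index, so the arrays need one extra slot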
     tagcounts = [0] * nsites
     tagpeaks = [0] * nsites
     tagmapped = [0] * nsites
@@ -3139,7 +3143,7 @@ def showvmtags(cmd_args=None, cmd_options={}):
 
     total = 0
     totalmapped = 0
-    print " vm_allocation_tag_highest: {:<7d}  ".format(nsites)
+    print " vm_allocation_tag_highest: {:<7d}  ".format(nsites - 1)
     print " {:<7s}  {:>7s}   {:>7s}   {:>7s}  {:<50s}".format("tag.kmod", "peak", "size", "mapped", "name")
     for tag in range(nsites):
         if all_tags or tagcounts[tag] or tagmapped[tag]:
@@ -4432,7 +4436,7 @@ def showmemoryentry(entry, idx=0, queue_len=0):
     if entry.is_sub_map == 1:
         showmapvme(entry.backing.map, 0, 0, show_pager_info, show_all_shadows)
     if entry.is_copy == 1:
-        showmapcopyvme(entry.backing.copy, 0, 0, 0, show_pager_info, show_all_shadows, 0)
+        showmapcopyvme(entry.backing.copy, 0, 0, show_pager_info, show_all_shadows, 0)
     if entry.is_sub_map == 0 and entry.is_copy == 0:
         showvmobject(entry.backing.object, entry.offset, entry.size, show_pager_info, show_all_shadows)
 
index 237927c691be99c7ff55a5fb467379c1e6cf1f15..414a4e11d59d5c6a1acea015ffda144e8deb3b49 100755 (executable)
@@ -738,6 +738,7 @@ def DumpRawTraceFile(cmd_args=[], cmd_options={}):
 
     if lp64 :
         KDBG_TIMESTAMP_MASK = 0xffffffffffffffff
+        KDBG_CPU_SHIFT      = 0
     else :
         KDBG_TIMESTAMP_MASK = 0x00ffffffffffffff
         KDBG_CPU_SHIFT      = 56
@@ -967,7 +968,8 @@ def DumpRawTraceFile(cmd_args=[], cmd_options={}):
                 htab[min_kdbp].kd_prev_timebase += 1
 
                 e.timestamp = htab[min_kdbp].kd_prev_timebase & KDBG_TIMESTAMP_MASK
-                e.timestamp |= (min_cpu << KDBG_CPU_SHIFT)
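+                # lp64 trace files keep the full 64-bit timestamp; no CPU id is folded into it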
+                if not lp64:
+                    e.timestamp |= (min_cpu << KDBG_CPU_SHIFT)
             else :
                 htab[min_kdbp].kd_prev_timebase = earliest_time
 
index cf7afc73c6018d0a831a7f02bb194bb8a299e25d..3a107da8b48aad26d1b503270fc5904abdf57cc3 100755 (executable)
@@ -1547,8 +1547,8 @@ def GetLedgerEntrySummary(ledger_template, ledger, i, show_footprint_interval_ma
     else:
         out_str += "        - "
 
-    if (unsigned(ledger.le_warn_level) != ledger_limit_infinity):
-        out_str += "{:9d} ".format((unsigned(ledger.le_warn_level) * 100) / unsigned(ledger.le_limit))
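+    # le_warn_percent is a fraction of the limit scaled by 1 << 16; 0xffff means no warning level is set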
+    if (unsigned(ledger.le_warn_percent) < 65535):
+        out_str += "{:9d} ".format(unsigned(ledger.le_warn_percent * 100. / 65536))
     else:
         out_str += "        - "
 
index 6768635c0a68df17270a58a5822d66812e682894..dd3a38eb16e194ec8db75de9c4f60c1d93c88498 100755 (executable)
@@ -7,7 +7,7 @@ import sys
 def GetWaitqStateStr(waitq):
     wq_types = {
             0: 'INV',
-            1: '???',
+            1: ' TS',
             2: '  Q',
             3: 'SET'
     }
index 7ec9ca7c8c88e76ae5419f73ea85f6b72848dbb9..473e06a8cc2f2d9a0d9168482d266ab85b3c7460 100755 (executable)
@@ -984,7 +984,7 @@ def WalkList(cmd_args=[], cmd_options={}):
         else:
             print "{0: <#020x}".format(i)
 
-def iotrace_parse_Copt(Copt):
+def trace_parse_Copt(Copt):
     """Parses the -C option argument and returns a list of CPUs
     """
     cpusOpt = Copt
@@ -1017,30 +1017,17 @@ def iotrace_parse_Copt(Copt):
     return chosen_cpus
 
 
-@lldb_command('iotrace', 'C:N:S:RB')
-def IOTrace_cmd(cmd_args=[], cmd_options={}):
-    """ Prints the iotrace ring buffers for all CPUs by default.
-        Arguments:
-          -B                              : Print backtraces for each ring entry
-          -C <cpuSpec#>[,...,<cpuSpec#N>] : Limit trace entries to those generated by the specified CPUs (each cpuSpec can be a
-                                            single CPU number or a range separated by a dash (e.g. "0-3"))
-          -N <count>                      : Limit output to the first <count> entries (across all chosen CPUs)
-          -R                              : Display results in reverse-sorted order (oldest first; default is newest-first)
-          -S <sort_key_field_name>        : Sort output by specified iotrace_entry_t field name (instead of by timestamp)
+IDX_CPU = 0
+IDX_RINGPOS = 1
+IDX_RINGENTRY = 2
+def Trace_cmd(cmd_args=[], cmd_options={}, headerString=lambda:"", entryString=lambda x:"", ring=[], entries_per_cpu=0, max_backtraces=0):
+    """Generic trace dumper helper function
     """
-    IDX_CPU = 0
-    IDX_RINGPOS = 1
-    IDX_RINGENTRY = 2
-    MAX_IOTRACE_BACKTRACES = 16
-
-    if kern.arch != "x86_64":
-        print "Sorry, iotrace is an x86-only command."
-        return
 
     if '-S' in cmd_options:
         field_arg = cmd_options['-S']
         try:
-            getattr(kern.globals.iotrace_ring[0][0], field_arg)
+            getattr(ring[0][0], field_arg)
             sort_key_field_name = field_arg
         except AttributeError:
             raise ArgumentError("Invalid sort key field name `%s'" % field_arg)
@@ -1048,7 +1035,7 @@ def IOTrace_cmd(cmd_args=[], cmd_options={}):
             sort_key_field_name = 'start_time_abs'
 
     if '-C' in cmd_options:
-        chosen_cpus = iotrace_parse_Copt(cmd_options['-C'])
+        chosen_cpus = trace_parse_Copt(cmd_options['-C'])
     else:
         chosen_cpus = [x for x in range(kern.globals.real_ncpus)]
 
@@ -1066,7 +1053,7 @@ def IOTrace_cmd(cmd_args=[], cmd_options={}):
     # the original ring index, and the iotrace entry. 
     entries = []
     for x in chosen_cpus:
-        ring_slice = [(x, y, kern.globals.iotrace_ring[x][y]) for y in range(kern.globals.iotrace_entries_per_cpu)]
+        ring_slice = [(x, y, ring[x][y]) for y in range(entries_per_cpu)]
         entries.extend(ring_slice)
 
     total_entries = len(entries)
@@ -1086,31 +1073,90 @@ def IOTrace_cmd(cmd_args=[], cmd_options={}):
     else:
         entries_to_display = total_entries
 
-    print "%-19s %-8s %-10s %-20s SZ %-18s %-17s DATA" % (
-        "START TIME",
-        "DURATION",
-        "CPU#[RIDX]",
-        "      TYPE",
-        "   VIRT ADDR",
-        "   PHYS ADDR")
+    print headerString()
 
     for x in xrange(entries_to_display):
-        print "%-20u(%6u) %6s[%02d] %-20s %d 0x%016x 0x%016x 0x%x" % (
-            entries[x][IDX_RINGENTRY].start_time_abs,
-            entries[x][IDX_RINGENTRY].duration,
-            "CPU%d" % entries[x][IDX_CPU],
-            entries[x][IDX_RINGPOS],
-            str(entries[x][IDX_RINGENTRY].iotype).split("=")[1].strip(),
-            entries[x][IDX_RINGENTRY].size,
-            entries[x][IDX_RINGENTRY].vaddr,
-            entries[x][IDX_RINGENTRY].paddr,
-            entries[x][IDX_RINGENTRY].val)
+        print entryString(entries[x])
+
         if backtraces:
-            for btidx in range(MAX_IOTRACE_BACKTRACES):
+            for btidx in range(max_backtraces):
                 nextbt = entries[x][IDX_RINGENTRY].backtrace[btidx]
                 if nextbt == 0:
                     break
                 print "\t" + GetSourceInformationForAddress(nextbt)
+
+
+@lldb_command('iotrace', 'C:N:S:RB')
+def IOTrace_cmd(cmd_args=[], cmd_options={}):
+    """ Prints the iotrace ring buffers for all CPUs by default.
+        Arguments:
+          -B                              : Print backtraces for each ring entry
+          -C <cpuSpec#>[,...,<cpuSpec#N>] : Limit trace entries to those generated by the specified CPUs (each cpuSpec can be a
+                                            single CPU number or a range separated by a dash (e.g. "0-3"))
+          -N <count>                      : Limit output to the first <count> entries (across all chosen CPUs)
+          -R                              : Display results in reverse-sorted order (oldest first; default is newest-first)
+          -S <sort_key_field_name>        : Sort output by specified iotrace_entry_t field name (instead of by timestamp)
+    """
+    MAX_IOTRACE_BACKTRACES = 16
+
+    if kern.arch != "x86_64":
+        print "Sorry, iotrace is an x86-only command."
+        return
+
+    hdrString = lambda : "%-19s %-8s %-10s %-20s SZ  %-18s %-17s DATA" % (
+        "START TIME",
+        "DURATION",
+        "CPU#[RIDX]",
+        "      TYPE",
+        "   VIRT ADDR",
+        "   PHYS ADDR")
+
+    entryString = lambda x : "%-20u(%6u) %6s[%02d] %-20s %-2d 0x%016x 0x%016x 0x%x" % (
+        x[IDX_RINGENTRY].start_time_abs,
+        x[IDX_RINGENTRY].duration,
+        "CPU%d" % x[IDX_CPU],
+        x[IDX_RINGPOS],
+        str(x[IDX_RINGENTRY].iotype).split("=")[1].strip(),
+        x[IDX_RINGENTRY].size,
+        x[IDX_RINGENTRY].vaddr,
+        x[IDX_RINGENTRY].paddr,
+        x[IDX_RINGENTRY].val)
+
+    Trace_cmd(cmd_args, cmd_options, hdrString, entryString, kern.globals.iotrace_ring, kern.globals.iotrace_entries_per_cpu, MAX_IOTRACE_BACKTRACES)
+
+
+@lldb_command('ttrace', 'C:N:S:RB')
+def TrapTrace_cmd(cmd_args=[], cmd_options={}):
+    """ Prints the iotrace ring buffers for all CPUs by default.
+        Arguments:
+          -B                              : Print backtraces for each ring entry
+          -C <cpuSpec#>[,...,<cpuSpec#N>] : Limit trace entries to those generated by the specified CPUs (each cpuSpec can be a
+                                            single CPU number or a range separated by a dash (e.g. "0-3"))
+          -N <count>                      : Limit output to the first <count> entries (across all chosen CPUs)
+          -R                              : Display results in reverse-sorted order (oldest first; default is newest-first)
+          -S <sort_key_field_name>        : Sort output by specified traptrace_entry_t field name (instead of by timestamp)
+    """
+    MAX_TRAPTRACE_BACKTRACES = 8
+
+    if kern.arch != "x86_64":
+        print "Sorry, ttrace is an x86-only command."
+        return
+
+    hdrString = lambda : "%-30s CPU#[RIDX] VECT INTERRUPTED_THREAD PREMLV INTRLV INTERRUPTED_PC" % (
+        "START TIME   (DURATION [ns])")
+    entryString = lambda x : "%-20u(%6s) %8s[%02d] 0x%02x 0x%016x %6d %6d %s" % (
+        x[IDX_RINGENTRY].start_time_abs,
+        str(x[IDX_RINGENTRY].duration) if hex(x[IDX_RINGENTRY].duration) != "0xffffffffffffffff" else 'inprog',
+        "CPU%d" % x[IDX_CPU],
+        x[IDX_RINGPOS],
+        int(x[IDX_RINGENTRY].vector),
+        x[IDX_RINGENTRY].curthread,
+        x[IDX_RINGENTRY].curpl,
+        x[IDX_RINGENTRY].curil,
+        GetSourceInformationForAddress(x[IDX_RINGENTRY].interrupted_pc))
+
+    Trace_cmd(cmd_args, cmd_options, hdrString, entryString, kern.globals.traptrace_ring,
+        kern.globals.traptrace_entries_per_cpu, MAX_TRAPTRACE_BACKTRACES)
                 
 
 
index cd1963c5655bd2fe63dbf67149837c8b53ad7fc7..db9ed81c5d6b3f6a002348b301029d8b67cb6aa7 100644 (file)
@@ -66,6 +66,7 @@ typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY
 
 #define CONSTRAINT_NANOS        (20000000ll)    /* 20 ms */
 #define COMPUTATION_NANOS       (10000000ll)    /* 10 ms */
+#define RT_CHURN_COMP_NANOS     ( 1000000ll)    /*  1 ms */
 #define TRACEWORTHY_NANOS       (10000000ll)    /* 10 ms */
 #define TRACEWORTHY_NANOS_TEST  ( 2000000ll)    /*  2 ms */
 
@@ -105,8 +106,10 @@ static uint32_t                 g_iteration_sleeptime_us = 0;
 static uint32_t                 g_priority = 0;
 static uint32_t                 g_churn_pri = 0;
 static uint32_t                 g_churn_count = 0;
+static uint32_t                 g_rt_churn_count = 0;
 
 static pthread_t*               g_churn_threads = NULL;
+static pthread_t*               g_rt_churn_threads = NULL;
 
 /* Threshold for dropping a 'bad run' tracepoint */
 static uint64_t                 g_traceworthy_latency_ns = TRACEWORTHY_NANOS;
@@ -126,6 +129,8 @@ static boolean_t                g_drop_priority = FALSE;
 /* Test whether realtime threads are scheduled on the separate CPUs */
 static boolean_t                g_test_rt = FALSE;
 
+static boolean_t                g_rt_churn = FALSE;
+
 /* On SMT machines, test whether realtime threads are scheduled on the correct CPUs */
 static boolean_t                g_test_rt_smt = FALSE;
 
@@ -151,6 +156,8 @@ static semaphore_t              g_broadcastsem;
 static semaphore_t              g_leadersem;
 static semaphore_t              g_readysem;
 static semaphore_t              g_donesem;
+static semaphore_t              g_rt_churn_sem;
+static semaphore_t              g_rt_churn_start_sem;
 
 /* Global variables (chain) */
 static semaphore_t             *g_semarr;
@@ -270,6 +277,129 @@ join_churn_threads(void)
        }
 }
 
+/*
+ * Set policy
+ */
+static int
+rt_churn_thread_setup(void)
+{
+       kern_return_t kr;
+       thread_time_constraint_policy_data_t pol;
+
+       /* Hard-coded realtime parameters (similar to what Digi uses) */
+       pol.period      = 100000;
+       pol.constraint  = (uint32_t) nanos_to_abs(CONSTRAINT_NANOS * 2);
+       pol.computation = (uint32_t) nanos_to_abs(RT_CHURN_COMP_NANOS * 2);
+       pol.preemptible = 0;         /* Ignored by OS */
+
+       kr = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY,
+           (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
+       mach_assert_zero_t(0, kr);
+
+       return 0;
+}
+
+static void *
+rt_churn_thread(__unused void *arg)
+{
+       rt_churn_thread_setup();
+
+       for (uint32_t i = 0; i < g_iterations; i++) {
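+               /* signal the done semaphore to check in, then block until the main thread starts the next iteration */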
+               kern_return_t kr = semaphore_wait_signal(g_rt_churn_start_sem, g_rt_churn_sem);
+               mach_assert_zero_t(0, kr);
+
+               volatile double x = 0.0;
+               volatile double y = 0.0;
+
+               uint64_t endspin = mach_absolute_time() + nanos_to_abs(RT_CHURN_COMP_NANOS);
+               while (mach_absolute_time() < endspin) {
+                       y = y + 1.5 + x;
+                       x = sqrt(y);
+               }
+       }
+
+       kern_return_t kr = semaphore_signal(g_rt_churn_sem);
+       mach_assert_zero_t(0, kr);
+
+       return NULL;
+}
+
+static void
+wait_for_rt_churn_threads(void)
+{
+       for (uint32_t i = 0; i < g_rt_churn_count; i++) {
+               kern_return_t kr = semaphore_wait(g_rt_churn_sem);
+               mach_assert_zero_t(0, kr);
+       }
+}
+
+static void
+start_rt_churn_threads(void)
+{
+       for (uint32_t i = 0; i < g_rt_churn_count; i++) {
+               kern_return_t kr = semaphore_signal(g_rt_churn_start_sem);
+               mach_assert_zero_t(0, kr);
+       }
+}
+
+static void
+create_rt_churn_threads(void)
+{
+       if (g_rt_churn_count == 0) {
+               /* Leave 1 CPU to ensure that the main thread can make progress */
+               g_rt_churn_count = g_numcpus - 1;
+       }
+
+       errno_t err;
+
+       struct sched_param param = { .sched_priority = (int)g_churn_pri };
+       pthread_attr_t attr;
+
+       /* Array for churn threads */
+       g_rt_churn_threads = (pthread_t*) valloc(sizeof(pthread_t) * g_rt_churn_count);
+       assert(g_rt_churn_threads);
+
+       if ((err = pthread_attr_init(&attr))) {
+               errc(EX_OSERR, err, "pthread_attr_init");
+       }
+
+       if ((err = pthread_attr_setschedparam(&attr, &param))) {
+               errc(EX_OSERR, err, "pthread_attr_setschedparam");
+       }
+
+       if ((err = pthread_attr_setschedpolicy(&attr, SCHED_RR))) {
+               errc(EX_OSERR, err, "pthread_attr_setschedpolicy");
+       }
+
+       for (uint32_t i = 0; i < g_rt_churn_count; i++) {
+               pthread_t new_thread;
+
+               if ((err = pthread_create(&new_thread, &attr, rt_churn_thread, NULL))) {
+                       errc(EX_OSERR, err, "pthread_create");
+               }
+               g_rt_churn_threads[i] = new_thread;
+       }
+
+       if ((err = pthread_attr_destroy(&attr))) {
+               errc(EX_OSERR, err, "pthread_attr_destroy");
+       }
+
+       /* Wait until all threads have checked in */
+       wait_for_rt_churn_threads();
+}
+
+static void
+join_rt_churn_threads(void)
+{
+       /* Rejoin rt churn threads */
+       for (uint32_t i = 0; i < g_rt_churn_count; i++) {
+               errno_t err = pthread_join(g_rt_churn_threads[i], NULL);
+               if (err) {
+                       errc(EX_OSERR, err, "pthread_join %d", i);
+               }
+       }
+}
+
 /*
  * Figure out what thread policy to use
  */
@@ -828,6 +958,12 @@ main(int argc, char **argv)
        kr = semaphore_create(mach_task_self(), &g_readysem, SYNC_POLICY_FIFO, 0);
        mach_assert_zero(kr);
 
+       kr = semaphore_create(mach_task_self(), &g_rt_churn_sem, SYNC_POLICY_FIFO, 0);
+       mach_assert_zero(kr);
+
+       kr = semaphore_create(mach_task_self(), &g_rt_churn_start_sem, SYNC_POLICY_FIFO, 0);
+       mach_assert_zero(kr);
+
        atomic_store_explicit(&g_done_threads, 0, memory_order_relaxed);
 
        /* Create the threads */
@@ -850,6 +986,9 @@ main(int argc, char **argv)
        if (g_churn_pri) {
                create_churn_threads();
        }
+       if (g_rt_churn) {
+               create_rt_churn_threads();
+       }
 
        /* Let everyone get settled */
        kr = semaphore_wait(g_main_sem);
@@ -869,6 +1008,11 @@ main(int argc, char **argv)
                        g_one_long_spin_id = (uint32_t)rand() % g_numthreads;
                }
 
+               if (g_rt_churn) {
+                       start_rt_churn_threads();
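+                       /* brief pause so the churn threads are spinning before the measured wakeups begin */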
+                       usleep(100);
+               }
+
                debug_log("%d Main thread reset\n", i);
 
                atomic_store_explicit(&g_done_threads, 0, memory_order_seq_cst);
@@ -883,6 +1027,10 @@ main(int argc, char **argv)
 
                assert(atomic_load_explicit(&g_done_threads, memory_order_relaxed) == g_numthreads);
 
+               if (g_rt_churn) {
+                       wait_for_rt_churn_threads();
+               }
+
                /*
                 * We report the worst latencies relative to start time
                 * and relative to the lead worker thread.
@@ -933,6 +1081,10 @@ main(int argc, char **argv)
                }
        }
 
+       if (g_rt_churn) {
+               join_rt_churn_threads();
+       }
+
        if (g_churn_pri) {
                join_churn_threads();
        }
@@ -1104,6 +1256,7 @@ parse_args(int argc, char *argv[])
                OPT_PRIORITY,
                OPT_CHURN_PRI,
                OPT_CHURN_COUNT,
+               OPT_RT_CHURN_COUNT,
        };
 
        static struct option longopts[] = {
@@ -1113,6 +1266,7 @@ parse_args(int argc, char *argv[])
                { "priority",           required_argument,      NULL,                           OPT_PRIORITY  },
                { "churn-pri",          required_argument,      NULL,                           OPT_CHURN_PRI },
                { "churn-count",        required_argument,      NULL,                           OPT_CHURN_COUNT },
+               { "rt-churn-count",     required_argument,      NULL,                           OPT_RT_CHURN_COUNT },
                { "switched_apptype",   no_argument,            (int*)&g_seen_apptype,          TRUE },
                { "spin-one",           no_argument,            (int*)&g_do_one_long_spin,      TRUE },
                { "spin-all",           no_argument,            (int*)&g_do_all_spin,           TRUE },
@@ -1122,6 +1276,7 @@ parse_args(int argc, char *argv[])
                { "test-rt",            no_argument,            (int*)&g_test_rt,               TRUE },
                { "test-rt-smt",        no_argument,            (int*)&g_test_rt_smt,           TRUE },
                { "test-rt-avoid0",     no_argument,            (int*)&g_test_rt_avoid0,        TRUE },
+               { "rt-churn",           no_argument,            (int*)&g_rt_churn,              TRUE },
                { "histogram",          no_argument,            (int*)&g_histogram,             TRUE },
                { "verbose",            no_argument,            (int*)&g_verbose,               TRUE },
                { "help",               no_argument,            NULL,                           'h' },
@@ -1153,6 +1308,9 @@ parse_args(int argc, char *argv[])
                case OPT_CHURN_COUNT:
                        g_churn_count = read_dec_arg();
                        break;
+               case OPT_RT_CHURN_COUNT:
+                       g_rt_churn_count = read_dec_arg();
+                       break;
                case '?':
                case 'h':
                default:
diff --git a/tools/trace/ktruss.lua b/tools/trace/ktruss.lua
new file mode 100755 (executable)
index 0000000..514a8b7
--- /dev/null
@@ -0,0 +1,28 @@
+#!/usr/local/bin/recon
+
+local ktrace = require 'ktrace'
+
+if not arg[1] or arg[1] == '-h' then
+  print[[
+usage: ktruss <syscall-name> [<more-names> ...]
+
+Use Kernel TRace to print User Space Syscalls (ktruss).]]
+  os.exit(arg[1] == nil)
+end
+
+local sess = ktrace.Session.new()
+
+for i = 1, #arg do
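+  -- register paired entry/exit callbacks for each named BSD syscall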
+  sess:add_callback_pair('BSC_' .. arg[i], function (start, finish)
+    print(('%s[%d]: %s(0x%x, 0x%x, 0x%x, 0x%x) -> %d'):format(
+        sess:procname_for_threadid(start.threadid),
+        sess:pid_for_threadid(start.threadid), arg[i], start[1], start[2],
+        start[3], start[4], finish[2]))
+  end)
+end
+
+local ok, err = sess:start()
+if not ok then
+  io.stderr:write('tracing failed: ', err, '\n')
+  os.exit(1)
+end