git.saurik.com Git - apple/xnu.git/commitdiff
snapshot: xnu-6153.101.6.tar.gz   tags: macos-10154, v6153.101.6
author    Apple <opensource@apple.com>
          Fri, 1 May 2020 21:28:03 +0000 (21:28 +0000)
committer Apple <opensource@apple.com>
          Fri, 1 May 2020 21:28:03 +0000 (21:28 +0000)
260 files changed:
bsd/bsm/audit_kevents.h
bsd/dev/dtrace/dtrace.c
bsd/kern/kern_backtrace.c
bsd/kern/kern_control.c
bsd/kern/kern_core.c
bsd/kern/kern_descrip.c
bsd/kern/kern_exec.c
bsd/kern/kern_malloc.c
bsd/kern/kern_memorystatus_freeze.c
bsd/kern/kern_mib.c
bsd/kern/kern_mman.c
bsd/kern/kern_overrides.c
bsd/kern/kern_proc.c
bsd/kern/kern_sysctl.c
bsd/kern/policy_check.c
bsd/kern/subr_log.c
bsd/kern/subr_prf.c
bsd/kern/syscalls.master
bsd/kern/trace_codes
bsd/kern/uipc_socket.c
bsd/man/man2/getattrlist.2
bsd/man/man4/random.4
bsd/miscfs/devfs/devfs_vfsops.c
bsd/miscfs/devfs/devfs_vnops.c
bsd/miscfs/routefs/routefs_ops.c
bsd/net/content_filter.c
bsd/net/content_filter.h
bsd/net/dlil.c
bsd/net/if_bridge.c
bsd/net/if_bridgevar.h
bsd/net/kpi_interface.h
bsd/net/kpi_interfacefilter.h
bsd/net/kpi_protocol.h
bsd/net/necp.c
bsd/net/necp.h
bsd/net/necp_client.c
bsd/net/network_agent.c
bsd/netinet/dhcp.h
bsd/netinet/flow_divert.c
bsd/netinet/flow_divert_proto.h
bsd/netinet/in_pcb.c
bsd/netinet/ip_output.c
bsd/netinet/tcp_input.c
bsd/netinet/tcp_output.c
bsd/netinet/tcp_timer.c
bsd/netinet6/ip6_output.c
bsd/netinet6/nd6_prproxy.c
bsd/netkey/key.c
bsd/netkey/key.h
bsd/netkey/keydb.h
bsd/nfs/krpc_subr.c
bsd/nfs/nfs.h
bsd/nfs/nfs4_subs.c
bsd/nfs/nfs4_vnops.c
bsd/nfs/nfs_bio.c
bsd/nfs/nfs_boot.c
bsd/nfs/nfs_conf.h [new file with mode: 0644]
bsd/nfs/nfs_gss.c
bsd/nfs/nfs_lock.c
bsd/nfs/nfs_node.c
bsd/nfs/nfs_serv.c
bsd/nfs/nfs_socket.c
bsd/nfs/nfs_srvcache.c
bsd/nfs/nfs_subs.c
bsd/nfs/nfs_syscalls.c
bsd/nfs/nfs_upcall.c
bsd/nfs/nfs_vfsops.c
bsd/nfs/nfs_vnops.c
bsd/nfs/nfsm_subs.h
bsd/nfs/nfsnode.h
bsd/sys/_types/_fd_def.h
bsd/sys/attr.h
bsd/sys/dtrace.h
bsd/sys/imgact.h
bsd/sys/kdebug.h
bsd/sys/kern_memorystatus_freeze.h
bsd/sys/kpi_mbuf.h
bsd/sys/mount_internal.h
bsd/sys/proc.h
bsd/sys/socketvar.h
bsd/sys/spawn_internal.h
bsd/sys/stat.h
bsd/sys/vnode.h
bsd/sys/vnode_internal.h
bsd/vfs/vfs_attrlist.c
bsd/vfs/vfs_cache.c
bsd/vfs/vfs_conf.c
bsd/vfs/vfs_fsevents.c
bsd/vfs/vfs_lookup.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_syscalls.c
bsd/vm/vnode_pager.c
config/BSDKernel.exports
config/IOKit.exports
config/MACFramework.exports
config/MasterVersion
config/Private.exports
iokit/DriverKit/IOBufferMemoryDescriptor.iig
iokit/DriverKit/IOKitKeys.h [new file with mode: 0644]
iokit/DriverKit/IOMemoryDescriptor.iig
iokit/DriverKit/IOMemoryMap.iig
iokit/DriverKit/IOReturn.h
iokit/DriverKit/IOService.iig
iokit/DriverKit/IOServiceNotificationDispatchSource.iig [new file with mode: 0644]
iokit/DriverKit/Makefile
iokit/DriverKit/OSAction.iig
iokit/DriverKit/OSObject.iig
iokit/IOKit/IOCatalogue.h
iokit/IOKit/IOKitKeys.h
iokit/IOKit/IOKitServer.h
iokit/IOKit/IORegistryEntry.h
iokit/IOKit/IOReturn.h
iokit/IOKit/IOService.h
iokit/IOKit/pwr_mgt/IOPM.h
iokit/IOKit/pwr_mgt/IOPMPrivate.h
iokit/IOKit/pwr_mgt/RootDomain.h
iokit/Kernel/IOCatalogue.cpp
iokit/Kernel/IODeviceTreeSupport.cpp
iokit/Kernel/IOKitDebug.cpp
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IORegistryEntry.cpp
iokit/Kernel/IOService.cpp
iokit/Kernel/IOServicePM.cpp
iokit/Kernel/IOUserClient.cpp
iokit/Kernel/IOUserServer.cpp
iokit/conf/files
libkern/c++/OSMetaClass.cpp
libkern/libkern/OSKextLib.h
libkern/libkern/c++/OSMetaClass.h
libsyscall/mach/mach_port.c
libsyscall/wrappers/_libc_funcptr.c
libsyscall/wrappers/_libkernel_init.h
libsyscall/wrappers/spawn/posix_spawn.c
libsyscall/wrappers/spawn/spawn.h
libsyscall/wrappers/terminate_with_reason.c
osfmk/UserNotification/KUNCUserNotifications.c
osfmk/arm/arm_init.c
osfmk/arm/cswitch.s
osfmk/arm/locks.h
osfmk/arm/locks_arm.c
osfmk/arm/machine_routines.c
osfmk/arm/machine_routines_asm.s
osfmk/arm/pcb.c
osfmk/arm/pmap.c
osfmk/arm64/cswitch.s
osfmk/arm64/kpc.c
osfmk/arm64/locore.s
osfmk/arm64/machine_routines.c
osfmk/arm64/monotonic_arm64.c
osfmk/arm64/pcb.c
osfmk/arm64/proc_reg.h
osfmk/arm64/start.s
osfmk/bank/bank.c
osfmk/conf/files
osfmk/device/device_types.h
osfmk/device/iokit_rpc.c
osfmk/i386/AT386/model_dep.c
osfmk/i386/cpu_data.h
osfmk/i386/cpuid.c
osfmk/i386/cpuid.h
osfmk/i386/fpu.c
osfmk/i386/i386_init.c
osfmk/i386/locks.h
osfmk/i386/locks_i386.c
osfmk/i386/locks_i386_opt.c
osfmk/i386/machine_routines.c
osfmk/i386/machine_routines.h
osfmk/i386/pcb.c
osfmk/i386/proc_reg.h
osfmk/i386/user_ldt.c
osfmk/ipc/ipc_importance.c
osfmk/ipc/ipc_init.c
osfmk/ipc/ipc_kmsg.c
osfmk/ipc/ipc_object.c
osfmk/ipc/ipc_object.h
osfmk/ipc/ipc_port.h
osfmk/ipc/ipc_space.c
osfmk/ipc/ipc_space.h
osfmk/ipc/ipc_types.h
osfmk/ipc/ipc_voucher.c
osfmk/ipc/mach_debug.c
osfmk/kern/arcade.c
osfmk/kern/audit_sessionport.c
osfmk/kern/backtrace.c
osfmk/kern/backtrace.h
osfmk/kern/block_hint.h
osfmk/kern/circle_queue.h
osfmk/kern/clock.c
osfmk/kern/host_notify.c
osfmk/kern/ipc_clock.c
osfmk/kern/ipc_host.c
osfmk/kern/ipc_kobject.c
osfmk/kern/ipc_kobject.h
osfmk/kern/ipc_mig.c
osfmk/kern/ipc_mig.h
osfmk/kern/ipc_misc.c
osfmk/kern/ipc_sync.c
osfmk/kern/ipc_tt.c
osfmk/kern/kalloc.c
osfmk/kern/kern_stackshot.c
osfmk/kern/mk_timer.c
osfmk/kern/sched_clutch.c
osfmk/kern/sched_clutch.h
osfmk/kern/sched_clutch.md
osfmk/kern/startup.c
osfmk/kern/suid_cred.c [new file with mode: 0644]
osfmk/kern/suid_cred.h [new file with mode: 0644]
osfmk/kern/sysdiagnose.c
osfmk/kern/task.c
osfmk/kern/telemetry.c
osfmk/kern/thread.c
osfmk/kern/thread.h
osfmk/kern/work_interval.c
osfmk/kperf/callstack.c
osfmk/mach/i386/_structs.h
osfmk/mach/i386/fp_reg.h
osfmk/mach/i386/thread_state.h
osfmk/mach/mach_port.defs
osfmk/mach/mach_types.defs
osfmk/mach/mach_types.h
osfmk/mach/sysdiagnose_notification.defs
osfmk/mach/task.defs
osfmk/mach_debug/mach_debug_types.h
osfmk/vm/memory_object.c
osfmk/vm/vm_compressor.c
osfmk/vm/vm_compressor.h
osfmk/vm/vm_fault.c
osfmk/vm/vm_map.c
osfmk/vm/vm_map.h
osfmk/vm/vm_shared_region.c
osfmk/vm/vm_user.c
osfmk/x86_64/monotonic_x86_64.c
security/mac_base.c
security/mac_framework.h
security/mac_policy.h
security/mac_vfs.c
tests/Makefile
tests/bpflib.c [new file with mode: 0644]
tests/bpflib.h [new file with mode: 0644]
tests/fcntl.c [new file with mode: 0644]
tests/in_cksum.c [new file with mode: 0644]
tests/in_cksum.h [new file with mode: 0644]
tests/iokit/io_catalog_send_data.m [new file with mode: 0644]
tests/kpc.c
tests/kperf.c
tests/kperf_helpers.h
tests/launchd_plists/com.apple.xnu.test.task_create_suid_cred.plist [new file with mode: 0644]
tests/memorystatus_freeze_test.c
tests/net_bridge.c [new file with mode: 0644]
tests/netagent_race_infodisc_56244905.c [new file with mode: 0644]
tests/socket_0byte_udp_poll_58140856.c [new file with mode: 0644]
tests/stackshot_accuracy.m
tests/stackshot_tests.m
tests/task_create_suid_cred.c [new file with mode: 0644]
tests/task_create_suid_cred_entitlement.plist [new file with mode: 0644]
tools/lldbmacros/core/kernelcore.py
tools/lldbmacros/core/operating_system.py
tools/lldbmacros/ipc.py
tools/lldbmacros/kcdata.py
tools/lldbmacros/scheduler.py

index 37dc16b53f2e86a6cfb1025743cca6d4a364b981..a484e8528c7868ba2e717e4088b68913f2edb1aa 100644 (file)
 #define AUE_PIDFORTASK          43049   /* Darwin-specific. */
 #define AUE_SYSCTL_NONADMIN     43050
 #define AUE_COPYFILE            43051   /* Darwin-specific. */
-#define AUE_DBGPORTFORPID       43052   /* Darwin-specific. */
 /*
  * Events added to OpenBSM for FreeBSD and Linux; may also be used by Darwin
  * in the future.
 #define AUE_SETATTRLISTAT       43212   /* Darwin. */
 #define AUE_FMOUNT              43213   /* Darwin. */
 #define AUE_FSGETPATH_EXTENDED  43214   /* Darwin. */
+#define AUE_DBGPORTFORPID       43215   /* Darwin-specific. */
 
 #define AUE_SESSION_START       44901   /* Darwin. */
 #define AUE_SESSION_UPDATE      44902   /* Darwin. */
index a48f1e6f78d14bf43ab05bbbd8196b44a380ff6d..8b315b4b2eca4b24e5feef05bd5fc8c1be8fc5d8 100644 (file)
@@ -18660,7 +18660,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
                 * Range check the count. How much data can we pass around?
                 * FIX ME!
                 */
-               if (dtmodsyms_count == 0 || (dtmodsyms_count > 100 * 1024)) {
+               if (dtmodsyms_count == 0) {
                        cmn_err(CE_WARN, "dtmodsyms_count is not valid");
                        return (EINVAL);
                }
@@ -18669,6 +18669,12 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
                 * Allocate a correctly sized structure and copyin the data.
                 */
                module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count);
+               if (module_symbols_size > (size_t)dtrace_copy_maxsize()) {
+                       size_t dtmodsyms_max = DTRACE_MODULE_SYMBOLS_COUNT(dtrace_copy_maxsize());
+                       cmn_err(CE_WARN, "dtmodsyms_count %ld is too high, maximum is %ld", dtmodsyms_count, dtmodsyms_max);
+                       return (ENOBUFS);
+               }
+
                if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL) 
                        return (ENOMEM);
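The guard replaces the old hard-coded 100 * 1024 symbol cap with a limit derived from dtrace_copy_maxsize(). A minimal sketch of the size/count relationship, assuming a linear layout along the lines of the DTRACE_MODULE_SYMBOLS_SIZE macro in bsd/sys/dtrace.h; the DTRACE_MODULE_SYMBOLS_COUNT inverse below is a plausible reconstruction, not the verified header definition:

    /* Size of a module-symbols request grows linearly with the symbol count. */
    #define DTRACE_MODULE_SYMBOLS_SIZE(count) \
            (sizeof(dtrace_module_symbols_t) + ((count) - 1) * sizeof(dtrace_symbol_t))

    /* Plausible inverse: the largest count whose request still fits in `size`. */
    #define DTRACE_MODULE_SYMBOLS_COUNT(size) \
            ((((size) - sizeof(dtrace_module_symbols_t)) / sizeof(dtrace_symbol_t)) + 1)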
                        
index d5b5ca727bb4c79849d4a5b49f43a18303ce48bc..f51656aa12b73b121597659b18fd6ff36b722dd5 100644 (file)
@@ -73,8 +73,8 @@ backtrace_sysctl SYSCTL_HANDLER_ARGS
                return ENOBUFS;
        }
        memset(bt, 0, bt_size);
-       error = backtrace_user(bt, bt_len, &bt_filled, NULL, NULL);
-       if (error) {
+       bt_filled = backtrace_user(bt, bt_len, &error, NULL, NULL);
+       if (error != 0) {
                goto out;
        }
        bt_filled = min(bt_filled, bt_len);
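The hunk reflects a signature change: backtrace_user() now returns the number of frames captured and reports failure through an out-parameter rather than its return value. A short usage sketch of the new convention (variable names follow the handler above):

    unsigned int bt_filled;
    int error = 0;

    /* returns the number of frames written into bt; sets error on failure */
    bt_filled = backtrace_user(bt, bt_len, &error, NULL, NULL);
    if (error != 0) {
            goto out;
    }
    bt_filled = min(bt_filled, bt_len);  /* defensive clamp, as in the handler */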
index 3142cbda603f922529ca5a65300297c702978228..5430ff820668713051fcbbdfc64c29e9d230065a 100644 (file)
@@ -102,6 +102,7 @@ struct ctl_cb {
        struct sockaddr_ctl     sac;
        u_int32_t               usecount;
        u_int32_t               kcb_usecount;
+       u_int32_t               require_clearing_count;
 #if DEVELOPMENT || DEBUG
        enum ctl_status         status;
 #endif /* DEVELOPMENT || DEBUG */
@@ -370,24 +371,45 @@ ctl_sofreelastref(struct socket *so)
 }
 
 /*
- * Use this function to serialize calls into the kctl subsystem
+ * Use this function and ctl_kcb_require_clearing to serialize
+ * critical calls into the kctl subsystem
  */
 static void
 ctl_kcb_increment_use_count(struct ctl_cb *kcb, lck_mtx_t *mutex_held)
 {
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
-       while (kcb->kcb_usecount > 0) {
+       while (kcb->require_clearing_count > 0) {
+               msleep(&kcb->require_clearing_count, mutex_held, PSOCK | PCATCH, "kcb_require_clearing", NULL);
+       }
+       kcb->kcb_usecount++;
+}
+
+static void
+ctl_kcb_require_clearing(struct ctl_cb *kcb, lck_mtx_t *mutex_held)
+{
+       assert(kcb->kcb_usecount != 0);
+       kcb->require_clearing_count++;
+       kcb->kcb_usecount--;
+       while (kcb->kcb_usecount > 0) { // we need to wait until no one else is running
                msleep(&kcb->kcb_usecount, mutex_held, PSOCK | PCATCH, "kcb_usecount", NULL);
        }
        kcb->kcb_usecount++;
 }
 
 static void
-clt_kcb_decrement_use_count(struct ctl_cb *kcb)
+ctl_kcb_done_clearing(struct ctl_cb *kcb)
+{
+       assert(kcb->require_clearing_count != 0);
+       kcb->require_clearing_count--;
+       wakeup((caddr_t)&kcb->require_clearing_count);
+}
+
+static void
+ctl_kcb_decrement_use_count(struct ctl_cb *kcb)
 {
        assert(kcb->kcb_usecount != 0);
        kcb->kcb_usecount--;
-       wakeup_one((caddr_t)&kcb->kcb_usecount);
+       wakeup((caddr_t)&kcb->kcb_usecount);
 }
 
 static int
@@ -401,6 +423,7 @@ ctl_detach(struct socket *so)
 
        lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
        ctl_kcb_increment_use_count(kcb, mtx_held);
+       ctl_kcb_require_clearing(kcb, mtx_held);
 
        if (kcb->kctl != NULL && kcb->kctl->bind != NULL &&
            kcb->userdata != NULL && !(so->so_state & SS_ISCONNECTED)) {
@@ -419,7 +442,8 @@ ctl_detach(struct socket *so)
        kcb->status = KCTL_DISCONNECTED;
 #endif /* DEVELOPMENT || DEBUG */
        so->so_flags |= SOF_PCBCLEARING;
-       clt_kcb_decrement_use_count(kcb);
+       ctl_kcb_done_clearing(kcb);
+       ctl_kcb_decrement_use_count(kcb);
        return 0;
 }
 
@@ -573,6 +597,7 @@ ctl_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
 
        lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
        ctl_kcb_increment_use_count(kcb, mtx_held);
+       ctl_kcb_require_clearing(kcb, mtx_held);
 
        error = ctl_setup_kctl(so, nam, p);
        if (error) {
@@ -593,7 +618,8 @@ ctl_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
        socket_lock(so, 0);
 
 out:
-       clt_kcb_decrement_use_count(kcb);
+       ctl_kcb_done_clearing(kcb);
+       ctl_kcb_decrement_use_count(kcb);
        return error;
 }
 
@@ -609,6 +635,7 @@ ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
 
        lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
        ctl_kcb_increment_use_count(kcb, mtx_held);
+       ctl_kcb_require_clearing(kcb, mtx_held);
 
 #if DEVELOPMENT || DEBUG
        if (kcb->status != KCTL_DISCONNECTED && ctl_panic_debug) {
@@ -668,7 +695,8 @@ end:
                lck_mtx_unlock(ctl_mtx);
        }
 out:
-       clt_kcb_decrement_use_count(kcb);
+       ctl_kcb_done_clearing(kcb);
+       ctl_kcb_decrement_use_count(kcb);
        return error;
 }
 
@@ -680,6 +708,7 @@ ctl_disconnect(struct socket *so)
        if ((kcb = (struct ctl_cb *)so->so_pcb)) {
                lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
                ctl_kcb_increment_use_count(kcb, mtx_held);
+               ctl_kcb_require_clearing(kcb, mtx_held);
                struct kctl             *kctl = kcb->kctl;
 
                if (kctl && kctl->disconnect) {
@@ -706,7 +735,8 @@ ctl_disconnect(struct socket *so)
                kctlstat.kcs_gencnt++;
                lck_mtx_unlock(ctl_mtx);
                socket_lock(so, 0);
-               clt_kcb_decrement_use_count(kcb);
+               ctl_kcb_done_clearing(kcb);
+               ctl_kcb_decrement_use_count(kcb);
        }
        return 0;
 }
@@ -798,7 +828,7 @@ ctl_usr_rcvd(struct socket *so, int flags)
        ctl_sbrcv_trim(so);
 
 out:
-       clt_kcb_decrement_use_count(kcb);
+       ctl_kcb_decrement_use_count(kcb);
        return error;
 }
 
@@ -842,7 +872,7 @@ ctl_send(struct socket *so, int flags, struct mbuf *m,
        if (error != 0) {
                OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_fail);
        }
-       clt_kcb_decrement_use_count(kcb);
+       ctl_kcb_decrement_use_count(kcb);
 
        return error;
 }
@@ -906,7 +936,7 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m,
        if (error != 0) {
                OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_list_fail);
        }
-       clt_kcb_decrement_use_count(kcb);
+       ctl_kcb_decrement_use_count(kcb);
 
        return error;
 }
@@ -1415,7 +1445,7 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt)
        }
 
 out:
-       clt_kcb_decrement_use_count(kcb);
+       ctl_kcb_decrement_use_count(kcb);
        return error;
 }
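Taken together, the two counters form a reader/writer-style gate: ordinary operations (send, receive, sockopt handling) take only a use count, while lifecycle operations (bind, connect, disconnect, detach) additionally "require clearing", draining every other user before they mutate the control block. A condensed sketch of the protocol implied by the hunks above:

    /* ordinary path: blocked only while some clearing operation is pending */
    ctl_kcb_increment_use_count(kcb, mtx_held); /* waits on require_clearing_count */
    /* ... send / recv / ctloutput work ... */
    ctl_kcb_decrement_use_count(kcb);

    /* lifecycle path: must be the sole user before touching kcb state */
    ctl_kcb_increment_use_count(kcb, mtx_held);
    ctl_kcb_require_clearing(kcb, mtx_held);    /* drains all other users */
    /* ... bind / connect / disconnect / detach work ... */
    ctl_kcb_done_clearing(kcb);                 /* wakes blocked incrementers */
    ctl_kcb_decrement_use_count(kcb);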
 
index 46fbd3ee55282275697dcb5964bb774b9812929c..bbf2fcac55b73834f2cccb6f6c9804997c95132c 100644 (file)
 #include <security/mac_framework.h>
 #endif /* CONFIG_MACF */
 
-#if CONFIG_CSR
-#include <sys/codesign.h>
-#include <sys/csr.h>
-#endif
-
 typedef struct {
        int     flavor;                 /* the number for this flavor */
        mach_msg_type_number_t  count;  /* count of ints in this flavor */
@@ -291,18 +286,6 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int coredump_flags)
        }
 #endif
 
-#if CONFIG_CSR
-       /* If the process is restricted, CSR isn't configured to allow
-        * restricted processes to be debugged, and CSR isn't configured in
-        * AppleInternal mode, then don't dump core. */
-       if (cs_restricted(core_proc) &&
-           csr_check(CSR_ALLOW_TASK_FOR_PID) &&
-           csr_check(CSR_ALLOW_APPLE_INTERNAL)) {
-               error = EPERM;
-               goto out2;
-       }
-#endif
-
        if (IS_64BIT_PROCESS(core_proc)) {
                is_64 = 1;
                mach_header_sz = sizeof(struct mach_header_64);
index 320c27b2c1be8fe18547a807098907c6df8f2038..8e7a7db7483b27f95a2cd573b0c687c37f0de295 100644 (file)
@@ -1972,7 +1972,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 
                        kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
                        kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
-                       if (kr != KERN_SUCCESS) {
+                       if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
                                error = ENOMEM;
                                vnode_put(vp);
                                goto outdrop;
@@ -1981,7 +1981,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                        if (uap->cmd == F_ADDSIGS) {
                                error = copyin(fs.fs_blob_start,
                                    (void *) kernel_blob_addr,
-                                   kernel_blob_size);
+                                   fs.fs_blob_size);
                        } else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM */
                                int resid;
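The fix is two-sided: ubc_cs_blob_allocate() may legitimately return a buffer whose size differs from the request, so the code now rejects undersized allocations up front and copies exactly the user-specified blob length rather than the (possibly larger) allocation size, which previously could over-read the caller's buffer. The hardened pattern, condensed from the hunks above:

    kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
    kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
    if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
            error = ENOMEM;          /* allocation failed or came back too small */
    } else {
            /* copy exactly what userspace supplied, never the rounded size */
            error = copyin(fs.fs_blob_start, (void *)kernel_blob_addr,
                fs.fs_blob_size);
    }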
 
index e4ec2a210cfd21515ac62a02a6df46116f19108c..afa0cb8207ebcedde2c8b7568af9133b50bbda1b 100644 (file)
@@ -1863,6 +1863,7 @@ exec_handle_port_actions(struct image_params *imgp,
        kern_return_t kr;
        boolean_t task_has_watchport_boost = task_has_watchports(current_task());
        boolean_t in_exec = (imgp->ip_flags & IMGPF_EXEC);
+       boolean_t suid_cred_specified = FALSE;
 
        for (i = 0; i < pacts->pspa_count; i++) {
                act = &pacts->pspa_actions[i];
@@ -1886,6 +1887,16 @@ exec_handle_port_actions(struct image_params *imgp,
                                goto done;
                        }
                        break;
+
+               case PSPA_SUID_CRED:
+                       /* Only a single suid credential can be specified. */
+                       if (suid_cred_specified) {
+                               ret = EINVAL;
+                               goto done;
+                       }
+                       suid_cred_specified = TRUE;
+                       break;
+
                default:
                        ret = EINVAL;
                        goto done;
@@ -1973,6 +1984,11 @@ exec_handle_port_actions(struct image_params *imgp,
                        /* hold on to this till end of spawn */
                        actions->registered_array[registered_i++] = port;
                        break;
+
+               case PSPA_SUID_CRED:
+                       imgp->ip_sc_port = port;
+                       break;
+
                default:
                        ret = EINVAL;
                        break;
@@ -3748,6 +3764,10 @@ bad:
                        imgp->ip_cs_error = OS_REASON_NULL;
                }
 #endif
+               if (imgp->ip_sc_port != NULL) {
+                       ipc_port_release_send(imgp->ip_sc_port);
+                       imgp->ip_sc_port = NULL;
+               }
        }
 
 #if CONFIG_DTRACE
@@ -5381,7 +5401,8 @@ exec_handle_sugid(struct image_params *imgp)
            kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
            ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
            ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
-           (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
+           (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid))) ||
+           (imgp->ip_sc_port != NULL)) {
 #if CONFIG_MACF
 /* label for MAC transition and neither VSUID nor VSGID */
 handle_mac_transition:
@@ -5408,6 +5429,33 @@ handle_mac_transition:
                 * proc's ucred lock. This prevents others from accessing
                 * a garbage credential.
                 */
+
+               if (imgp->ip_sc_port != NULL) {
+                       extern int suid_cred_verify(ipc_port_t, vnode_t, uint32_t *);
+                       int ret = -1;
+                       uid_t uid = UINT32_MAX;
+
+                       /*
+                        * Check that the vnodes match. If a script is being
+                        * executed check the script's vnode rather than the
+                        * interpreter's.
+                        */
+                       struct vnode *vp = imgp->ip_scriptvp != NULL ? imgp->ip_scriptvp : imgp->ip_vp;
+
+                       ret = suid_cred_verify(imgp->ip_sc_port, vp, &uid);
+                       if (ret == 0) {
+                               apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred) {
+                                       return kauth_cred_setresuid(my_cred,
+                                       KAUTH_UID_NONE,
+                                       uid,
+                                       uid,
+                                       KAUTH_UID_NONE);
+                               });
+                       } else {
+                               error = EPERM;
+                       }
+               }
+
                if (imgp->ip_origvattr->va_mode & VSUID) {
                        apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred) {
                                return kauth_cred_setresuid(my_cred,
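The flow across these hunks: userspace attaches at most one suid credential port via a PSPA_SUID_CRED port action; at exec time the kernel verifies the port against the vnode actually being run (the script rather than the interpreter) and, if it checks out, adopts the credential's UID through setresuid on the proc's credential. On the userspace side the intended usage is plausibly along these lines; the wrapper names (task_create_suid_cred, posix_spawnattr_set_suid_cred_port_np) are inferred from this commit's file list and test names, not verified signatures:

    #include <spawn.h>
    #include <mach/mach.h>

    /* Hypothetical sketch; names inferred from tests/task_create_suid_cred.c. */
    mach_port_t cred = MACH_PORT_NULL;
    posix_spawnattr_t attr;
    pid_t pid;
    char *argv[] = { "/usr/local/bin/helper", NULL };

    /* mint a one-shot credential: "this binary may run with euid 0" */
    task_create_suid_cred(mach_task_self(), "/usr/local/bin/helper", 0, &cred);

    posix_spawnattr_init(&attr);
    posix_spawnattr_set_suid_cred_port_np(&attr, cred); /* PSPA_SUID_CRED action */
    posix_spawn(&pid, argv[0], NULL, &attr, argv, NULL);
    posix_spawnattr_destroy(&attr);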
index 51fbadb8b49c04af0910aaad0a4210a2f7a77ebf..c9c87bc1676bb4d5255dadc5ae67226fff99c7a8 100644 (file)
 
 #include <miscfs/specfs/specdev.h>
 
+#include <nfs/nfs_conf.h>
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfsnode.h>
@@ -145,7 +146,7 @@ const char *memname[] = {
        "iov32",        /* 19 M_IOV32 */
        "mount",        /* 20 M_MOUNT */
        "fhandle",              /* 21 M_FHANDLE */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
        "NFS req",              /* 22 M_NFSREQ */
        "NFS mount",    /* 23 M_NFSMNT */
        "NFS node",             /* 24 M_NFSNODE */
@@ -187,7 +188,7 @@ const char *memname[] = {
        "NQNFS Lease",  /* 47 M_NQLEASE */
        "NQNFS Host",   /* 48 M_NQMHOST */
        "Export Host",  /* 49 M_NETADDR */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
        "NFS srvsock",  /* 50 M_NFSSVC */
        "NFS uid",              /* 51 M_NFSUID */
        "NFS daemon",   /* 52 M_NFSD */
@@ -202,7 +203,7 @@ const char *memname[] = {
        "mrt",                  /* 56 M_MRTABLE */
        "",             /* 57 unused entry */
        "",             /* 58 unused entry */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
        "NFSV3 srvdesc",/* 59 M_NFSRVDESC */
        "NFSV3 diroff", /* 60 M_NFSDIROFF */
        "NFSV3 bigfh",  /* 61 M_NFSBIGFH */
@@ -343,7 +344,7 @@ struct kmzones {
        { SOS(user32_iovec), KMZ_LOOKUPZONE, FALSE },   /* 19 M_IOV32 */
        { SOS(mount), KMZ_CREATEZONE, FALSE },          /* 20 M_MOUNT */
        { 0, KMZ_MALLOC, FALSE },                       /* 21 M_FHANDLE */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
        { SOS(nfsreq), KMZ_CREATEZONE, FALSE },         /* 22 M_NFSREQ */
        { SOS(nfsmount), KMZ_CREATEZONE, FALSE },        /* 23 M_NFSMNT */
        { SOS(nfsnode), KMZ_CREATEZONE, FALSE },        /* 24 M_NFSNODE */
@@ -381,7 +382,7 @@ struct kmzones {
        { 0, KMZ_MALLOC, FALSE },                       /* 47 M_NQLEASE */
        { 0, KMZ_MALLOC, FALSE },                       /* 48 M_NQMHOST */
        { 0, KMZ_MALLOC, FALSE },                       /* 49 M_NETADDR */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
        { SOX(nfsrv_sock),
          KMZ_CREATEZONE_ACCT, FALSE },                 /* 50 M_NFSSVC */
        { 0, KMZ_MALLOC, FALSE },                       /* 51 M_NFSUID */
@@ -400,7 +401,7 @@ struct kmzones {
        { SOX(mrt), KMZ_CREATEZONE, TRUE },             /* 56 M_MRTABLE */
        { 0, KMZ_MALLOC, FALSE },                       /* 57 unused entry */
        { 0, KMZ_MALLOC, FALSE },                       /* 58 unused entry */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
        { SOS(nfsrv_descript),
          KMZ_CREATEZONE_ACCT, FALSE },                 /* 59 M_NFSRVDESC */
        { SOS(nfsdmap), KMZ_CREATEZONE, FALSE },        /* 60 M_NFSDIROFF */
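The repeated #if (NFSCLIENT || NFSSERVER) to #if CONFIG_NFS substitutions lean on the new bsd/nfs/nfs_conf.h, which presumably folds the separate client and server build options into one umbrella switch. A hypothetical reconstruction of what such a header provides (the new file itself is not reproduced in this diff):

    /* bsd/nfs/nfs_conf.h -- hypothetical reconstruction, not the shipped file */
    #ifndef _NFS_NFS_CONF_H_
    #define _NFS_NFS_CONF_H_

    /* umbrella switch: any NFS support at all, client or server */
    #if NFSCLIENT || NFSSERVER
    #define CONFIG_NFS 1
    #endif

    #endif /* _NFS_NFS_CONF_H_ */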
index c83a80d72bca65182938456de4603971d5b21dfe..a6e720285279098521085bc03c733b2e2b51d207 100644 (file)
@@ -128,6 +128,53 @@ unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of pri
 unsigned int memorystatus_thaw_count = 0;
 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
 
+/* Freezer counters collected for telemetry */
+static struct memorystatus_freezer_stats_t {
+       /*
+        * # of processes that we've considered freezing.
+        * Used to normalize the error reasons below.
+        */
+       uint64_t mfs_process_considered_count;
+
+       /*
+        * The following counters track how many times we've failed to freeze
+        * a process because of a specific FREEZER_ERROR.
+        */
+       /* EXCESS_SHARED_MEMORY */
+       uint64_t mfs_error_excess_shared_memory_count;
+       /* LOW_PRIVATE_SHARED_RATIO */
+       uint64_t mfs_error_low_private_shared_ratio_count;
+       /* NO_COMPRESSOR_SPACE */
+       uint64_t mfs_error_no_compressor_space_count;
+       /* NO_SWAP_SPACE */
+       uint64_t mfs_error_no_swap_space_count;
+       /* pages < memorystatus_freeze_pages_min */
+       uint64_t mfs_error_below_min_pages_count;
+       /* dasd determined it was unlikely to be relaunched. */
+       uint64_t mfs_error_low_probability_of_use_count;
+       /* transient reasons (like inability to acquire a lock). */
+       uint64_t mfs_error_other_count;
+
+       /*
+        * # of times that we saw memorystatus_available_pages <= memorystatus_freeze_threshold.
+        * Used to normalize skipped_full_count and shared_mb_high_count.
+        */
+       uint64_t mfs_below_threshold_count;
+
+       /* Skipped running the freezer because we were out of slots */
+       uint64_t mfs_skipped_full_count;
+
+       /* Skipped running the freezer because we were over the shared mb limit*/
+       uint64_t mfs_skipped_shared_mb_high_count;
+
+       /*
+        * How many pages have not been sent to swap because they were in a shared object?
+        * This is being used to gather telemetry so we can understand the impact we'd have
+        * on our NAND budget if we did swap out these pages.
+        */
+       uint64_t mfs_shared_pages_skipped;
+} memorystatus_freezer_stats = {0};
+
 #endif /* XNU_KERNEL_PRIVATE */
 
 static inline boolean_t memorystatus_can_freeze_processes(void);
@@ -144,6 +191,7 @@ static uint64_t memorystatus_freeze_pageouts = 0;
 #define DEGRADED_WINDOW_MINS    (30)
 #define NORMAL_WINDOW_MINS      (24 * 60)
 
+/* Protected by the freezer_mutex */
 static throttle_interval_t throttle_intervals[] = {
        { DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
        { NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
@@ -166,6 +214,52 @@ SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOC
 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
+
+/*
+ * Calculates the hit rate for the freezer.
+ * The hit rate is defined as the percentage of procs that are currently in the
+ * freezer which we have thawed.
+ * A low hit rate means we're freezing bad candidates since they're not re-used.
+ */
+static int sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       size_t thaw_count = 0, frozen_count = 0;
+       int thaw_percentage = 100;
+       unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
+       proc_t p = PROC_NULL;
+       proc_list_lock();
+
+       p = memorystatus_get_first_proc_locked(&band, FALSE);
+
+       while (p) {
+               if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
+                       if (p->p_memstat_thaw_count > 0) {
+                               thaw_count++;
+                       }
+                       frozen_count++;
+               }
+               p = memorystatus_get_next_proc_locked(&band, p, FALSE);
+       }
+       proc_list_unlock();
+       if (frozen_count > 0) {
+               thaw_percentage = 100 * thaw_count / frozen_count;
+       }
+       return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
+}
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
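Because the handler is read-only and computes its value on demand, the metric can be sampled cheaply from userspace with the standard sysctlbyname(3) interface, using the OID name registered above:

    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
            int thaw_percentage = 0;
            size_t len = sizeof(thaw_percentage);

            /* OID registered by the SYSCTL_PROC above */
            if (sysctlbyname("kern.memorystatus_freezer_thaw_percentage",
                &thaw_percentage, &len, NULL, 0) == 0) {
                    printf("freezer thaw percentage: %d%%\n", thaw_percentage);
            }
            return 0;
    }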
 
 
 #if DEVELOPMENT || DEBUG
@@ -248,6 +342,7 @@ sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
 again:
        p = proc_find(pid);
        if (p != NULL) {
+               memorystatus_freezer_stats.mfs_process_considered_count++;
                uint32_t purgeable, wired, clean, dirty, shared;
                uint32_t max_pages = 0, state = 0;
 
@@ -297,18 +392,24 @@ again:
                }
 
                error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
+               if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+                       memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
+               }
 
                if (error) {
                        char reason[128];
                        if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
+                               memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
                                strlcpy(reason, "too much shared memory", 128);
                        }
 
                        if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+                               memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
                                strlcpy(reason, "low private-shared pages ratio", 128);
                        }
 
                        if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
+                               memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
                                strlcpy(reason, "no compressor space", 128);
                        }
 
@@ -402,11 +503,20 @@ static int
 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
 {
 #pragma unused(arg1, arg2, oidp, req)
+       int error, val;
+       /*
+        * Only demote on write to prevent demoting during `sysctl -a`.
+        * The actual value written doesn't matter.
+        */
+       error = sysctl_handle_int(oidp, &val, 0, req);
+       if (error || !req->newptr) {
+               return error;
+       }
        memorystatus_demote_frozen_processes(false);
        return 0;
 }
 
-SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
 
 static int
 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
@@ -1081,10 +1191,17 @@ memorystatus_is_process_eligible_for_freeze(proc_t p)
                }
        }
 
+       /*
+        * This proc is a suspended application.
+        * We're interested in tracking what percentage of these
+        * actually get frozen.
+        */
+       memorystatus_freezer_stats.mfs_process_considered_count++;
 
        /* Only freeze applications meeting our minimum resident page criteria */
        memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
        if (pages < memorystatus_freeze_pages_min) {
+               memorystatus_freezer_stats.mfs_error_below_min_pages_count++;
                goto out;
        }
 
@@ -1094,6 +1211,7 @@ memorystatus_is_process_eligible_for_freeze(proc_t p)
         * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
         */
        if ((p->p_listflag & P_LIST_EXITED) != 0) {
+               memorystatus_freezer_stats.mfs_error_other_count++;
                goto out;
        }
 
@@ -1110,6 +1228,7 @@ memorystatus_is_process_eligible_for_freeze(proc_t p)
                }
 
                if (probability_of_use == 0) {
+                       memorystatus_freezer_stats.mfs_error_low_probability_of_use_count++;
                        goto out;
                }
        }
@@ -1196,6 +1315,9 @@ memorystatus_freeze_process_sync(proc_t p)
                    memorystatus_available_pages, 0, 0, 0, 0);
 
                ret = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
+               if (ret == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+                       memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
+               }
 
                KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
                    memorystatus_available_pages, aPid, 0, 0, 0);
@@ -1241,15 +1363,17 @@ memorystatus_freeze_process_sync(proc_t p)
                                        ret = 0;
                                }
 
-                               proc_list_lock();
 
                                /* Update stats */
                                for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
                                        throttle_intervals[i].pageouts += dirty;
                                }
-                       } else {
-                               proc_list_lock();
                        }
+                       memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
+                       os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s] done memorystatus_freeze_budget_pages_remaining %llu froze %u pages",
+                           aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, dirty);
+
+                       proc_list_lock();
 
                        memorystatus_freeze_pageouts += dirty;
 
@@ -1260,25 +1384,25 @@ memorystatus_freeze_process_sync(proc_t p)
                                 * can freeze a more eligible process at this moment in time?
                                 */
                        }
-
-                       memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
-                       os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s] done memorystatus_freeze_budget_pages_remaining %llu froze %u pages",
-                           aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, dirty);
                } else {
                        char reason[128];
                        if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
+                               memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
                                strlcpy(reason, "too much shared memory", 128);
                        }
 
                        if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+                               memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
                                strlcpy(reason, "low private-shared pages ratio", 128);
                        }
 
                        if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
+                               memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
                                strlcpy(reason, "no compressor space", 128);
                        }
 
                        if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) {
+                               memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
                                strlcpy(reason, "no swap space", 128);
                        }
 
@@ -1298,6 +1422,9 @@ exit:
        return ret;
 }
 
+/*
+ * Caller must hold the freezer_mutex and it will be locked on return.
+ */
 static int
 memorystatus_freeze_top_process(void)
 {
@@ -1311,6 +1438,7 @@ memorystatus_freeze_top_process(void)
        coalition_t coal = COALITION_NULL;
        pid_t pid_list[MAX_XPC_SERVICE_PIDS];
        unsigned int    ntasks = 0;
+       LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
 
        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0);
 
@@ -1432,6 +1560,7 @@ freeze_process:
 
                p = proc_ref_locked(p);
                if (!p) {
+                       memorystatus_freezer_stats.mfs_error_other_count++;
                        break;
                }
 
@@ -1441,6 +1570,9 @@ freeze_process:
                    memorystatus_available_pages, 0, 0, 0, 0);
 
                kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
+               if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+                       memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
+               }
 
                KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
                    memorystatus_available_pages, aPid, 0, 0, 0);
@@ -1484,15 +1616,16 @@ freeze_process:
                                        ret = 0;
                                }
 
-                               proc_list_lock();
-
                                /* Update stats */
                                for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
                                        throttle_intervals[i].pageouts += dirty;
                                }
-                       } else {
-                               proc_list_lock();
                        }
+                       memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
+                       os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
+                           refreeze_processes? "re" : "", (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, refreeze_processes? "Re" : "", dirty);
+
+                       proc_list_lock();
 
                        memorystatus_freeze_pageouts += dirty;
 
@@ -1504,10 +1637,6 @@ freeze_process:
                                 */
                        }
 
-                       memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
-                       os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
-                           refreeze_processes? "re" : "", (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, refreeze_processes? "Re" : "", dirty);
-
                        /* Return KERN_SUCCESS */
                        ret = kr;
 
@@ -1603,18 +1732,22 @@ freeze_process:
 
                        char reason[128];
                        if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
+                               memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
                                strlcpy(reason, "too much shared memory", 128);
                        }
 
                        if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+                               memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
                                strlcpy(reason, "low private-shared pages ratio", 128);
                        }
 
                        if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
+                               memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
                                strlcpy(reason, "no compressor space", 128);
                        }
 
                        if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) {
+                               memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
                                strlcpy(reason, "no swap space", 128);
                        }
 
@@ -1836,8 +1969,8 @@ memorystatus_demote_frozen_processes(boolean_t force_one)
 
        if (force_one == FALSE) {
                /*
-                * We use this counter to track daily thaws.
-                * So we only reset it to 0 under the normal
+                * We use these counters to track daily hit rates.
+                * So we only reset them to 0 under the normal
                 * mode.
                 */
                memorystatus_thaw_count = 0;
@@ -1846,6 +1979,72 @@ memorystatus_demote_frozen_processes(boolean_t force_one)
        proc_list_unlock();
 }
 
+/*
+ * Calculate a new freezer budget.
+ * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
+ * @param burst_multiple The burst_multiple for the new period
+ * @param interval_duration_min How many minutes will the new interval be?
+ * @param rollover The amount to rollover from the previous budget.
+ *
+ * @return A budget for the new interval.
+ */
+static uint32_t
+memorystatus_freeze_calculate_new_budget(
+       unsigned int time_since_last_interval_expired_sec,
+       unsigned int burst_multiple,
+       unsigned int interval_duration_min,
+       uint32_t rollover)
+{
+       uint64_t freeze_daily_budget = 0;
+       unsigned int daily_budget_pageouts = 0;
+       unsigned int freeze_daily_pageouts_max = 0;
+       const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
+       /* Precision factor for days_missed. 2 decimal points. */
+       const static unsigned int kFixedPointFactor = 100;
+       unsigned int days_missed, budget_missed;
+
+       /* Get the daily budget from the storage layer */
+       if (vm_swap_max_budget(&freeze_daily_budget)) {
+               memorystatus_freeze_daily_mb_max = (freeze_daily_budget / (1024 * 1024));
+               os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
+       }
+       /* Calculate the daily pageout budget */
+       freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
+
+       daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
+
+       /*
+        * Add additional budget for time since the interval expired.
+        * For example, if the interval expired n days ago, we should get an additional n days
+        * of budget since we didn't use any budget during those n days.
+        */
+       days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
+       budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
+       return rollover + daily_budget_pageouts + budget_missed;
+}
+
+#if DEVELOPMENT || DEBUG
+
+static int
+sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error = 0;
+       unsigned int time_since_last_interval_expired_sec = 0;
+       unsigned int new_budget;
+
+       error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
+       if (error || !req->newptr) {
+               return error;
+       }
+       new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
+       return copyout(&new_budget, req->oldptr, MIN(sizeof(req->oldlen), sizeof(new_budget)));
+}
+
+SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
+    0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");
+
+#endif /* DEVELOPMENT || DEBUG */
 
 /*
  * This function will do 4 things:
@@ -1861,6 +2060,9 @@ memorystatus_demote_frozen_processes(boolean_t force_one)
  * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
  *    what we would normally expect, then we are running low on our daily budget and need to enter
  *    degraded perf. mode.
+ *
+ *    Caller must hold the freezer mutex
+ *    Caller must not hold the proc_list lock
  */
 
 static void
@@ -1868,7 +2070,9 @@ memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
 {
        clock_sec_t sec;
        clock_nsec_t nsec;
-       mach_timespec_t ts;
+       mach_timespec_t now_ts;
+       LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
+       LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
 
        unsigned int freeze_daily_pageouts_max = 0;
 
@@ -1883,15 +2087,15 @@ memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
 #endif
 
        clock_get_system_nanotime(&sec, &nsec);
-       ts.tv_sec = sec;
-       ts.tv_nsec = nsec;
+       now_ts.tv_sec = sec;
+       now_ts.tv_nsec = nsec;
 
        struct throttle_interval_t *interval = NULL;
 
        if (memorystatus_freeze_degradation == TRUE) {
                interval = degraded_throttle_window;
 
-               if (CMP_MACH_TIMESPEC(&ts, &interval->ts) >= 0) {
+               if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
                        memorystatus_freeze_degradation = FALSE;
                        interval->pageouts = 0;
                        interval->max_pageouts = 0;
@@ -1902,28 +2106,17 @@ memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
 
        interval = normal_throttle_window;
 
-       if (CMP_MACH_TIMESPEC(&ts, &interval->ts) >= 0) {
-               /*
-                * New throttle window.
-                * Rollover any unused budget.
-                * Also ask the storage layer what the new budget needs to be.
-                */
-               uint64_t freeze_daily_budget = 0;
-               unsigned int daily_budget_pageouts = 0;
-
-               if (vm_swap_max_budget(&freeze_daily_budget)) {
-                       memorystatus_freeze_daily_mb_max = (freeze_daily_budget / (1024 * 1024));
-                       os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
-               }
-
-               freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
-
-               daily_budget_pageouts =  (interval->burst_multiple * (((uint64_t)interval->mins * freeze_daily_pageouts_max) / NORMAL_WINDOW_MINS));
-               interval->max_pageouts = (interval->max_pageouts - interval->pageouts) + daily_budget_pageouts;
+       if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
+               /* How long has it been since the previous interval expired? */
+               mach_timespec_t expiration_period_ts = now_ts;
+               SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
 
+               interval->max_pageouts = memorystatus_freeze_calculate_new_budget(
+                       expiration_period_ts.tv_sec, interval->burst_multiple,
+                       interval->mins, interval->max_pageouts - interval->pageouts);
                interval->ts.tv_sec = interval->mins * 60;
                interval->ts.tv_nsec = 0;
-               ADD_MACH_TIMESPEC(&interval->ts, &ts);
+               ADD_MACH_TIMESPEC(&interval->ts, &now_ts);
                /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
                if (interval->pageouts > interval->max_pageouts) {
                        interval->pageouts -= interval->max_pageouts;
@@ -1931,6 +2124,7 @@ memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
                        interval->pageouts = 0;
                }
                *budget_pages_allowed = interval->max_pageouts;
+               memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
 
                memorystatus_demote_frozen_processes(FALSE); /* normal mode...don't force a demotion */
        } else {
@@ -1968,7 +2162,7 @@ memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
                                time_left.tv_sec = interval->ts.tv_sec;
                                time_left.tv_nsec = 0;
 
-                               SUB_MACH_TIMESPEC(&time_left, &ts);
+                               SUB_MACH_TIMESPEC(&time_left, &now_ts);
 
                                if (budget_left <= budget_threshold) {
                                        /*
@@ -2004,7 +2198,7 @@ memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
        }
 
        MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
-           interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60,
+           interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts->tv_sec) / 60,
            interval->throttle ? "on" : "off");
 }
 
@@ -2063,12 +2257,21 @@ memorystatus_freeze_thread_should_run(void)
                goto out;
        }
 
-       if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max) &&
-           (memorystatus_refreeze_eligible_count < MIN_THAW_REFREEZE_THRESHOLD)) {
-               goto out;
+       memorystatus_freezer_stats.mfs_below_threshold_count++;
+
+       if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
+               /*
+                * Consider this as a skip even if we wake up to refreeze because
+                * we won't freeze any new procs.
+                */
+               memorystatus_freezer_stats.mfs_skipped_full_count++;
+               if (memorystatus_refreeze_eligible_count < MIN_THAW_REFREEZE_THRESHOLD) {
+                       goto out;
+               }
        }
 
        if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) {
+               memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count++;
                goto out;
        }
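A worked example of the fixed-point catch-up math in memorystatus_freeze_calculate_new_budget(): with a daily maximum of 100,000 pageouts, a 24-hour window, burst_multiple 1, 20,000 pages of unused budget rolled over, and the previous interval having expired 36 hours ago, days_missed is 150 (1.50 days at the x100 fixed-point factor), budget_missed is 150 * 100,000 / 100 = 150,000 pages, and the new budget comes to 20,000 + 100,000 + 150,000 = 270,000 pages. A standalone model of the same arithmetic (parameter names mirror the diff; freeze_daily_pageouts_max is passed in here rather than derived from the storage layer):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t
    calculate_new_budget(unsigned int time_since_last_interval_expired_sec,
        unsigned int burst_multiple, unsigned int interval_duration_min,
        uint32_t rollover, unsigned int freeze_daily_pageouts_max)
    {
            const unsigned int kNumSecondsInDay = 60 * 60 * 24;
            const unsigned int kFixedPointFactor = 100;   /* 2 decimal places */

            /* budget for one full interval at the daily rate */
            unsigned int daily_budget_pageouts = burst_multiple *
                (((uint64_t)interval_duration_min * freeze_daily_pageouts_max) /
                (kNumSecondsInDay / 60));
            /* credit for the time the previous interval sat expired */
            unsigned int days_missed = time_since_last_interval_expired_sec *
                kFixedPointFactor / kNumSecondsInDay;
            unsigned int budget_missed = days_missed *
                freeze_daily_pageouts_max / kFixedPointFactor;

            return rollover + daily_budget_pageouts + budget_missed;
    }

    int
    main(void)
    {
            /* 36h late, 24h window, 20k pages rolled over, 100k daily max */
            printf("%u\n", calculate_new_budget(36 * 60 * 60, 1, 24 * 60,
                20000, 100000));   /* prints 270000 */
            return 0;
    }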
 
index 667b17a7d7b3aad2397317ec376da5242a4dd2cc..c09b4217bc631b0312bbeec20422418620e9005c 100644 (file)
@@ -586,7 +586,6 @@ SYSCTL_PROC(_hw_optional, OID_AUTO, hle, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN
 SYSCTL_PROC(_hw_optional, OID_AUTO, adx, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasADX, 0, sysctl_cpu_capability, "I", "");
 SYSCTL_PROC(_hw_optional, OID_AUTO, mpx, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasMPX, 0, sysctl_cpu_capability, "I", "");
 SYSCTL_PROC(_hw_optional, OID_AUTO, sgx, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSGX, 0, sysctl_cpu_capability, "I", "");
-#if !defined(RC_HIDE_XNU_J137)
 SYSCTL_PROC(_hw_optional, OID_AUTO, avx512f, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512F, 0, sysctl_cpu_capability, "I", "");
 SYSCTL_PROC(_hw_optional, OID_AUTO, avx512cd, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512CD, 0, sysctl_cpu_capability, "I", "");
 SYSCTL_PROC(_hw_optional, OID_AUTO, avx512dq, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512DQ, 0, sysctl_cpu_capability, "I", "");
@@ -594,7 +593,6 @@ SYSCTL_PROC(_hw_optional, OID_AUTO, avx512bw, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG
 SYSCTL_PROC(_hw_optional, OID_AUTO, avx512vl, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512VL, 0, sysctl_cpu_capability, "I", "");
 SYSCTL_PROC(_hw_optional, OID_AUTO, avx512ifma, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512IFMA, 0, sysctl_cpu_capability, "I", "");
 SYSCTL_PROC(_hw_optional, OID_AUTO, avx512vbmi, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512VBMI, 0, sysctl_cpu_capability, "I", "");
-#endif /* not RC_HIDE_XNU_J137 */
 #elif defined (__arm__) || defined (__arm64__)
 int watchpoint_flag = -1;
 int breakpoint_flag = -1;
index 0fd0cc33662786ff6fe422cc9b98fba721aa9cf4..de0e20667ca5300df2dd80d91d6287dd1f094160 100644 (file)
 #endif
 #include <os/overflow.h>
 
-#ifndef CONFIG_EMBEDDED
-#include <IOKit/IOBSD.h> /* for IOTaskHasEntitlement */
-#include <sys/csr.h> /* for csr_check */
-#define MAP_32BIT_ENTITLEMENT "com.apple.security.mmap-map-32bit"
-#endif
-
 /*
  * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
  * XXX usage is PROT_* from an interface perspective.  Thus the values of
@@ -566,13 +560,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 
 #ifndef CONFIG_EMBEDDED
        if (flags & MAP_32BIT) {
-               if (csr_check(CSR_ALLOW_UNTRUSTED_KEXTS) == 0 ||
-                   IOTaskHasEntitlement(current_task(), MAP_32BIT_ENTITLEMENT)) {
-                       vmk_flags.vmkf_32bit_map_va = TRUE;
-               } else {
-                       error = EPERM;
-                       goto bad;
-               }
+               vmk_flags.vmkf_32bit_map_va = TRUE;
        }
 #endif
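With the CSR/entitlement gate removed, any caller on a non-embedded build can request a low-4GB placement. A minimal usage sketch, assuming MAP_32BIT is visible to the caller through <sys/mman.h> on this configuration:

    #include <sys/mman.h>
    #include <stdio.h>

    int
    main(void)
    {
            /* ask for an anonymous mapping constrained to a 32-bit address */
            void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                MAP_ANON | MAP_PRIVATE | MAP_32BIT, -1, 0);
            if (p != MAP_FAILED) {
                    printf("mapped at %p\n", p);  /* expected to lie below 4GB */
                    munmap(p, 4096);
            }
            return 0;
    }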
 
index 04c70d47a1d26691dc164464d70146820dafc94f..695d335b796c467fecedd0fac728c6aeb6d912b4 100644 (file)
@@ -95,7 +95,7 @@ static void system_override_begin(uint64_t flags);
 static void system_override_end(uint64_t flags);
 static void system_override_abort(uint64_t flags);
 static void system_override_callouts(uint64_t flags, boolean_t enable_override);
-static __attribute__((noinline)) void PROCESS_OVERRIDING_SYSTEM_DEFAULTS(uint64_t timeout);
+static __attribute__((noinline)) int PROCESS_OVERRIDING_SYSTEM_DEFAULTS(uint64_t timeout);
 
 void
 init_system_override()
@@ -140,7 +140,7 @@ system_override(__unused struct proc *p, struct system_override_args * uap, __un
                system_override_abort(flags);
        } else {
                system_override_begin(flags);
-               PROCESS_OVERRIDING_SYSTEM_DEFAULTS(timeout);
+               error = PROCESS_OVERRIDING_SYSTEM_DEFAULTS(timeout);
                system_override_end(flags);
        }
 
@@ -307,11 +307,13 @@ system_override_abort(uint64_t flags)
        }
 }
 
-static __attribute__((noinline)) void
+static __attribute__((noinline)) int
 PROCESS_OVERRIDING_SYSTEM_DEFAULTS(uint64_t timeout)
 {
        struct timespec ts;
        ts.tv_sec = timeout / NSEC_PER_SEC;
        ts.tv_nsec = timeout - ((long)ts.tv_sec * NSEC_PER_SEC);
-       msleep((caddr_t)&sys_override_wait, &sys_override_lock, PRIBIO | PCATCH, "system_override", &ts);
+       int error = msleep((caddr_t)&sys_override_wait, &sys_override_lock, PRIBIO | PCATCH, "system_override", &ts);
+       /* msleep returns EWOULDBLOCK if the timeout expires; treat that as success */
+       return (error == EWOULDBLOCK) ? 0 : error;
 }
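The EWOULDBLOCK convention above is worth calling out: msleep() with a timeout returns EWOULDBLOCK when the deadline expires without a wakeup, and for a bounded override window that is the expected outcome, not a failure. A minimal sketch of the same pattern, with hypothetical event/lock names:

    struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
    /* PCATCH lets signals interrupt the wait (EINTR/ERESTART). */
    int error = msleep((caddr_t)&example_event, &example_lock,
        PRIBIO | PCATCH, "example_wait", &ts);
    if (error == EWOULDBLOCK) {
            error = 0;      /* deadline passed: the wait simply ran its course */
    }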
index c074ae1c628536b1b37ca478f1d26ce8ff275c56..c5ea090ce7b1914c26a94ec0b08870610ff28bf9 100644 (file)
@@ -836,10 +836,20 @@ proc_selfppid(void)
        return current_proc()->p_ppid;
 }
 
-int
+uint64_t
 proc_selfcsflags(void)
 {
-       return current_proc()->p_csflags;
+       return (uint64_t)current_proc()->p_csflags;
+}
+
+int
+proc_csflags(proc_t p, uint64_t *flags)
+{
+       if (p && flags) {
+               *flags = (uint64_t)p->p_csflags;
+               return 0;
+       }
+       return EINVAL;
 }
 
 uint32_t
@@ -936,6 +946,12 @@ proc_name(int pid, char * buf, int size)
 {
        proc_t p;
 
+       if (size <= 0) {
+               return;
+       }
+
+       bzero(buf, size);
+
        if ((p = proc_find(pid)) != PROC_NULL) {
                strlcpy(buf, &p->p_comm[0], size);
                proc_rele(p);
@@ -1267,6 +1283,63 @@ proc_getexecutablevnode(proc_t p)
        return NULLVP;
 }
 
+int
+proc_gettty(proc_t p, vnode_t *vp)
+{
+       if (!p || !vp) {
+               return EINVAL;
+       }
+
+       struct session *procsp = proc_session(p);
+       int err = EINVAL;
+
+       if (procsp != SESSION_NULL) {
+               session_lock(procsp);
+               vnode_t ttyvp = procsp->s_ttyvp;
+               int ttyvid = procsp->s_ttyvid;
+               session_unlock(procsp);
+
+               if (ttyvp) {
+                       if (vnode_getwithvid(ttyvp, ttyvid) == 0) {
+                               *vp = procsp->s_ttyvp;
+                               err = 0;
+                       }
+               } else {
+                       err = ENOENT;
+               }
+
+               session_rele(procsp);
+       }
+
+       return err;
+}
+
+int
+proc_gettty_dev(proc_t p, dev_t *dev)
+{
+       struct session *procsp = proc_session(p);
+       boolean_t has_tty = FALSE;
+
+       if (procsp != SESSION_NULL) {
+               session_lock(procsp);
+
+               struct tty * tp = SESSION_TP(procsp);
+               if (tp != TTY_NULL) {
+                       *dev = tp->t_dev;
+                       has_tty = TRUE;
+               }
+
+               session_unlock(procsp);
+               session_rele(procsp);
+       }
+
+       if (has_tty) {
+               return 0;
+       } else {
+               return EINVAL;
+       }
+}
+
 int
 proc_selfexecutableargs(uint8_t *buf, size_t *buflen)
 {
@@ -2429,7 +2502,7 @@ csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user
                 */
                if (forself == 1 && IOTaskHasEntitlement(pt->task, CLEAR_LV_ENTITLEMENT)) {
                        proc_lock(pt);
-                       pt->p_csflags &= (~(CS_REQUIRE_LV & CS_FORCED_LV));
+                       pt->p_csflags &= (~(CS_REQUIRE_LV | CS_FORCED_LV));
                        proc_unlock(pt);
                        error = 0;
                } else {
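The one-character fix above deserves a note: ~(CS_REQUIRE_LV & CS_FORCED_LV) complements only the bits the two constants share (typically none), so the old code cleared nothing. Clearing several flags requires OR-ing them before complementing. A minimal illustration with the same constants:

    uint32_t flags = CS_REQUIRE_LV | CS_FORCED_LV | CS_VALID;
    flags &= ~(CS_REQUIRE_LV & CS_FORCED_LV);   /* bug: flags unchanged */
    flags &= ~(CS_REQUIRE_LV | CS_FORCED_LV);   /* fix: only CS_VALID remains */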
index 5d2fcee095c3e03ebab8c1eadcdc2dc44e16b250..0e55c44451203aec65e8c3c1e614f2f69aa0f899 100644 (file)
 #include <machine/machine_routines.h>
 #include <machine/exec.h>
 
+#include <nfs/nfs_conf.h>
+
 #include <vm/vm_protos.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_compressor_algorithms.h>
@@ -232,7 +234,7 @@ fill_user32_proc(proc_t, struct user32_kinfo_proc *__restrict);
 
 extern int
 kdbg_control(int *name, u_int namelen, user_addr_t where, size_t * sizep);
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
 extern int
 netboot_root(void);
 #endif
@@ -282,7 +284,7 @@ STATIC int sysctl_hostname(struct sysctl_oid *oidp, void *arg1, int arg2, struct
 STATIC int sysctl_procname(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 STATIC int sysctl_boottime(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 STATIC int sysctl_symfile(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
 STATIC int sysctl_netboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 #endif
 #ifdef CONFIG_IMGSRC_ACCESS
@@ -2347,7 +2349,7 @@ SYSCTL_PROC(_kern, KERN_SYMFILE, symfile,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED,
     0, 0, sysctl_symfile, "A", "");
 
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
 STATIC int
 sysctl_netboot
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
@@ -4440,6 +4442,41 @@ SYSCTL_PROC(_kern, OID_AUTO, grade_cputype,
 
 #if DEVELOPMENT || DEBUG
 
+extern void do_cseg_wedge_thread(void);
+extern void do_cseg_unwedge_thread(void);
+
+static int
+cseg_wedge_thread SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+
+       int error, val = 0;
+       error = sysctl_handle_int(oidp, &val, 0, req);
+       if (error || val == 0) {
+               return error;
+       }
+
+       do_cseg_wedge_thread();
+       return 0;
+}
+SYSCTL_PROC(_kern, OID_AUTO, cseg_wedge_thread, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, cseg_wedge_thread, "I", "wedge c_seg thread");
+
+static int
+cseg_unwedge_thread SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+
+       int error, val = 0;
+       error = sysctl_handle_int(oidp, &val, 0, req);
+       if (error || val == 0) {
+               return error;
+       }
+
+       do_cseg_unwedge_thread();
+       return 0;
+}
+SYSCTL_PROC(_kern, OID_AUTO, cseg_unwedge_thread, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, cseg_unwedge_thread, "I", "unwedge c_seg thread");
+
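Both handlers act only when a non-zero value is written, so reading the OID is harmless. A hedged userspace sketch of triggering the wedge (the OID exists only on DEVELOPMENT/DEBUG kernels):

    #include <stdio.h>
    #include <sys/sysctl.h>

    int one = 1;
    /* Fails with ENOENT on RELEASE kernels where the sysctl is compiled out. */
    if (sysctlbyname("kern.cseg_wedge_thread", NULL, NULL, &one, sizeof(one)) != 0) {
            perror("sysctlbyname");
    }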
 static atomic_int wedge_thread_should_wake = 0;
 
 static int
@@ -4792,8 +4829,42 @@ SYSCTL_PROC(_kern, OID_AUTO, test_mtx_uncontended, CTLTYPE_STRING | CTLFLAG_MASK
 
 extern uint64_t MutexSpin;
 
-SYSCTL_QUAD(_kern, OID_AUTO, mutex_spin_us, CTLFLAG_RW, &MutexSpin,
-    "Spin time for acquiring a kernel mutex");
+SYSCTL_QUAD(_kern, OID_AUTO, mutex_spin_abs, CTLFLAG_RW, &MutexSpin,
+    "Spin time in abs for acquiring a kernel mutex");
+
+extern uint64_t low_MutexSpin;
+extern int64_t high_MutexSpin;
+extern unsigned int real_ncpus;
+
+SYSCTL_QUAD(_kern, OID_AUTO, low_mutex_spin_abs, CTLFLAG_RW, &low_MutexSpin,
+    "Low spin threshold in abs for acquiring a kernel mutex");
+
+static int
+sysctl_high_mutex_spin_ns SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error;
+       int64_t val = 0;
+       int64_t res;
+
+       /* Check if the user is writing to high_MutexSpin, or just reading it */
+       if (req->newptr) {
+               error = SYSCTL_IN(req, &val, sizeof(val));
+               if (error || (val < 0 && val != -1)) {
+                       return error;
+               }
+               high_MutexSpin = val;
+       }
+
+       if (high_MutexSpin >= 0) {
+               res = high_MutexSpin;
+       } else {
+               res = low_MutexSpin * real_ncpus;
+       }
+       return SYSCTL_OUT(req, &res, sizeof(res));
+}
+SYSCTL_PROC(_kern, OID_AUTO, high_mutex_spin_abs, CTLFLAG_RW | CTLTYPE_QUAD, 0, 0, sysctl_high_mutex_spin_ns, "I",
+    "High spin threshold in abs for acquiring a kernel mutex");
 
 #if defined (__x86_64__)
 
index ba02e1540c759904aaebd7f1ee1fa59852a6ce8e..83581807f1397f4816a8a0f8ef542923720eca4f 100644 (file)
@@ -121,7 +121,7 @@ common_hook(void)
        return rv;
 }
 
-#if (MAC_POLICY_OPS_VERSION != 59)
+#if (MAC_POLICY_OPS_VERSION != 62)
 # error "struct mac_policy_ops doesn't match definition in mac_policy.h"
 #endif
 /*
@@ -285,7 +285,7 @@ const static struct mac_policy_ops policy_ops = {
 
        CHECK_SET_HOOK(vnode_check_trigger_resolve)
        CHECK_SET_HOOK(mount_check_mount_late)
-       .mpo_reserved1 = (mpo_reserved_hook_t *)common_hook,
+       CHECK_SET_HOOK(mount_check_snapshot_mount)
        .mpo_reserved2 = (mpo_reserved_hook_t *)common_hook,
        CHECK_SET_HOOK(skywalk_flow_check_connect)
        CHECK_SET_HOOK(skywalk_flow_check_listen)
index b5a78b6b9489435ff0e90c0b044aa9bebca1a680..4e1a1c4fbd0c9047baafd2a078a89bcddafc3a31 100644 (file)
@@ -332,19 +332,24 @@ oslog_streamopen(__unused dev_t dev, __unused int flags, __unused int mode, stru
        if (!oslog_stream_msg_bufc) {
                return ENOMEM;
        }
+       /* Zeroing to avoid copying uninitialized struct padding to userspace. */
+       bzero(oslog_stream_msg_bufc, oslog_stream_buf_size);
 
        /* entries to support kernel logging in stream mode */
-       entries = kalloc(oslog_stream_num_entries * sizeof(struct oslog_stream_buf_entry_s));
+       size_t entries_size = oslog_stream_num_entries * sizeof(struct oslog_stream_buf_entry_s);
+       entries = kalloc(entries_size);
        if (!entries) {
                kfree(oslog_stream_msg_bufc, oslog_stream_buf_size);
                return ENOMEM;
        }
+       /* Zeroing to avoid copying uninitialized struct padding to userspace. */
+       bzero(entries, entries_size);
 
        stream_lock();
        if (oslog_stream_open) {
                stream_unlock();
                kfree(oslog_stream_msg_bufc, oslog_stream_buf_size);
-               kfree(entries, oslog_stream_num_entries * sizeof(struct oslog_stream_buf_entry_s));
+               kfree(entries, entries_size);
                return EBUSY;
        }
 
@@ -359,9 +364,6 @@ oslog_streamopen(__unused dev_t dev, __unused int flags, __unused int mode, stru
 
        for (int i = 0; i < oslog_stream_num_entries; i++) {
                oslog_stream_buf_entries[i].type = oslog_stream_link_type_log;
-               oslog_stream_buf_entries[i].offset = 0;
-               oslog_stream_buf_entries[i].size = 0;
-               oslog_stream_buf_entries[i].timestamp = 0;
                STAILQ_INSERT_TAIL(&oslog_stream_free_head, &oslog_stream_buf_entries[i], buf_entries);
        }
 
index ddf8e5db4aed91e230804f171bef57984f4d68ef..0d797382679639ac2ea65c5ca5c40dcd9c9bee51 100644 (file)
@@ -122,7 +122,6 @@ struct snprintf_arg {
 extern const char       *debugger_panic_str;
 
 extern  void cnputc(char);              /* standard console putc */
-void    (*v_putc)(char) = cnputc;       /* routine to putc on virtual console */
 
 extern  struct tty cons;                /* standard console tty */
 extern struct   tty *constty;           /* pointer to console "window" tty */
@@ -385,7 +384,7 @@ putchar(int c, void *arg)
                log_putc_locked(msgbufp, c);
        }
        if ((pca->flags & TOCONS) && constty == 0 && c != '\0') {
-               (*v_putc)(c);
+               cnputc(c);
        }
        if (pca->flags & TOSTR) {
                **sp = c;
index 6878545e7e0d53d1641ef4145dddc9f9c47b7484..240bae02096f98a766e9c8db281ef7310beab647 100644 (file)
@@ -37,6 +37,7 @@
 #include <sys/types.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
+#include <nfs/nfs_conf.h>
 
 0      AUE_NULL        ALL     { int nosys(void); }   { indirect syscall }
 1      AUE_EXIT        ALL     { void exit(int rval) NO_SYSCALL_STUB; } 
 153    AUE_PREAD       ALL     { user_ssize_t pread(int fd, user_addr_t buf, user_size_t nbyte, off_t offset); } 
 154    AUE_PWRITE      ALL     { user_ssize_t pwrite(int fd, user_addr_t buf, user_size_t nbyte, off_t offset); } 
 
-#if NFSSERVER
+#if NFSSERVER  /* XXX */
 155    AUE_NFS_SVC     ALL     { int nfssvc(int flag, caddr_t argp); } 
 #else
 155    AUE_NULL        ALL     { int nosys(void); } 
 159    AUE_UNMOUNT     ALL     { int unmount(user_addr_t path, int flags); } 
 160    AUE_NULL        ALL     { int nosys(void); }   { old async_daemon }
 
-#if NFSSERVER
+#if NFSSERVER  /* XXX */
 161    AUE_NFS_GETFH   ALL     { int getfh(char *fname, fhandle_t *fhp); } 
 #else
 161    AUE_NULL        ALL     { int nosys(void); } 
 245    AUE_FFSCTL      ALL     { int ffsctl(int fd, u_long cmd, caddr_t data, u_int options); } 
 246    AUE_NULL        ALL     { int nosys(void); } 
 
-#if NFSCLIENT
+#if NFSCLIENT  /* XXX */
 247    AUE_NULL        ALL     { int nfsclnt(int flag, caddr_t argp); } 
 #else
 247    AUE_NULL        ALL     { int nosys(void); } 
 #endif
-#if NFSSERVER
+#if NFSSERVER  /* XXX */
 248    AUE_FHOPEN      ALL     { int fhopen(const struct fhandle *u_fhp, int flags); } 
 #else
 248    AUE_NULL        ALL     { int nosys(void); } 
index fa3e01f846ece8ed7ec1bb3cb031241dcc37798f..842ff323badd1f1e80d024bf3e052bfc3a75b34b 100644 (file)
 0x3130164      VFS_devfs_label_associate_device
 0x3130168      VFS_devfs_label_associate_directory
 0x313016C      VFS_label_associate_fdesc
+0x3130170      VFS_mount_check_snapshot_mount
 0x3CF0000      CP_OFFSET_IO
 0x4010004      proc_exit
 0x4010008      force_exit
index 4ab504c31dfe11db61b4d880da0dddaa86ae705b..b94476d05ea29c77beaee69b10de9cdb7766b19f 100644 (file)
@@ -667,6 +667,9 @@ socreate_internal(int dom, struct socket **aso, int type, int proto,
        struct protosw *prp;
        struct socket *so;
        int error = 0;
+#if defined(XNU_TARGET_OS_OSX)
+       pid_t rpid = -1;
+#endif
 
 #if TCPDEBUG
        extern int tcpconsdebug;
@@ -757,7 +760,29 @@ socreate_internal(int dom, struct socket **aso, int type, int proto,
                so->e_pid = proc_pid(ep);
                proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
                so->so_flags |= SOF_DELEGATED;
+#if defined(XNU_TARGET_OS_OSX)
+               if (ep->p_responsible_pid != so->e_pid) {
+                       rpid = ep->p_responsible_pid;
+               }
+#endif
+       }
+
+#if defined(XNU_TARGET_OS_OSX)
+       if (rpid < 0 && p->p_responsible_pid != so->last_pid) {
+               rpid = p->p_responsible_pid;
+       }
+
+       so->so_rpid = -1;
+       uuid_clear(so->so_ruuid);
+       if (rpid >= 0) {
+               proc_t rp = proc_find(rpid);
+               if (rp != PROC_NULL) {
+                       proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
+                       so->so_rpid = rpid;
+                       proc_rele(rp);
+               }
        }
+#endif
 
        so->so_cred = kauth_cred_proc_ref(p);
        if (!suser(kauth_cred_get(), NULL)) {
@@ -6532,7 +6557,12 @@ filt_soread_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so
                }
        }
 
-       retval = (data >= lowwat);
+       /*
+        * While the `data` field is the amount of data to read,
+        * 0-sized packets need to wake up the kqueue, see 58140856,
+        * so we need to take control bytes into account too.
+        */
+       retval = (so->so_rcv.sb_cc >= lowwat);
 
 out:
        if (retval && kev) {
@@ -7857,6 +7887,20 @@ so_set_effective_pid(struct socket *so, int epid, struct proc *p, boolean_t chec
                so->e_upid = proc_uniqueid(ep);
                so->e_pid = proc_pid(ep);
                proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
+
+#if defined(XNU_TARGET_OS_OSX)
+               if (ep->p_responsible_pid != so->e_pid) {
+                       proc_t rp = proc_find(ep->p_responsible_pid);
+                       if (rp != PROC_NULL) {
+                               proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
+                               so->so_rpid = ep->p_responsible_pid;
+                               proc_rele(rp);
+                       } else {
+                               uuid_clear(so->so_ruuid);
+                               so->so_rpid = -1;
+                       }
+               }
+#endif
        }
        if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
                (*so->so_proto->pr_update_last_owner)(so, NULL, ep);
index f37a137aa56c7f9be6a658df0c6e8b64536a055d..d0d8daec9152f5b55b811e65d618793572928581 100644 (file)
@@ -1222,7 +1222,44 @@ field of the
 .Vt statfs
 structure returned by
 .Xr statfs 2 .
+.
+.It ATTR_CMNEXT_CLONEID
+A
+.Vt u_int64_t
+that uniquely identifies the data stream associated with the file
+system object.  Useful for finding which files are pure clones of each
+other (as they will have the same clone-id).
+.
+.It ATTR_CMNEXT_EXT_FLAGS
+A
+.Vt u_int64_t
+that contains additional flags with information about the file.  The
+flags are:
+.
+.Bl -tag -width EF_MAY_SHARE_BLOCKS
+.
+.It EF_MAY_SHARE_BLOCKS
+If this bit is set then the file may share blocks with another file
+(i.e. it is a clone of another file).
+.
+.It EF_NO_XATTRS
+If this bit is set then the file has no extended attributes.  Useful
+for avoiding a call to listxattr().
+.
+.It EF_IS_SYNC_ROOT
+If this bit is set the directory is a "sync root".  This bit will
+never be set for regular files.
+.
+.It EF_IS_PURGEABLE
+If this bit is set the item is a "purgeable" item that can be deleted
+by the file system when asked to free space.
+.
+.It EF_IS_SPARSE
+If this bit is set the item has sparse regions.
+.
+.El
 .El
+.Pp
 .
 .Sh VOLUME CAPABILITIES
 .
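Per getattrlist(2), ATTR_CMNEXT_* attributes are requested through the forkattr field together with the FSOPT_ATTR_CMN_EXTENDED option. A hedged sketch of fetching the clone-id documented above (error handling elided):

    #include <sys/attr.h>
    #include <unistd.h>

    struct attrlist al = {
            .bitmapcount = ATTR_BIT_MAP_COUNT,
            .forkattr    = ATTR_CMNEXT_CLONEID,
    };
    struct {
            uint32_t length;        /* total length of the returned attributes */
            uint64_t clone_id;
    } __attribute__((aligned(4), packed)) buf;

    if (getattrlist("/path/to/file", &al, &buf, sizeof(buf),
        FSOPT_ATTR_CMN_EXTENDED) == 0) {
            /* two files with equal clone_id share the same data stream */
    }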
index 3c36e631721eb0217e6b0c43496d6c9d8ab479fb..4ba8415d255dba9565568f3fab42ed8a13b825bb 100644 (file)
@@ -36,13 +36,13 @@ is a compatibility nod to Linux. On Linux,
 will produce lower quality output if the entropy pool drains, while
 .Nm /dev/random
 will prefer to block and wait for additional entropy to be collected.
-With Yarrow, this choice and distinction is not necessary, and
+With Fortuna, this choice and distinction is not necessary, and
 the two devices behave identically. You may use either.
 .Pp
 The
 .Nm
 device implements the
-.Nm Yarrow
+.Nm Fortuna
 pseudo random number generator algorithm and maintains its entropy pool.
 The kernel automatically seeds the algorithm with additional entropy during normal execution.
 .Sh FILES
index a1392ce2a0b338b4380df1305ae78f49906fc1d4..3498ffc0457785d2b588c32ed878db5bddca2f20 100644 (file)
@@ -202,7 +202,7 @@ devfs_mount(struct mount *mp, __unused vnode_t devvp, __unused user_addr_t data,
         *  Fill out some fields
         */
        __IGNORE_WCASTALIGN(mp->mnt_data = (qaddr_t)devfs_mp_p);
-       mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)(uintptr_t)devfs_mp_p;
+       mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)VM_KERNEL_ADDRHASH(devfs_mp_p);
        mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
        mp->mnt_flag |= MNT_LOCAL;
 
@@ -308,7 +308,7 @@ devfs_statfs( struct mount *mp, struct vfsstatfs *sbp, __unused vfs_context_t ct
        sbp->f_bavail = 0;
        sbp->f_files  = devfs_stats.nodes;
        sbp->f_ffree  = 0;
-       sbp->f_fsid.val[0] = (int32_t)(uintptr_t)devfs_mp_p;
+       sbp->f_fsid.val[0] = (int32_t)VM_KERNEL_ADDRHASH(devfs_mp_p);
        sbp->f_fsid.val[1] = vfs_typenum(mp);
 
        return 0;
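The same substitution repeats in the devfs and routefs hunks that follow: a raw mount-structure pointer in f_fsid or va_fsid hands userspace a kernel heap address (defeating kernel ASLR) through statfs(2), so it is replaced with a per-boot keyed hash that stays stable for the pointer's lifetime. The idiom, with a hypothetical pointer name:

    /* Before: leaks a kernel heap address to userspace. */
    sbp->f_fsid.val[0] = (int32_t)(uintptr_t)mount_private;
    /* After: VM_KERNEL_ADDRHASH yields a stable, non-reversible token. */
    sbp->f_fsid.val[0] = (int32_t)VM_KERNEL_ADDRHASH(mount_private);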
index b9de4b101f06b01a7f7cf33f0a5f6c2d161d495e..322f408236512eb14145ef498dd9b88886a046b0 100644 (file)
@@ -483,7 +483,7 @@ devfs_getattr(struct vnop_getattr_args *ap)
        VATTR_RETURN(vap, va_nlink, file_node->dn_links);
        VATTR_RETURN(vap, va_uid, file_node->dn_uid);
        VATTR_RETURN(vap, va_gid, file_node->dn_gid);
-       VATTR_RETURN(vap, va_fsid, (uintptr_t)file_node->dn_dvm);
+       VATTR_RETURN(vap, va_fsid, (uint32_t)VM_KERNEL_ADDRHASH(file_node->dn_dvm));
        VATTR_RETURN(vap, va_fileid, (uintptr_t)file_node->dn_ino);
        VATTR_RETURN(vap, va_data_size, file_node->dn_len);
 
index 664ae0e16607a62eceed8a66ff9fb912f4d9d0c7..42e0822031608aedda5c162677cd45b294a099d3 100644 (file)
@@ -154,7 +154,7 @@ routefs_mount(struct mount *mp, __unused vnode_t devvp, user_addr_t data, vfs_co
         *  Fill out some fields
         */
        __IGNORE_WCASTALIGN(mp->mnt_data = (qaddr_t)routefs_mp_p);
-       mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)(uintptr_t)routefs_mp_p;
+       mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)VM_KERNEL_ADDRHASH(routefs_mp_p);
        mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
        mp->mnt_flag |= MNT_LOCAL;
 
@@ -283,7 +283,7 @@ routefs_statfs( struct mount *mp, struct vfsstatfs *sbp, __unused vfs_context_t
        sbp->f_bavail = 0;
        sbp->f_files  = 0;
        sbp->f_ffree  = 0;
-       sbp->f_fsid.val[0] = (int32_t)(uintptr_t)routefs_mp_p;
+       sbp->f_fsid.val[0] = (int32_t)VM_KERNEL_ADDRHASH(routefs_mp_p);
        sbp->f_fsid.val[1] = vfs_typenum(mp);
 
        return 0;
index 62988b66b456454d8a7961a3bf95cb1b8ce5ae16..626c2b2bf6ddbb965f793e99966ee1e16c6ead88 100644 (file)
@@ -359,6 +359,7 @@ struct content_filter **content_filters = NULL;
 uint32_t cfil_active_count = 0; /* Number of active content filters */
 uint32_t cfil_sock_attached_count = 0;  /* Number of socket attachments */
 uint32_t cfil_sock_udp_attached_count = 0;      /* Number of UDP socket attachments */
+uint32_t cfil_sock_attached_stats_count = 0;    /* Number of sockets that requested periodic stats reports */
 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
 
 static kern_ctl_ref cfil_kctlref = NULL;
@@ -408,6 +409,11 @@ struct cfil_entry {
        uint32_t                cfe_necp_control_unit;
        struct timeval          cfe_last_event; /* To user space */
        struct timeval          cfe_last_action; /* From user space */
+       uint64_t                cfe_byte_inbound_count_reported; /* inbound bytes already reported */
+       uint64_t                cfe_byte_outbound_count_reported; /* outbound bytes already reported */
+       struct timeval          cfe_stats_report_ts; /* Timestamp for last stats report */
+       uint32_t                cfe_stats_report_frequency; /* Interval for stats report in msecs */
+       boolean_t               cfe_laddr_sent;
 
        struct cfe_buf {
                /*
@@ -455,6 +461,7 @@ struct cfil_hash_entry;
  */
 struct cfil_info {
        TAILQ_ENTRY(cfil_info)  cfi_link;
+       TAILQ_ENTRY(cfil_info)  cfi_link_stats;
        struct socket           *cfi_so;
        uint64_t                cfi_flags;
        uint64_t                cfi_sock_id;
@@ -517,6 +524,7 @@ struct cfil_info {
 #define CFI_ENTRY_KCUNIT(i, e) (((e) - &((i)->cfi_entries[0])) + 1)
 
 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
+TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
 
 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
@@ -538,6 +546,23 @@ LIST_HEAD(cfilhashhead, cfil_hash_entry);
                           (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
                           (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
 
+/*
+ * Periodic Statistics Report:
+ */
+static struct thread *cfil_stats_report_thread;
+#define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC  500   // Highest report frequency
+#define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC  (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
+#define CFIL_STATS_REPORT_MAX_COUNT          50    // Max stats to be reported per run
+
+/* This buffer must have the same layout as struct cfil_msg_stats_report */
+struct cfil_stats_report_buffer {
+       struct cfil_msg_hdr        msghdr;
+       uint32_t                   count;
+       struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
+};
+static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
+static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
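Since the buffer struct must mirror struct cfil_msg_stats_report (declared in content_filter.h in this same commit) but swaps the flexible array for a fixed-size one, a compile-time layout check is cheap insurance. A hedged sketch of what one could add:

    #include <stddef.h>

    _Static_assert(offsetof(struct cfil_stats_report_buffer, count) ==
        offsetof(struct cfil_msg_stats_report, cfr_count),
        "count field must match the wire format");
    _Static_assert(offsetof(struct cfil_stats_report_buffer, stats) ==
        offsetof(struct cfil_msg_stats_report, cfr_stats),
        "stats array must start where cfr_stats does");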
+
 /*
  * UDP Garbage Collection:
  */
@@ -571,7 +596,7 @@ struct cfil_hash_entry {
        u_short cfentry_lport;
        sa_family_t                    cfentry_family;
        u_int32_t                      cfentry_flowhash;
-       u_int32_t                      cfentry_lastused;
+       u_int64_t                      cfentry_lastused;
        union {
                /* foreign host table entry */
                struct in_addr_4in6 addr46;
@@ -637,6 +662,7 @@ int cfil_debug = 1;
 #define DATA_DEBUG 0
 #define SHOW_DEBUG 0
 #define GC_DEBUG 0
+#define STATS_DEBUG 0
 
 /*
  * Sysctls for logs and statistics
@@ -754,6 +780,8 @@ static void cfil_sock_received_verdict(struct socket *so);
 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
     union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
     boolean_t, boolean_t);
+static void cfil_stats_report_thread_func(void *, wait_result_t);
+static void cfil_stats_report(void *v, wait_result_t w);
 
 bool check_port(struct sockaddr *, u_short);
 
@@ -1190,6 +1218,34 @@ cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
 
                *unitinfo = cfc;
                cfil_active_count++;
+
+               // Allocate periodic stats buffer for this filter
+               if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
+                       cfil_rw_unlock_exclusive(&cfil_lck_rw);
+
+                       struct cfil_stats_report_buffer *buf;
+
+                       MALLOC(buf,
+                           struct cfil_stats_report_buffer *,
+                           sizeof(struct cfil_stats_report_buffer),
+                           M_TEMP,
+                           M_WAITOK | M_ZERO);
+
+                       cfil_rw_lock_exclusive(&cfil_lck_rw);
+
+                       if (buf == NULL) {
+                               error = ENOMEM;
+                               cfil_rw_unlock_exclusive(&cfil_lck_rw);
+                               goto done;
+                       }
+
+                       /* Another thread may have won the race */
+                       if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
+                               FREE(buf, M_TEMP);
+                       } else {
+                               global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
+                       }
+               }
        }
        cfil_rw_unlock_exclusive(&cfil_lck_rw);
 done:
@@ -1334,6 +1390,11 @@ release:
        }
        verify_content_filter(cfc);
 
+       /* Free the stats buffer for this filter */
+       if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
+               FREE(global_cfil_stats_report_buffers[cfc->cf_kcunit - 1], M_TEMP);
+               global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
+       }
        VERIFY(cfc->cf_sock_count == 0);
 
        /*
@@ -1593,6 +1654,90 @@ done:
        return so;
 }
 
+static void
+cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
+{
+       struct cfil_info *cfil = NULL;
+       Boolean found = FALSE;
+       int kcunit;
+
+       if (cfil_info == NULL) {
+               return;
+       }
+
+       if (report_frequency) {
+               if (entry == NULL) {
+                       return;
+               }
+
+               // Update stats reporting frequency.
+               if (entry->cfe_stats_report_frequency != report_frequency) {
+                       entry->cfe_stats_report_frequency = report_frequency;
+                       if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
+                               entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
+                       }
+                       microuptime(&entry->cfe_stats_report_ts);
+
+                       // Insert cfil_info into the list only if it is not already there.
+                       TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
+                               if (cfil == cfil_info) {
+                                       return;
+                               }
+                       }
+
+                       TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
+
+                       // Wake up the stats thread if this is the first flow added
+                       if (cfil_sock_attached_stats_count == 0) {
+                               thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
+                       }
+                       cfil_sock_attached_stats_count++;
+#if STATS_DEBUG
+                       CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
+                           cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
+                           cfil_info->cfi_sock_id,
+                           entry->cfe_stats_report_frequency);
+#endif
+               }
+       } else {
+               // Turn off stats reporting for this filter.
+               if (entry != NULL) {
+                       // Already off, no change.
+                       if (entry->cfe_stats_report_frequency == 0) {
+                               return;
+                       }
+
+                       entry->cfe_stats_report_frequency = 0;
+                       // If cfil_info still has filter(s) asking for stats, no need to remove from list.
+                       for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
+                               if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
+                                       return;
+                               }
+                       }
+               }
+
+               // No more filters asking for stats for this cfil_info; remove it from the list.
+               if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
+                       found = FALSE;
+                       TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
+                               if (cfil == cfil_info) {
+                                       found = TRUE;
+                                       break;
+                               }
+                       }
+                       if (found) {
+                               cfil_sock_attached_stats_count--;
+                               TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
+#if STATS_DEBUG
+                               CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
+                                   cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
+                                   cfil_info->cfi_sock_id);
+#endif
+                       }
+               }
+       }
+}
+
 static errno_t
 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
     int flags)
@@ -1787,6 +1932,12 @@ cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
                if (error == EJUSTRETURN) {
                        error = 0;
                }
+
+               // Toggle stats reporting according to received verdict.
+               cfil_rw_lock_exclusive(&cfil_lck_rw);
+               cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
+               cfil_rw_unlock_exclusive(&cfil_lck_rw);
+
                break;
 
        case CFM_OP_DROP:
@@ -2247,6 +2398,7 @@ cfil_init(void)
        lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
 
        TAILQ_INIT(&cfil_sock_head);
+       TAILQ_INIT(&cfil_sock_head_stats);
 
        /*
         * Register kernel control
@@ -2278,10 +2430,21 @@ cfil_init(void)
        /* this must not fail */
        VERIFY(cfil_udp_gc_thread != NULL);
 
+       // Spawn thread for statistics reporting
+       if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
+           &cfil_stats_report_thread) != KERN_SUCCESS) {
+               panic_plain("%s: Can't create statistics report thread", __func__);
+               /* NOTREACHED */
+       }
+       /* this must not fail */
+       VERIFY(cfil_stats_report_thread != NULL);
+
        // Set UDP per-flow mbuf thresholds to 1/32 of platform max
        mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
        cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
        cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
+
+       memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
 }
 
 struct cfil_info *
@@ -2486,6 +2649,9 @@ cfil_info_free(struct cfil_info *cfil_info)
        cfil_sock_attached_count--;
        TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
 
+       // Turn off stats reporting for cfil_info.
+       cfil_info_stats_toggle(cfil_info, NULL, 0);
+
        out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
        in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
 
@@ -3258,6 +3424,10 @@ static void
 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
     struct in6_addr *ip6, u_int16_t port)
 {
+       if (sin46 == NULL) {
+               return;
+       }
+
        struct sockaddr_in6 *sin6 = &sin46->sin6;
 
        sin6->sin6_family = AF_INET6;
@@ -3274,6 +3444,10 @@ static void
 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
     struct in_addr ip, u_int16_t port)
 {
+       if (sin46 == NULL) {
+               return;
+       }
+
        struct sockaddr_in *sin = &sin46->sin;
 
        sin->sin_family = AF_INET;
@@ -6548,7 +6722,7 @@ cfil_info_udp_expire(void *v, wait_result_t w)
        struct cfil_hash_entry *hash_entry;
        struct cfil_db *db;
        struct socket *so;
-       u_int32_t current_time = 0;
+       u_int64_t current_time = 0;
 
        current_time = net_uptime();
 
@@ -6699,3 +6873,318 @@ cfil_udp_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *opt
        }
        return NULL;
 }
+
+static int
+cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
+{
+       struct content_filter *cfc = NULL;
+       errno_t error = 0;
+       size_t msgsize = 0;
+
+       if (buffer == NULL || stats_count == 0) {
+               return error;
+       }
+
+       if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
+               return error;
+       }
+
+       cfc = content_filters[kcunit - 1];
+       if (cfc == NULL) {
+               return error;
+       }
+
+       /* Would be wasteful to try */
+       if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
+               error = ENOBUFS;
+               goto done;
+       }
+
+       msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
+       buffer->msghdr.cfm_len = msgsize;
+       buffer->msghdr.cfm_version = 1;
+       buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
+       buffer->msghdr.cfm_op = CFM_OP_STATS;
+       buffer->msghdr.cfm_sock_id = 0;
+       buffer->count = stats_count;
+
+#if STATS_DEBUG
+       CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
+           kcunit,
+           (unsigned long)msgsize,
+           (unsigned long)sizeof(struct cfil_msg_stats_report),
+           (unsigned long)sizeof(struct cfil_msg_sock_stats),
+           (unsigned long)stats_count);
+#endif
+
+       error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
+           buffer,
+           msgsize,
+           CTL_DATA_EOR);
+       if (error != 0) {
+               CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
+               goto done;
+       }
+       OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
+
+#if STATS_DEBUG
+       CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);
+#endif
+
+done:
+
+       if (error == ENOBUFS) {
+               OSIncrementAtomic(
+                       &cfil_stats.cfs_stats_event_flow_control);
+
+               if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
+                       cfil_rw_lock_exclusive(&cfil_lck_rw);
+               }
+
+               cfc->cf_flags |= CFF_FLOW_CONTROLLED;
+
+               cfil_rw_unlock_exclusive(&cfil_lck_rw);
+       } else if (error != 0) {
+               OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
+       }
+
+       return error;
+}
+
+static void
+cfil_stats_report_thread_sleep(bool forever)
+{
+#if STATS_DEBUG
+       CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");
+#endif
+
+       if (forever) {
+               (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
+                   THREAD_INTERRUPTIBLE);
+       } else {
+               uint64_t deadline = 0;
+               nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
+               clock_absolutetime_interval_to_deadline(deadline, &deadline);
+
+               (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
+                   THREAD_INTERRUPTIBLE, deadline);
+       }
+}
+
+static void
+cfil_stats_report_thread_func(void *v, wait_result_t w)
+{
+#pragma unused(v, w)
+
+       ASSERT(cfil_stats_report_thread == current_thread());
+       thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
+
+       // Kick off the first stats report pass shortly
+       cfil_stats_report_thread_sleep(false);
+       thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
+       /* NOTREACHED */
+}
+
+static bool
+cfil_stats_collect_flow_stats_for_filter(int kcunit,
+    struct cfil_info *cfil_info,
+    struct cfil_entry *entry,
+    struct timeval current_tv)
+{
+       struct cfil_stats_report_buffer *buffer = NULL;
+       struct cfil_msg_sock_stats *flow_array = NULL;
+       struct cfil_msg_sock_stats *stats = NULL;
+       struct inpcb *inp = NULL;
+       struct timeval diff_time;
+       uint64_t diff_time_usecs;
+       int index = 0;
+
+       if (entry->cfe_stats_report_frequency == 0) {
+               return false;
+       }
+
+       buffer = global_cfil_stats_report_buffers[kcunit - 1];
+       if (buffer == NULL) {
+#if STATS_DEBUG
+               CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
+#endif
+               return false;
+       }
+
+       timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
+       diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
+
+#if STATS_DEBUG
+       CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
+           (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
+           (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
+           (unsigned long long)current_tv.tv_sec,
+           (unsigned long long)current_tv.tv_usec,
+           (unsigned long long)diff_time.tv_sec,
+           (unsigned long long)diff_time.tv_usec,
+           (unsigned long long)diff_time_usecs,
+           (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
+           cfil_info->cfi_sock_id);
+#endif
+
+       // Compare elapsed time in usecs
+       if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
+#if STATS_DEBUG
+               CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
+                   cfil_info->cfi_byte_inbound_count,
+                   entry->cfe_byte_inbound_count_reported);
+               CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
+                   cfil_info->cfi_byte_outbound_count,
+                   entry->cfe_byte_outbound_count_reported);
+#endif
+               // Check if flow has new bytes that have not been reported
+               if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
+                   entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
+                       flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
+                       index = global_cfil_stats_counts[kcunit - 1];
+
+                       stats = &flow_array[index];
+                       stats->cfs_sock_id = cfil_info->cfi_sock_id;
+                       stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
+                       stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
+
+                       if (entry->cfe_laddr_sent == false) {
+                               /* cache it if necessary */
+                               if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
+                                       inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
+                                       if (inp != NULL) {
+                                               boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
+                                               union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
+                                               union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
+                                               cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
+                                                   src, dst, inp->inp_vflag & INP_IPV4, outgoing);
+                                       }
+                               }
+
+                               if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
+                                       stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
+                                       entry->cfe_laddr_sent = true;
+                               }
+                       }
+
+                       global_cfil_stats_counts[kcunit - 1]++;
+
+                       entry->cfe_stats_report_ts = current_tv;
+                       entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
+                       entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
+#if STATS_DEBUG
+                       cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");
+#endif
+                       CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
+                       return true;
+               }
+       }
+       return false;
+}
+
+static void
+cfil_stats_report(void *v, wait_result_t w)
+{
+#pragma unused(v, w)
+
+       struct cfil_info *cfil_info = NULL;
+       struct cfil_entry *entry = NULL;
+       struct timeval current_tv;
+       uint32_t flow_count = 0;
+       uint64_t saved_next_sock_id = 0; // Next sock id to be reported for next loop
+       bool flow_reported = false;
+
+#if STATS_DEBUG
+       CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");
+#endif
+
+       do {
+               // Collect the sock ids of all flows that have new stats
+               cfil_rw_lock_shared(&cfil_lck_rw);
+
+               if (cfil_sock_attached_stats_count == 0) {
+#if STATS_DEBUG
+                       CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
+#endif
+                       cfil_rw_unlock_shared(&cfil_lck_rw);
+                       goto go_sleep;
+               }
+
+               for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
+                       if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
+                               memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
+                       }
+                       global_cfil_stats_counts[kcunit - 1] = 0;
+               }
+
+               microuptime(&current_tv);
+               flow_count = 0;
+
+               TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
+                       if (saved_next_sock_id != 0 &&
+                           saved_next_sock_id == cfil_info->cfi_sock_id) {
+                               // Here is where we left off previously, start accumulating
+                               saved_next_sock_id = 0;
+                       }
+
+                       if (saved_next_sock_id == 0) {
+                               if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
+                                       // Examine a fixed number of flows each round.  Remember the current flow
+                                       // so we can start from here on the next loop
+                                       saved_next_sock_id = cfil_info->cfi_sock_id;
+                                       break;
+                               }
+
+                               flow_reported = false;
+                               for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
+                                       entry = &cfil_info->cfi_entries[kcunit - 1];
+                                       if (entry->cfe_filter == NULL) {
+#if STATS_DEBUG
+                                               CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
+                                                   cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
+#endif
+                                               continue;
+                                       }
+
+                                       if ((entry->cfe_stats_report_frequency > 0) &&
+                                           cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
+                                               flow_reported = true;
+                                       }
+                               }
+                               if (flow_reported == true) {
+                                       flow_count++;
+                               }
+                       }
+               }
+
+               if (flow_count > 0) {
+#if STATS_DEBUG
+                       CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
+#endif
+                       for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
+                               if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
+                                   global_cfil_stats_counts[kcunit - 1] > 0) {
+                                       cfil_dispatch_stats_event_locked(kcunit,
+                                           global_cfil_stats_report_buffers[kcunit - 1],
+                                           global_cfil_stats_counts[kcunit - 1]);
+                               }
+                       }
+               } else {
+                       cfil_rw_unlock_shared(&cfil_lck_rw);
+                       goto go_sleep;
+               }
+
+               cfil_rw_unlock_shared(&cfil_lck_rw);
+
+               // Loop again if we haven't finished the whole cfil_info list
+       } while (saved_next_sock_id != 0);
+
+go_sleep:
+
+       // Sleep forever (until woken up) if there are no more flows to report
+       cfil_rw_lock_shared(&cfil_lck_rw);
+       cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0 ? true : false);
+       cfil_rw_unlock_shared(&cfil_lck_rw);
+       thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
+       /* NOTREACHED */
+}
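On the receiving side, a content-filter agent sees CFM_OP_STATS as an ordinary event on its kernel-control socket. A hedged sketch of consuming the report using the structures this commit adds to content_filter.h (transport and error handling elided):

    #include <net/content_filter.h>

    static void
    handle_stats_event(const void *msg, size_t len)
    {
            const struct cfil_msg_stats_report *rpt = msg;

            if (len < sizeof(*rpt) || rpt->cfr_msghdr.cfm_op != CFM_OP_STATS) {
                    return;
            }
            if (len < sizeof(*rpt) +
                rpt->cfr_count * sizeof(struct cfil_msg_sock_stats)) {
                    return;         /* truncated report */
            }
            for (uint32_t i = 0; i < rpt->cfr_count; i++) {
                    const struct cfil_msg_sock_stats *s = &rpt->cfr_stats[i];
                    /* s->cfs_sock_id keys the flow; the byte counters are
                     * cumulative, so the agent must compute deltas itself. */
                    (void)s;
            }
    }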
index b4f4485c53477cb12f7eb74cf3bf2e8ce45baf38..e3829bf0223bc09cf1d31246330aa06c36a64408 100644 (file)
@@ -149,6 +149,7 @@ typedef struct cfil_crypto_data {
 #define CFM_OP_DATA_IN 4                /* data being received */
 #define CFM_OP_DISCONNECT_OUT 5         /* no more outgoing data */
 #define CFM_OP_DISCONNECT_IN 6          /* no more incoming data */
+#define CFM_OP_STATS 7                  /* periodic stats report(s) */
 
 /*
  * Operations associated with action from filter to kernel
@@ -262,6 +263,30 @@ struct cfil_msg_sock_closed {
        uint32_t                cfc_signature_length;
 } __attribute__((aligned(8)));
 
+/*
+ * struct cfil_msg_stats_report
+ *
+ * Statistics report for flow(s).
+ *
+ * Action: No reply is expected.
+ *
+ * Valid Types: CFM_TYPE_EVENT
+ *
+ * Valid Op: CFM_OP_STATS
+ */
+struct cfil_msg_sock_stats {
+       cfil_sock_id_t          cfs_sock_id;
+       uint64_t                cfs_byte_inbound_count;
+       uint64_t                cfs_byte_outbound_count;
+       union sockaddr_in_4_6   cfs_laddr;
+} __attribute__((aligned(8)));
+
+struct cfil_msg_stats_report {
+       struct cfil_msg_hdr        cfr_msghdr;
+       uint32_t                   cfr_count;
+       struct cfil_msg_sock_stats cfr_stats[];
+} __attribute__((aligned(8)));
+
 /*
  * struct cfil_msg_action
  *
@@ -285,6 +310,7 @@ struct cfil_msg_action {
        uint64_t                cfa_in_peek_offset;
        uint64_t                cfa_out_pass_offset;
        uint64_t                cfa_out_peek_offset;
+       uint32_t                cfa_stats_frequency; // Statistics frequency in milliseconds
 };
 
 /*
@@ -409,6 +435,10 @@ struct cfil_stats {
        int32_t cfs_data_event_flow_control;
        int32_t cfs_data_event_fail;
 
+       int32_t cfs_stats_event_ok;
+       int32_t cfs_stats_event_flow_control;
+       int32_t cfs_stats_event_fail;
+
        int32_t cfs_disconnect_in_event_ok;
        int32_t cfs_disconnect_out_event_ok;
        int32_t cfs_disconnect_event_flow_control;
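Because cfr_stats is a flexible array member, sizeof(struct cfil_msg_stats_report) covers only the header and count; the full message length must be computed, exactly as cfil_dispatch_stats_event_locked() does earlier in this commit. A short sketch:

    uint32_t count = 3;     /* flows carried in this report */
    size_t msgsize = sizeof(struct cfil_msg_stats_report) +
        count * sizeof(struct cfil_msg_sock_stats);
    /* cfm_len must be set to msgsize before the message is enqueued */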
index d38af044c1eed58024808316bd431ae3e6f1bbb0..4a703aee2756aeaa21aac9091452dd328412fce8 100644 (file)
@@ -2275,7 +2275,8 @@ dlil_input_thread_cont(void *v, wait_result_t wres)
 
                lck_mtx_lock_spin(&inp->input_lck);
                VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
-               if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
+               if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
+                   DLIL_INPUT_TERMINATE))) {
                        break;
                }
        }
@@ -2579,7 +2580,8 @@ skip:
 
                lck_mtx_lock_spin(&inp->input_lck);
                VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
-               if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
+               if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
+                   DLIL_INPUT_TERMINATE))) {
                        break;
                }
        }
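The idiom (flags & ~(A | B)) == 0 tests whether any bit outside A and B is set. Widening the mask with DLIL_INPUT_TERMINATE means a pending terminate request no longer counts as "more work", so the input thread can fall out of its loop and reach the termination path instead of spinning. An illustration with hypothetical values:

    uint32_t waiting = DLIL_INPUT_RUNNING | DLIL_INPUT_TERMINATE;
    if (!(waiting & ~(DLIL_INPUT_RUNNING | DLIL_INPUT_TERMINATE))) {
            /* taken: only RUNNING/TERMINATE remain, so stop draining input */
    }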
index a3c90194dad3364e0aab539b1ec7fceef12bddfc..80bc27d78cf753afc7a0e944c6e9dcbd974be84b 100644 (file)
 #define BR_DBGF_MCAST           0x0080
 #define BR_DBGF_HOSTFILTER      0x0100
 #define BR_DBGF_CHECKSUM        0x0200
+#define BR_DBGF_MAC_NAT         0x0400
 #endif /* BRIDGE_DEBUG */
 
 #define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
 #define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
 #define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
 #define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)
+#define IF_BRIDGE_DEBUG(f)              bridge_debug_flag_is_set(f)
 
 #else /* !BRIDGE_DEBUG */
 
        (_sc)->sc_iflist_xcnt--;                                        \
 } while (0)
 
+#define IF_BRIDGE_DEBUG(f)      FALSE
+
 #endif /* BRIDGE_DEBUG */
 
 #if NBPFILTER > 0
 #define BRIDGE_BPF_MTAP_INPUT(sc, m)                                    \
-       if (sc->sc_bpf_input)                                           \
-               bridge_bpf_input(sc->sc_ifp, m)
+       if (sc->sc_bpf_input != NULL)                                   \
+               bridge_bpf_input(sc->sc_ifp, m, __func__, __LINE__)
 #else /* NBPFILTER */
 #define BRIDGE_BPF_MTAP_INPUT(ifp, m)
 #endif /* NBPFILTER */
 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
 #endif
 
+/*
+ * Number of MAC NAT entries
+ * - sized based on 16 clients (including MAC NAT interface)
+ *   each with 4 addresses
+ */
+#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
+#define BRIDGE_MAC_NAT_ENTRY_MAX        64
+#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
+
 /*
  * List of capabilities to possibly mask on the member interface.
  */
@@ -326,6 +339,66 @@ struct bridge_iflist {
 #define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
 #define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
 
+/*
+ * mac_nat_entry
+ * - translates between an IP address and MAC address on a specific
+ *   bridge interface member
+ */
+struct mac_nat_entry {
+       LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
+       struct bridge_iflist    *mne_bif;       /* originating interface */
+       unsigned long           mne_expire;     /* expiration time */
+       union {
+               struct in_addr  mneu_ip;        /* originating IPv4 address */
+               struct in6_addr mneu_ip6;       /* originating IPv6 address */
+       } mne_u;
+       uint8_t                 mne_mac[ETHER_ADDR_LEN];
+       uint8_t                 mne_flags;
+       uint8_t                 mne_reserved;
+};
+#define mne_ip  mne_u.mneu_ip
+#define mne_ip6 mne_u.mneu_ip6
+
+#define MNE_FLAGS_IPV6          0x01    /* IPv6 address */
+
+LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
+
+/*
+ * mac_nat_record
+ * - used by bridge_mac_nat_output() to convey the translation that needs
+ *   to take place in bridge_mac_nat_translate
+ * - holds enough information so that the translation can be done later without
+ *   holding the bridge lock
+ */
+struct mac_nat_record {
+       uint16_t                mnr_ether_type;
+       union {
+               uint16_t        mnru_arp_offset;
+               struct {
+                       uint16_t mnruip_dhcp_flags;
+                       uint16_t mnruip_udp_csum;
+                       uint8_t  mnruip_header_len;
+               } mnru_ip;
+               struct {
+                       uint16_t mnruip6_icmp6_len;
+                       uint16_t mnruip6_lladdr_offset;
+                       uint8_t mnruip6_icmp6_type;
+                       uint8_t mnruip6_header_len;
+               } mnru_ip6;
+       } mnr_u;
+};
+
+#define mnr_arp_offset  mnr_u.mnru_arp_offset
+
+#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
+#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
+#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum
+
+#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
+#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
+#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
+#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
+
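Given these structures, an IPv4 MAC NAT lookup reduces to a list walk keyed on the originating address stored in sc_mne_list (added to struct bridge_softc in the hunk below). A hedged sketch of a hypothetical helper, assuming the bridge lock is held as the mac_nat_record comment implies:

    static struct mac_nat_entry *
    bridge_lookup_mac_nat_entry_v4(struct bridge_softc *sc, struct in_addr ip)
    {
            struct mac_nat_entry *mne;

            LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
                    if (mne->mne_ip.s_addr == ip.s_addr) {
                            return mne;     /* mne_mac is the client's MAC */
                    }
            }
            return NULL;
    }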
 /*
  * Bridge route node.
  */
@@ -390,6 +463,12 @@ struct bridge_softc {
        u_char                  sc_defaddr[6];  /* Default MAC address */
        char                    sc_if_xname[IFNAMSIZ];
 
+       struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
+       struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
+       struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
+       uint32_t                sc_mne_max;      /* max # of entries */
+       uint32_t                sc_mne_count;    /* cur. # of entries */
+       uint32_t                sc_mne_allocation_failures;
 #if BRIDGE_DEBUG
        /*
         * Locking and unlocking calling history
@@ -401,9 +480,9 @@ struct bridge_softc {
 #endif /* BRIDGE_DEBUG */
 };
 
-#define SCF_DETACHING 0x01
-#define SCF_RESIZING 0x02
-#define SCF_MEDIA_ACTIVE 0x04
+#define SCF_DETACHING            0x01
+#define SCF_RESIZING             0x02
+#define SCF_MEDIA_ACTIVE         0x04
 
 typedef enum {
        kChecksumOperationNone = 0,
@@ -419,6 +498,7 @@ decl_lck_mtx_data(static, bridge_list_mtx);
 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
 
 static zone_t   bridge_rtnode_pool = NULL;
+static zone_t   bridge_mne_pool = NULL;
 
 static int      bridge_clone_create(struct if_clone *, uint32_t, void *);
 static int      bridge_clone_destroy(struct ifnet *);
@@ -440,13 +520,15 @@ static void     bridge_ifstop(struct ifnet *, int);
 static int      bridge_output(struct ifnet *, struct mbuf *);
 static void     bridge_finalize_cksum(struct ifnet *, struct mbuf *);
 static void     bridge_start(struct ifnet *);
-__private_extern__ errno_t bridge_input(struct ifnet *, struct mbuf *, void *);
+static errno_t  bridge_input(struct ifnet *, mbuf_t *);
+static errno_t  bridge_iff_input(void *, ifnet_t, protocol_family_t,
+    mbuf_t *, char **);
 static errno_t  bridge_iff_output(void *, ifnet_t, protocol_family_t,
     mbuf_t *);
 static errno_t  bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
-    mbuf_t m);
+    mbuf_t *m);
 
-static int      bridge_enqueue(struct bridge_softc *, struct ifnet *,
+static int      bridge_enqueue(ifnet_t, struct ifnet *,
     struct ifnet *, struct mbuf *, ChecksumOperation);
 static void     bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
 
@@ -539,20 +621,24 @@ static int      bridge_ioctl_gfilt(struct bridge_softc *, void *);
 static int      bridge_ioctl_sfilt(struct bridge_softc *, void *);
 static int      bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
 static int      bridge_ioctl_shostfilter(struct bridge_softc *, void *);
+static int      bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
+static int      bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
 #ifdef PFIL_HOOKS
 static int      bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
     int);
-static int      bridge_ip_checkbasic(struct mbuf **);
-#ifdef INET6
-static int      bridge_ip6_checkbasic(struct mbuf **);
-#endif /* INET6 */
 static int      bridge_fragment(struct ifnet *, struct mbuf *,
     struct ether_header *, int, struct llc *);
 #endif /* PFIL_HOOKS */
+static int bridge_ip_checkbasic(struct mbuf **);
+#ifdef INET6
+static int bridge_ip6_checkbasic(struct mbuf **);
+#endif /* INET6 */
+
+static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
 
 static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
-__private_extern__ errno_t bridge_bpf_input(ifnet_t, struct mbuf *);
-__private_extern__ errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
+static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
+static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
 
 static void bridge_detach(ifnet_t);
 static void bridge_link_event(struct ifnet *, u_int32_t);
@@ -562,7 +648,21 @@ static int interface_media_active(struct ifnet *);
 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
-static int bridge_host_filter(struct bridge_iflist *, struct mbuf *);
+static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
+
+static errno_t bridge_mac_nat_enable(struct bridge_softc *,
+    struct bridge_iflist *);
+static void bridge_mac_nat_disable(struct bridge_softc *sc);
+static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
+static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
+static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
+    struct bridge_iflist *);
+static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
+    boolean_t *);
+static boolean_t bridge_mac_nat_output(struct bridge_softc *,
+    struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
+static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
+    const caddr_t);
 
 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
 
@@ -748,6 +848,9 @@ static const struct bridge_control bridge_control_table32[] = {
          .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
        { .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
          .bc_flags = BC_F_COPYIN | BC_F_SUSER },
+
+       { .bc_func = bridge_ioctl_gmnelist32, .bc_argsize = sizeof(struct ifbrmnelist32),
+         .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
 };
 
 static const struct bridge_control bridge_control_table64[] = {
@@ -846,6 +949,9 @@ static const struct bridge_control bridge_control_table64[] = {
          .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
        { .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
          .bc_flags = BC_F_COPYIN | BC_F_SUSER },
+
+       { .bc_func = bridge_ioctl_gmnelist64, .bc_argsize = sizeof(struct ifbrmnelist64),
+         .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
 };
 
 static const unsigned int bridge_control_table_size =
@@ -1096,6 +1202,12 @@ link_print(struct bridge_softc * sc)
        printf("\n");
 }
 
+static boolean_t
+bridge_debug_flag_is_set(uint32_t flag)
+{
+       return (if_bridge_debug & flag) != 0;
+}
+
 #endif /* BRIDGE_DEBUG */
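The IF_BRIDGE_DEBUG() checks used throughout the rest of this change presumably expand to this helper; a plausible definition (the actual one sits outside this hunk) would be:

    #if BRIDGE_DEBUG
    #define IF_BRIDGE_DEBUG(flag)   bridge_debug_flag_is_set(flag)
    #else
    #define IF_BRIDGE_DEBUG(flag)   FALSE
    #endif /* BRIDGE_DEBUG */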
 
 /*
@@ -1114,6 +1226,10 @@ bridgeattach(int n)
            1024 * sizeof(struct bridge_rtnode), 0, "bridge_rtnode");
        zone_change(bridge_rtnode_pool, Z_CALLERACCT, FALSE);
 
+       bridge_mne_pool = zinit(sizeof(struct mac_nat_entry),
+           256 * sizeof(struct mac_nat_entry), 0, "bridge_mac_nat_entry");
+       zone_change(bridge_mne_pool, Z_CALLERACCT, FALSE);
+
        lck_grp_attr = lck_grp_attr_alloc_init();
 
        bridge_lock_grp = lck_grp_alloc_init("if_bridge", lck_grp_attr);
@@ -1237,16 +1353,9 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
 
        lck_mtx_init(&sc->sc_mtx, bridge_lock_grp, bridge_lock_attr);
        sc->sc_brtmax = BRIDGE_RTABLE_MAX;
+       sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
        sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
-       sc->sc_filter_flags = IFBF_FILT_DEFAULT;
-#ifndef BRIDGE_IPF
-       /*
-        * For backwards compatibility with previous behaviour...
-        * Switch off filtering on the bridge itself if BRIDGE_IPF is
-        * not defined.
-        */
-       sc->sc_filter_flags &= ~IFBF_FILT_USEIPF;
-#endif
+       sc->sc_filter_flags = 0;
 
        TAILQ_INIT(&sc->sc_iflist);
 
@@ -1295,6 +1404,8 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
                    __func__, error);
                goto done;
        }
+       LIST_INIT(&sc->sc_mne_list);
+       LIST_INIT(&sc->sc_mne_list_v6);
        sc->sc_ifp = ifp;
        error = bridge_ifnet_set_attrs(ifp);
        if (error != 0) {
@@ -1353,7 +1464,7 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
        sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                link_print(sc);
        }
 #endif
@@ -1512,7 +1623,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
        BRIDGE_LOCK_ASSERT_NOTHELD(sc);
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_IOCTL) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_IOCTL)) {
                printf("%s: ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)\n",
                    __func__, ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
                    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
@@ -1778,7 +1889,7 @@ bridge_set_tso(struct bridge_softc *sc)
                error = ifnet_set_offload(sc->sc_ifp, offload);
                if (error != 0) {
 #if BRIDGE_DEBUG
-                       if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                                printf("%s: ifnet_set_offload(%s, 0x%x) "
                                    "failed %d\n", __func__,
                                    sc->sc_ifp->if_xname, offload, error);
@@ -1798,7 +1909,7 @@ bridge_set_tso(struct bridge_softc *sc)
                            tso_v4_mtu);
                        if (error != 0) {
 #if BRIDGE_DEBUG
-                               if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+                               if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                                        printf("%s: ifnet_set_tso_mtu(%s, "
                                            "AF_INET, %u) failed %d\n",
                                            __func__, sc->sc_ifp->if_xname,
@@ -1816,7 +1927,7 @@ bridge_set_tso(struct bridge_softc *sc)
                            tso_v6_mtu);
                        if (error != 0) {
 #if BRIDGE_DEBUG
-                               if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+                               if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                                        printf("%s: ifnet_set_tso_mtu(%s, "
                                            "AF_INET6, %u) failed %d\n",
                                            __func__, sc->sc_ifp->if_xname,
@@ -1897,7 +2008,7 @@ bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
                frmlen = (char *)mbuf_data(m) - *frame_ptr;
        }
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_INPUT) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
                printf("%s: %s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
                    "frmlen %lu\n", __func__, sc->sc_ifp->if_xname,
                    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
@@ -1905,7 +2016,7 @@ bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
                    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
                    included ? "inside" : "outside", frmlen);
 
-               if (if_bridge_debug & BR_DBGF_MBUF) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_MBUF)) {
                        printf_mbuf(m, "bridge_iff_input[", "\n");
                        printf_ether_header((struct ether_header *)
                            (void *)*frame_ptr);
@@ -1914,28 +2025,59 @@ bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
                }
        }
 #endif /* BRIDGE_DEBUG */
+       if (included == 0) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
+                       printf("%s: frame_ptr outside mbuf\n", __func__);
+               }
+               goto out;
+       }
 
        /* Move data pointer to start of frame to the link layer header */
-       if (included) {
-               (void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
-                   mbuf_len(m) + frmlen);
-               (void) mbuf_pkthdr_adjustlen(m, frmlen);
-       } else {
-               printf("%s: frame_ptr outside mbuf\n", __func__);
+       (void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
+           mbuf_len(m) + frmlen);
+       (void) mbuf_pkthdr_adjustlen(m, frmlen);
+
+       /* make sure we can access the ethernet header */
+       if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
+                       printf("%s: short frame %lu < %lu\n", __func__,
+                           mbuf_pkthdr_len(m), sizeof(struct ether_header));
+               }
                goto out;
        }
+       if (mbuf_len(m) < sizeof(struct ether_header)) {
+               error = mbuf_pullup(data, sizeof(struct ether_header));
+               if (error != 0) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
+                               printf("%s: mbuf_pullup(%lu) failed %d\n",
+                                   __func__, sizeof(struct ether_header),
+                                   error);
+                       }
+                       error = EJUSTRETURN;
+                       goto out;
+               }
+               if (m != *data) {
+                       m = *data;
+                       *frame_ptr = mbuf_data(m);
+               }
+       }
 
-       error = bridge_input(ifp, m, *frame_ptr);
+       error = bridge_input(ifp, data);
 
        /* Adjust packet back to original */
        if (error == 0) {
+               /* bridge_input might have modified *data */
+               if (*data != m) {
+                       m = *data;
+                       *frame_ptr = mbuf_data(m);
+               }
                (void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
                    mbuf_len(m) - frmlen);
                (void) mbuf_pkthdr_adjustlen(m, -frmlen);
        }
 #if BRIDGE_DEBUG
-       if ((if_bridge_debug & BR_DBGF_INPUT) &&
-           (if_bridge_debug & BR_DBGF_MBUF)) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT) &&
+           IF_BRIDGE_DEBUG(BR_DBGF_MBUF)) {
                printf("\n");
                printf_mbuf(m, "bridge_iff_input]", "\n");
        }
@@ -1962,7 +2104,7 @@ bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
        }
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_OUTPUT) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_OUTPUT)) {
                printf("%s: %s from %s m 0x%llx data 0x%llx\n", __func__,
                    sc->sc_ifp->if_xname, ifp->if_xname,
                    (uint64_t)VM_KERNEL_ADDRPERM(m),
@@ -1970,7 +2112,7 @@ bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
        }
 #endif /* BRIDGE_DEBUG */
 
-       error = bridge_member_output(sc, ifp, m);
+       error = bridge_member_output(sc, ifp, data);
        if (error != 0 && error != EJUSTRETURN) {
                printf("%s: bridge_member_output failed error %d\n", __func__,
                    error);
@@ -1993,7 +2135,7 @@ bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
            event_msg->kev_class == KEV_NETWORK_CLASS &&
            event_msg->kev_subclass == KEV_DL_SUBCLASS) {
 #if BRIDGE_DEBUG
-               if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                        printf("%s: %s event_code %u - %s\n", __func__,
                            ifp->if_xname, event_msg->event_code,
                            dlil_kev_dl_code_str(event_msg->event_code));
@@ -2061,7 +2203,7 @@ bridge_iff_detached(void *cookie, ifnet_t ifp)
        struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                printf("%s: %s\n", __func__, ifp->if_xname);
        }
 #endif /* BRIDGE_DEBUG */
@@ -2090,7 +2232,7 @@ bridge_attach_protocol(struct ifnet *ifp)
        struct ifnet_attach_proto_param reg;
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                printf("%s: %s\n", __func__, ifp->if_xname);
        }
 #endif /* BRIDGE_DEBUG */
@@ -2113,7 +2255,7 @@ bridge_detach_protocol(struct ifnet *ifp)
        int     error;
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                printf("%s: %s\n", __func__, ifp->if_xname);
        }
 #endif /* BRIDGE_DEBUG */
@@ -2144,13 +2286,21 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
        VERIFY(ifs != NULL);
 
        /*
-        * First, remove the member from the list first so it cannot be found anymore
+        * Remove the member from the list first so it cannot be found anymore
         * when we release the bridge lock below
         */
        BRIDGE_XLOCK(sc);
        TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
        BRIDGE_XDROP(sc);
 
+       if (sc->sc_mac_nat_bif != NULL) {
+               if (bif == sc->sc_mac_nat_bif) {
+                       bridge_mac_nat_disable(sc);
+               } else {
+                       bridge_mac_nat_flush_entries(sc, bif);
+               }
+       }
+
        if (!gone) {
                switch (ifs->if_type) {
                case IFT_ETHER:
@@ -2293,6 +2443,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
        uint8_t eaddr[ETHER_ADDR_LEN];
        struct iff_filter iff;
        u_int32_t event_code = 0;
+       boolean_t mac_nat = FALSE;
 
        ifs = ifunit(req->ifbr_ifsname);
        if (ifs == NULL) {
@@ -2307,9 +2458,10 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
        }
 
        /* If it's in the span list, it can't be a member. */
-       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
-       if (ifs == bif->bif_ifp) {
-               return EBUSY;
+       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
+               if (ifs == bif->bif_ifp) {
+                       return EBUSY;
+               }
        }
 
        if (ifs->if_bridge == sc) {
@@ -2322,6 +2474,11 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
 
        switch (ifs->if_type) {
        case IFT_ETHER:
+               if (strcmp(ifs->if_name, "en") == 0 &&
+                   ifs->if_subfamily == IFNET_SUBFAMILY_WIFI) {
+                       /* XXX is there a better way to identify Wi-Fi STA? */
+                       mac_nat = TRUE;
+               }
        case IFT_L2VLAN:
                /* permitted interface types */
                break;
@@ -2332,27 +2489,36 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
                return EINVAL;
        }
 
+       /* fail to add the interface if the MTU doesn't match */
+       if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
+               printf("%s: %s: invalid MTU for %s", __func__,
+                   sc->sc_ifp->if_xname,
+                   ifs->if_xname);
+               return EINVAL;
+       }
+
+       /* there's already an interface that's doing MAC NAT */
+       if (mac_nat && sc->sc_mac_nat_bif != NULL) {
+               return EBUSY;
+       }
        bif = _MALLOC(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
        if (bif == NULL) {
                return ENOMEM;
        }
-
        bif->bif_ifp = ifs;
        ifnet_reference(ifs);
-       bif->bif_ifflags = IFBIF_LEARNING | IFBIF_DISCOVER;
+       bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
 #if HAS_IF_CAP
        bif->bif_savedcaps = ifs->if_capenable;
 #endif /* HAS_IF_CAP */
        bif->bif_sc = sc;
+       if (mac_nat) {
+               (void)bridge_mac_nat_enable(sc, bif);
+       }
 
        /* Allow the first Ethernet member to define the MTU */
        if (TAILQ_EMPTY(&sc->sc_iflist)) {
                sc->sc_ifp->if_mtu = ifs->if_mtu;
-       } else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
-               printf("%s: %s: invalid MTU for %s", __func__,
-                   sc->sc_ifp->if_xname,
-                   ifs->if_xname);
-               return EINVAL;
        }
 
        /*
@@ -2523,6 +2689,8 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
        req->ifbr_proto = bp->bp_protover;
        req->ifbr_role = bp->bp_role;
        req->ifbr_stpflags = bp->bp_flags;
+       req->ifbr_ifsflags = bif->bif_ifflags;
+
        /* Copy STP state options as flags */
        if (bp->bp_operedge) {
                req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
@@ -2543,7 +2711,6 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
                req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
        }
 
-       req->ifbr_ifsflags = bif->bif_ifflags;
        req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
        req->ifbr_addrcnt = bif->bif_addrcnt;
        req->ifbr_addrmax = bif->bif_addrmax;
@@ -2571,6 +2738,15 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
                /* SPAN is readonly */
                return EINVAL;
        }
+       if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
+               errno_t error;
+               error = bridge_mac_nat_enable(sc, bif);
+               if (error != 0) {
+                       return error;
+               }
+       } else if (sc->sc_mac_nat_bif != NULL) {
+               bridge_mac_nat_disable(sc);
+       }
 
 
 #if BRIDGESTP
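With this handler in place, user space can toggle MAC NAT on a member by setting or clearing IFBIF_MAC_NAT in ifbr_ifsflags, using the same SIOCGDRVSPEC/SIOCSDRVSPEC plumbing as the other bridge ioctls. A minimal sketch (error handling trimmed; on macOS some of these definitions may come from private headers):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/sockio.h>
    #include <net/if.h>
    #include <net/if_bridgevar.h>

    /* Sketch: enable MAC NAT on member "en0" of "bridge0". */
    static int
    enable_mac_nat(int s)   /* s: any AF_INET datagram socket */
    {
            struct ifdrv ifd;
            struct ifbreq req;

            memset(&req, 0, sizeof(req));
            strlcpy(req.ifbr_ifsname, "en0", sizeof(req.ifbr_ifsname));

            memset(&ifd, 0, sizeof(ifd));
            strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
            ifd.ifd_len = sizeof(req);
            ifd.ifd_data = &req;

            /* fetch the member's current flags so the others are preserved */
            ifd.ifd_cmd = BRDGGIFFLAGS;
            if (ioctl(s, SIOCGDRVSPEC, &ifd) != 0) {
                    return -1;
            }
            req.ifbr_ifsflags |= IFBIF_MAC_NAT;
            ifd.ifd_cmd = BRDGSIFFLAGS;
            return ioctl(s, SIOCSDRVSPEC, &ifd);
    }

Note that the handler above treats the absence of IFBIF_MAC_NAT as a request to disable MAC NAT, so the read-modify-write matters.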
@@ -3020,11 +3196,9 @@ bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
                return EINVAL;
        }
 
-#ifndef BRIDGE_IPF
        if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
                return EINVAL;
        }
-#endif
 
        sc->sc_filter_flags = param->ifbrp_filter;
 
@@ -3343,6 +3517,113 @@ bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
        return 0;
 }
 
+static char *
+bridge_mac_nat_entry_out(struct mac_nat_entry_list *list,
+    unsigned int *count_p, char *buf, unsigned int *len_p)
+{
+       unsigned int            count = *count_p;
+       struct ifbrmne          ifbmne;
+       unsigned int            len = *len_p;
+       struct mac_nat_entry    *mne;
+       unsigned long           now;
+
+       bzero(&ifbmne, sizeof(ifbmne));
+       LIST_FOREACH(mne, list, mne_list) {
+               if (len < sizeof(ifbmne)) {
+                       break;
+               }
+               snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
+                   "%s", mne->mne_bif->bif_ifp->if_xname);
+               memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
+                   sizeof(ifbmne.ifbmne_mac));
+               now = (unsigned long) net_uptime();
+               if (now < mne->mne_expire) {
+                       ifbmne.ifbmne_expire = mne->mne_expire - now;
+               } else {
+                       ifbmne.ifbmne_expire = 0;
+               }
+               if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
+                       ifbmne.ifbmne_af = AF_INET6;
+                       ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
+               } else {
+                       ifbmne.ifbmne_af = AF_INET;
+                       ifbmne.ifbmne_ip_addr = mne->mne_ip;
+               }
+               memcpy(buf, &ifbmne, sizeof(ifbmne));
+               count++;
+               buf += sizeof(ifbmne);
+               len -= sizeof(ifbmne);
+       }
+       *count_p = count;
+       *len_p = len;
+       return buf;
+}
+
+/*
+ * bridge_ioctl_gmnelist()
+ *   Handle the ioctl that retrieves the list of MAC NAT entries.
+ *
+ * Note:
+ *   The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
+ *   field size/layout except for the last field ifbml_buf, the user-supplied
+ *   buffer pointer. That is passed in separately via the 'user_addr'
+ *   parameter from the respective 32-bit or 64-bit ioctl routine.
+ */
+static int
+bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
+    user_addr_t user_addr)
+{
+       unsigned int            count;
+       char                    *buf;
+       int                     error = 0;
+       char                    *outbuf = NULL;
+       struct mac_nat_entry    *mne;
+       unsigned int            buflen;
+       unsigned int            len;
+
+       mnl->ifbml_elsize = sizeof(struct ifbrmne);
+       count = 0;
+       LIST_FOREACH(mne, &sc->sc_mne_list, mne_list)
+       count++;
+       LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list)
+       count++;
+       buflen = sizeof(struct ifbrmne) * count;
+       if (buflen == 0 || mnl->ifbml_len == 0) {
+               mnl->ifbml_len = buflen;
+               return error;
+       }
+       BRIDGE_UNLOCK(sc);
+       outbuf = _MALLOC(buflen, M_TEMP, M_WAITOK | M_ZERO);
+       BRIDGE_LOCK(sc);
+       count = 0;
+       buf = outbuf;
+       len = min(mnl->ifbml_len, buflen);
+       buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
+       buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
+       mnl->ifbml_len = count * sizeof(struct ifbrmne);
+       BRIDGE_UNLOCK(sc);
+       error = copyout(outbuf, user_addr, mnl->ifbml_len);
+       _FREE(outbuf, M_TEMP);
+       BRIDGE_LOCK(sc);
+       return error;
+}
+
+static int
+bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrmnelist64 *mnl = arg;
+
+       return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
+}
+
+static int
+bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
+{
+       struct ifbrmnelist32 *mnl = arg;
+
+       return bridge_ioctl_gmnelist(sc, arg,
+                  CAST_USER_ADDR_T(mnl->ifbml_buf));
+}
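Because the handler returns the required size in ifbml_len when called with a zero-length buffer, user space follows the usual two-call pattern: size, allocate, fetch. A hedged sketch for a 64-bit process; BRDGGMNELIST is the command name this table entry is assumed to be registered under in if_bridgevar.h, and the exact type of ifbml_buf is not shown in this diff:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/sockio.h>
    #include <net/if.h>
    #include <net/if_bridgevar.h>

    /* Sketch: dump the MAC NAT entries of "bridge0". */
    static int
    dump_mac_nat_entries(int s)
    {
            struct ifdrv ifd;
            struct ifbrmnelist64 mnl;
            char *buf;
            unsigned int i, n;

            memset(&ifd, 0, sizeof(ifd));
            strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
            ifd.ifd_cmd = BRDGGMNELIST;     /* assumed command name */
            ifd.ifd_len = sizeof(mnl);
            ifd.ifd_data = &mnl;

            memset(&mnl, 0, sizeof(mnl));   /* ifbml_len == 0: sizing call */
            if (ioctl(s, SIOCGDRVSPEC, &ifd) != 0 || mnl.ifbml_len == 0) {
                    return -1;
            }
            buf = malloc(mnl.ifbml_len);
            mnl.ifbml_buf = (uint64_t)(uintptr_t)buf;  /* field type assumed */
            if (ioctl(s, SIOCGDRVSPEC, &ifd) != 0) {
                    free(buf);
                    return -1;
            }
            /* walk by ifbml_elsize, which the kernel sets to
             * sizeof(struct ifbrmne) */
            n = mnl.ifbml_len / mnl.ifbml_elsize;
            for (i = 0; i < n; i++) {
                    struct ifbrmne *e = (void *)(buf + i * mnl.ifbml_elsize);
                    /* e->ifbmne_ifname, e->ifbmne_mac, e->ifbmne_expire... */
            }
            free(buf);
            return 0;
    }

Reporting the element size back in ifbml_elsize lets older tools keep walking the list correctly if struct ifbrmne ever grows.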
 
 /*
  * bridge_ifdetach:
@@ -3357,7 +3638,7 @@ bridge_ifdetach(struct ifnet *ifp)
        struct bridge_softc *sc = ifp->if_bridge;
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                printf("%s: %s\n", __func__, ifp->if_xname);
        }
 #endif /* BRIDGE_DEBUG */
@@ -3400,7 +3681,7 @@ bridge_proto_attach_changed(struct ifnet *ifp)
        struct bridge_softc *sc = ifp->if_bridge;
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                printf("%s: %s\n", __func__, ifp->if_xname);
        }
 #endif /* BRIDGE_DEBUG */
@@ -3429,7 +3710,7 @@ bridge_proto_attach_changed(struct ifnet *ifp)
        }
        BRIDGE_UNLOCK(sc);
 #if BRIDGE_DEBUG
-       if ((if_bridge_debug & BR_DBGF_LIFECYCLE) != 0 && changed) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                printf("%s: input broadcast %s", ifp->if_xname,
                    input_broadcast ? "ENABLED" : "DISABLED");
        }
@@ -3506,7 +3787,7 @@ bridge_iflinkevent(struct ifnet *ifp)
        u_int32_t event_code = 0;
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                printf("%s: %s\n", __func__, ifp->if_xname);
        }
 #endif /* BRIDGE_DEBUG */
@@ -3524,6 +3805,9 @@ bridge_iflinkevent(struct ifnet *ifp)
                } else {
                        bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
                }
+               if (sc->sc_mac_nat_bif != NULL) {
+                       bridge_mac_nat_flush_entries(sc, bif);
+               }
 
                event_code = bridge_updatelinkstatus(sc);
        }
@@ -3565,7 +3849,7 @@ bridge_delayed_callback(void *param)
        BRIDGE_LOCK(sc);
 
 #if BRIDGE_DEBUG_DELAYED_CALLBACK
-       if (if_bridge_debug & BR_DBGF_DELAYED_CALL) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_DELAYED_CALL)) {
                printf("%s: %s call 0x%llx flags 0x%x\n", __func__,
                    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
                    call->bdc_flags);
@@ -3612,7 +3896,7 @@ bridge_schedule_delayed_call(struct bridge_delayed_call *call)
        call->bdc_flags = BDCF_OUTSTANDING;
 
 #if BRIDGE_DEBUG_DELAYED_CALLBACK
-       if (if_bridge_debug & BR_DBGF_DELAYED_CALL) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_DELAYED_CALL)) {
                printf("%s: %s call 0x%llx flags 0x%x\n", __func__,
                    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
                    call->bdc_flags);
@@ -3659,7 +3943,7 @@ bridge_cancel_delayed_call(struct bridge_delayed_call *call)
 
        while (call->bdc_flags & BDCF_OUTSTANDING) {
 #if BRIDGE_DEBUG
-               if (if_bridge_debug & BR_DBGF_DELAYED_CALL) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_DELAYED_CALL)) {
                        printf("%s: %s call 0x%llx flags 0x%x\n", __func__,
                            sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
                            call->bdc_flags);
@@ -3821,7 +4105,7 @@ bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
 #endif /* INET6 */
        }
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_CHECKSUM) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_CHECKSUM)) {
                printf("%s: [%s -> %s] before 0x%x did 0x%x after 0x%x\n",
                    __func__,
                    src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
@@ -3837,7 +4121,7 @@ bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
  *
  */
 static int
-bridge_enqueue(struct bridge_softc *sc, struct ifnet *src_ifp,
+bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
 {
        int len, error = 0;
@@ -3907,9 +4191,9 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *src_ifp,
                }
 
                if (_error == 0) {
-                       (void) ifnet_stat_increment_out(sc->sc_ifp, 1, len, 0);
+                       (void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
                } else {
-                       (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
+                       (void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
                }
        }
 
@@ -3950,8 +4234,7 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
                        return;
                }
        }
-
-       (void) bridge_enqueue(sc, NULL, ifp, m, kChecksumOperationNone);
+       (void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, kChecksumOperationNone);
 }
 #endif /* HAS_BRIDGE_DUMMYNET */
 
@@ -3965,14 +4248,18 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
  *     The mbuf has the Ethernet header already attached.
  */
 static errno_t
-bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m)
+bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
 {
+       ifnet_t bridge_ifp;
        struct ether_header *eh;
        struct ifnet *dst_if;
        uint16_t vlan;
+       struct bridge_iflist *mac_nat_bif;
+       ifnet_t mac_nat_ifp;
+       mbuf_t m = *data;
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_OUTPUT) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_OUTPUT)) {
                printf("%s: ifp %s\n", __func__, ifp->if_xname);
        }
 #endif /* BRIDGE_DEBUG */
@@ -3980,7 +4267,8 @@ bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m)
        if (m->m_len < ETHER_HDR_LEN) {
                m = m_pullup(m, ETHER_HDR_LEN);
                if (m == NULL) {
-                       return ENOBUFS;
+                       *data = NULL;
+                       return EJUSTRETURN;
                }
+               *data = m;
        }
 
@@ -3988,6 +4276,19 @@ bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m)
        vlan = VLANTAGOF(m);
 
        BRIDGE_LOCK(sc);
+       mac_nat_bif = sc->sc_mac_nat_bif;
+       mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
+       if (mac_nat_ifp == ifp) {
+               /* record the IP address used by the MAC NAT interface */
+               (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
+               m = *data;
+               if (m == NULL) {
+                       /* packet was deallocated */
+                       BRIDGE_UNLOCK(sc);
+                       return EJUSTRETURN;
+               }
+       }
+       bridge_ifp = sc->sc_ifp;
 
        /*
         * APPLE MODIFICATION
@@ -4004,7 +4305,7 @@ bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m)
         * go ahead and send out that interface.  Otherwise, the packet
         * is dropped below.
         */
-       if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
+       if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
                dst_if = ifp;
                goto sendunicast;
        }
@@ -4030,7 +4331,7 @@ bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m)
                BRIDGE_LOCK2REF(sc, error);
                if (error != 0) {
                        m_freem(m);
-                       return error;
+                       return EJUSTRETURN;
                }
 
                TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
@@ -4046,16 +4347,26 @@ bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m)
                        if ((dst_if->if_flags & IFF_RUNNING) == 0) {
                                continue;
                        }
-
-                       /*
-                        * If this is not the original output interface,
-                        * and the interface is participating in spanning
-                        * tree, make sure the port is in a state that
-                        * allows forwarding.
-                        */
-                       if (dst_if != ifp && (bif->bif_ifflags & IFBIF_STP) &&
-                           bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
-                               continue;
+                       if (dst_if != ifp) {
+                               /*
+                                * If this is not the original output interface,
+                                * and the interface is participating in spanning
+                                * tree, make sure the port is in a state that
+                                * allows forwarding.
+                                */
+                               if ((bif->bif_ifflags & IFBIF_STP) &&
+                                   bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
+                                       continue;
+                               }
+                               /*
+                                * If this is not the original output interface,
+                                * and the destination is the MAC NAT interface,
+                                * drop the packet. The packet can't be sent
+                                * if the source MAC is incorrect.
+                                */
+                               if (dst_if == mac_nat_ifp) {
+                                       continue;
+                               }
                        }
                        if (TAILQ_NEXT(bif, bif_next) == NULL) {
                                used = 1;
@@ -4064,12 +4375,12 @@ bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m)
                                mc = m_dup(m, M_DONTWAIT);
                                if (mc == NULL) {
                                        (void) ifnet_stat_increment_out(
-                                               sc->sc_ifp, 0, 0, 1);
+                                               bridge_ifp, 0, 0, 1);
                                        continue;
                                }
                        }
-                       (void) bridge_enqueue(sc, ifp, dst_if, mc,
-                           kChecksumOperationCompute);
+                       (void) bridge_enqueue(bridge_ifp, ifp, dst_if,
+                           mc, kChecksumOperationCompute);
                }
                if (used == 0) {
                        m_freem(m);
@@ -4095,8 +4406,18 @@ sendunicast:
                /* just let the packet continue on its way */
                return 0;
        }
-       (void) bridge_enqueue(sc, ifp, dst_if, m,
-           kChecksumOperationCompute);
+       if (dst_if != mac_nat_ifp) {
+               (void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
+                   kChecksumOperationCompute);
+       } else {
+               /*
+                * This is not the original output interface
+                * and the destination is the MAC NAT interface.
+                * Drop the packet because the packet can't be sent
+                * if the source MAC is incorrect.
+                */
+               m_freem(m);
+       }
        return EJUSTRETURN;
 }
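The EJUSTRETURN returns above follow the interface-filter convention: 0 means the mbuf is untouched and normal output processing continues, EJUSTRETURN means the filter consumed the packet (enqueued or freed it) and the caller must not touch it again, and any other error tells the caller to dispose of the packet itself. A condensed caller-side view of the contract:

    errno_t err = bridge_member_output(sc, ifp, &m);
    if (err == 0) {
            /* m still valid: continue down the normal output path */
    } else if (err == EJUSTRETURN) {
            /* bridge consumed m (enqueued or freed): stop, don't free */
    } else {
            /* real error: caller cleans up m */
    }

This is why the m_pullup() failure path above now sets *data to NULL and returns EJUSTRETURN instead of ENOBUFS: m_pullup() has already freed the chain, and a plain error return would lead the caller to free it again.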
 
@@ -4111,11 +4432,10 @@ bridge_output(struct ifnet *ifp, struct mbuf *m)
 {
        struct bridge_softc *sc = ifnet_softc(ifp);
        struct ether_header *eh;
-       struct ifnet *dst_if;
+       struct ifnet *dst_if = NULL;
        int error = 0;
 
        eh = mtod(m, struct ether_header *);
-       dst_if = NULL;
 
        BRIDGE_LOCK(sc);
 
@@ -4135,8 +4455,11 @@ bridge_output(struct ifnet *ifp, struct mbuf *m)
                /* callee will unlock */
                bridge_broadcast(sc, NULL, m, 0);
        } else {
+               ifnet_t bridge_ifp;
+
+               bridge_ifp = sc->sc_ifp;
                BRIDGE_UNLOCK(sc);
-               error = bridge_enqueue(sc, NULL, dst_if, m,
+               error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
                    kChecksumOperationFinalize);
        }
 
@@ -4244,25 +4567,29 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
     struct mbuf *m)
 {
        struct bridge_iflist *dbif;
-       struct ifnet *src_if, *dst_if, *ifp;
+       ifnet_t bridge_ifp;
+       struct ifnet *src_if, *dst_if;
        struct ether_header *eh;
        uint16_t vlan;
        uint8_t *dst;
        int error;
+       struct mac_nat_record mnr;
+       boolean_t translate_mac = FALSE;
+       uint32_t sc_filter_flags = 0;
 
        BRIDGE_LOCK_ASSERT_HELD(sc);
 
+       bridge_ifp = sc->sc_ifp;
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_OUTPUT) {
-               printf("%s: %s m 0x%llx\n", __func__, sc->sc_ifp->if_xname,
+       if (IF_BRIDGE_DEBUG(BR_DBGF_OUTPUT)) {
+               printf("%s: %s m 0x%llx\n", __func__, bridge_ifp->if_xname,
                    (uint64_t)VM_KERNEL_ADDRPERM(m));
        }
 #endif /* BRIDGE_DEBUG */
 
        src_if = m->m_pkthdr.rcvif;
-       ifp = sc->sc_ifp;
 
-       (void) ifnet_stat_increment_in(ifp, 1, m->m_pkthdr.len, 0);
+       (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
        vlan = VLANTAGOF(m);
 
 
@@ -4324,7 +4651,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
 
 
                /* ...forward it to all interfaces. */
-               atomic_add_64(&ifp->if_imcasts, 1);
+               atomic_add_64(&bridge_ifp->if_imcasts, 1);
                dst_if = NULL;
        }
 
@@ -4340,10 +4667,8 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
 #if NBPFILTER > 0
        if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
            dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
-               m->m_pkthdr.rcvif = ifp;
-               if (sc->sc_bpf_input) {
-                       bridge_bpf_input(ifp, m);
-               }
+               m->m_pkthdr.rcvif = bridge_ifp;
+               BRIDGE_BPF_MTAP_INPUT(sc, m);
        }
 #endif /* NBPFILTER */
 
@@ -4351,7 +4676,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
        /* run the packet filter */
        if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
                BRIDGE_UNLOCK(sc);
-               if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0) {
+               if (bridge_pfil(&m, bridge_ifp, src_if, PFIL_IN) != 0) {
                        return;
                }
                if (m == NULL) {
@@ -4362,6 +4687,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
 #endif /* PFIL_HOOKS */
 
        if (dst_if == NULL) {
+               /* bridge_broadcast will unlock */
                bridge_broadcast(sc, src_if, m, 1);
                return;
        }
@@ -4398,17 +4724,27 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
        if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
                m = ip_xdhcpra_output(dst_if, m);
                if (!m) {
-                       ++sc->sc_sc.sc_ifp.if_xdhcpra;
+                       ++bridge_ifp->if_xdhcpra;
+                       BRIDGE_UNLOCK(sc);
                        return;
                }
        }
 #endif /* HAS_DHCPRA_MASK */
 
-       BRIDGE_UNLOCK(sc);
+       if (dbif == sc->sc_mac_nat_bif) {
+               /* determine how to translate the packet */
+               translate_mac
+                       = bridge_mac_nat_output(sc, sbif, &m, &mnr);
+               if (m == NULL) {
+                       /* packet was deallocated */
+                       BRIDGE_UNLOCK(sc);
+                       return;
+               }
+       }
 
 #if defined(PFIL_HOOKS)
        if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
-               if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0) {
+               if (bridge_pfil(&m, bridge_ifp, dst_if, PFIL_OUT) != 0) {
                        return;
                }
                if (m == NULL) {
@@ -4417,6 +4753,21 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
        }
 #endif /* PFIL_HOOKS */
 
+       sc_filter_flags = sc->sc_filter_flags;
+       BRIDGE_UNLOCK(sc);
+       if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
+               if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
+                       return;
+               }
+               if (m == NULL) {
+                       return;
+               }
+       }
+
+       /* if we need to, translate the MAC address */
+       if (translate_mac) {
+               bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
+       }
        /*
         * This is an inbound packet where the checksum
         * (if applicable) is already present/valid. Since
@@ -4424,7 +4775,10 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
         * forwarding), there's no need to validate the checksum.
         * Clear the checksum offload flags and send it along.
         */
-       (void) bridge_enqueue(sc, NULL, dst_if, m, kChecksumOperationClear);
+       if (m != NULL) {
+               (void) bridge_enqueue(bridge_ifp, NULL, dst_if, m,
+                   kChecksumOperationClear);
+       }
        return;
 
 drop:
@@ -4434,9 +4788,7 @@ drop:
 
 #if BRIDGE_DEBUG
 
-char *ether_ntop(char *, size_t, const u_char *);
-
-__private_extern__ char *
+static char *
 ether_ntop(char *buf, size_t len, const u_char *ap)
 {
        snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
@@ -4447,27 +4799,44 @@ ether_ntop(char *buf, size_t len, const u_char *ap)
 
 #endif /* BRIDGE_DEBUG */
 
+static void
+inject_input_packet(ifnet_t ifp, mbuf_t m)
+{
+       mbuf_pkthdr_setrcvif(m, ifp);
+       mbuf_pkthdr_setheader(m, mbuf_data(m));
+       mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
+           mbuf_len(m) - ETHER_HDR_LEN);
+       mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
+       m->m_flags |= M_PROTO1; /* set to avoid loops */
+       dlil_input_packet_list(ifp, m);
+       return;
+}
+
 /*
  * bridge_input:
  *
  *     Filter input from a member interface.  Queue the packet for
  *     bridging if it is not for us.
  */
-__private_extern__ errno_t
-bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
+errno_t
+bridge_input(struct ifnet *ifp, mbuf_t *data)
 {
        struct bridge_softc *sc = ifp->if_bridge;
        struct bridge_iflist *bif, *bif2;
-       struct ifnet *bifp;
+       ifnet_t bridge_ifp;
        struct ether_header *eh;
        struct mbuf *mc, *mc2;
        uint16_t vlan;
-       int error;
+       errno_t error;
+       boolean_t is_ifp_mac = FALSE;
+       mbuf_t m = *data;
+       uint32_t sc_filter_flags = 0;
 
+       bridge_ifp = sc->sc_ifp;
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_INPUT) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
                printf("%s: %s from %s m 0x%llx data 0x%llx\n", __func__,
-                   sc->sc_ifp->if_xname, ifp->if_xname,
+                   bridge_ifp->if_xname, ifp->if_xname,
                    (uint64_t)VM_KERNEL_ADDRPERM(m),
                    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
        }
@@ -4475,15 +4844,14 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
 
        if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
 #if BRIDGE_DEBUG
-               if (if_bridge_debug & BR_DBGF_INPUT) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
                        printf("%s: %s not running passing along\n",
-                           __func__, sc->sc_ifp->if_xname);
+                           __func__, bridge_ifp->if_xname);
                }
 #endif /* BRIDGE_DEBUG */
                return 0;
        }
 
-       bifp = sc->sc_ifp;
        vlan = VLANTAGOF(m);
 
 #ifdef IFF_MONITOR
@@ -4493,10 +4861,10 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
         * the bpf(4) machinery, but before we do, increment the byte and
         * packet counters associated with this interface.
         */
-       if ((bifp->if_flags & IFF_MONITOR) != 0) {
-               m->m_pkthdr.rcvif  = bifp;
+       if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
+               m->m_pkthdr.rcvif  = bridge_ifp;
                BRIDGE_BPF_MTAP_INPUT(sc, m);
-               (void) ifnet_stat_increment_in(bifp, 1, m->m_pkthdr.len, 0);
+               (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
                m_freem(m);
                return EJUSTRETURN;
        }
@@ -4510,29 +4878,47 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
                mbuf_setflags_mask(m, 0, MBUF_PROMISC);
        }
 
+       sc_filter_flags = sc->sc_filter_flags;
+       if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
+               error = bridge_pf(&m, ifp, sc_filter_flags, TRUE);
+               if (error != 0) {
+                       return EJUSTRETURN;
+               }
+               if (m == NULL) {
+                       return EJUSTRETURN;
+               }
+               /*
+                * On success, bridge_pf may have replaced the mbuf while
+                * doing its processing. Update *data so that we don't use
+                * a stale pointer.
+                */
+               *data = m;
+       }
+
        BRIDGE_LOCK(sc);
        bif = bridge_lookup_member_if(sc, ifp);
        if (bif == NULL) {
                BRIDGE_UNLOCK(sc);
 #if BRIDGE_DEBUG
-               if (if_bridge_debug & BR_DBGF_INPUT) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
                        printf("%s: %s bridge_lookup_member_if failed\n",
-                           __func__, sc->sc_ifp->if_xname);
+                           __func__, bridge_ifp->if_xname);
                }
 #endif /* BRIDGE_DEBUG */
                return 0;
        }
 
        if (bif->bif_flags & BIFF_HOST_FILTER) {
-               error = bridge_host_filter(bif, m);
+               error = bridge_host_filter(bif, data);
                if (error != 0) {
-                       if (if_bridge_debug & BR_DBGF_INPUT) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
                                printf("%s: %s bridge_host_filter failed\n",
                                    __func__, bif->bif_ifp->if_xname);
                        }
                        BRIDGE_UNLOCK(sc);
                        return EJUSTRETURN;
                }
+               m = *data;
        }
 
        eh = mtod(m, struct ether_header *);
@@ -4541,7 +4927,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
 
        if (m->m_flags & (M_BCAST | M_MCAST)) {
 #if BRIDGE_DEBUG
-               if (if_bridge_debug & BR_DBGF_MCAST) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_MCAST)) {
                        if ((m->m_flags & M_MCAST)) {
                                printf("%s: multicast: "
                                    "%02x:%02x:%02x:%02x:%02x:%02x\n",
@@ -4598,7 +4984,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
                 * here from ether_input as a bridge is never a member of a
                 * bridge.
                 */
-               VERIFY(bifp->if_bridge == NULL);
+               VERIFY(bridge_ifp->if_bridge == NULL);
                mc2 = m_dup(m, M_DONTWAIT);
                if (mc2 != NULL) {
                        /* Keep the layer3 header aligned */
@@ -4607,30 +4993,27 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
                }
                if (mc2 != NULL) {
                        /* mark packet as arriving on the bridge */
-                       mc2->m_pkthdr.rcvif = bifp;
+                       mc2->m_pkthdr.rcvif = bridge_ifp;
                        mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
 
-#if NBPFILTER > 0
-                       if (sc->sc_bpf_input) {
-                               bridge_bpf_input(bifp, mc2);
-                       }
-#endif /* NBPFILTER */
+                       BRIDGE_BPF_MTAP_INPUT(sc, m);
+
                        (void) mbuf_setdata(mc2,
                            (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
                            mbuf_len(mc2) - ETHER_HDR_LEN);
                        (void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
 
-                       (void) ifnet_stat_increment_in(bifp, 1,
+                       (void) ifnet_stat_increment_in(bridge_ifp, 1,
                            mbuf_pkthdr_len(mc2), 0);
 
 #if BRIDGE_DEBUG
-                       if (if_bridge_debug & BR_DBGF_MCAST) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MCAST)) {
                                printf("%s: %s mcast for us\n", __func__,
-                                   sc->sc_ifp->if_xname);
+                                   bridge_ifp->if_xname);
                        }
 #endif /* BRIDGE_DEBUG */
 
-                       dlil_input_packet_list(bifp, mc2);
+                       dlil_input_packet_list(bridge_ifp, mc2);
                }
 
                /* Return the original packet for local processing. */
@@ -4699,13 +5082,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
                        }                                               \
                }                                                       \
                BRIDGE_UNLOCK(sc);                                      \
-               mbuf_pkthdr_setrcvif(m, iface);                         \
-               mbuf_pkthdr_setheader(m, mbuf_data(m));                 \
-               mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,   \
-                            mbuf_len(m) - ETHER_HDR_LEN);              \
-               mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);               \
-               m->m_flags |= M_PROTO1; /* set to avoid loops */        \
-               dlil_input_packet_list(iface, m);                       \
+               inject_input_packet(iface, m);                          \
                return (EJUSTRETURN);                                   \
        }                                                               \
                                                                         \
@@ -4720,16 +5097,44 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
        /*
         * Unicast.
         */
+       if (memcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) {
+               is_ifp_mac = TRUE;
+       }
+
+       /* handle MAC-NAT if enabled */
+       if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
+               ifnet_t dst_if;
+               boolean_t is_input = FALSE;
+
+               dst_if = bridge_mac_nat_input(sc, data, &is_input);
+               m = *data;
+               if (dst_if == ifp) {
+                       /* our input packet */
+               } else if (dst_if != NULL || m == NULL) {
+                       BRIDGE_UNLOCK(sc);
+                       if (dst_if != NULL) {
+                               ASSERT(m != NULL);
+                               if (is_input) {
+                                       inject_input_packet(dst_if, m);
+                               } else {
+                                       (void)bridge_enqueue(bridge_ifp, NULL,
+                                           dst_if, m,
+                                           kChecksumOperationClear);
+                               }
+                       }
+                       return EJUSTRETURN;
+               }
+       }
+
        /*
-        * If the packet is for us, set the packets source as the
-        * bridge, and return the packet back to ether_input for
-        * local processing.
+        * If the packet is for the bridge, set the packet's source interface
+        * and return the packet back to ether_input for local processing.
         */
-       if (memcmp(eh->ether_dhost, IF_LLADDR(bifp),
-           ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_DST(bifp)) {
+       if (memcmp(eh->ether_dhost, IF_LLADDR(bridge_ifp),
+           ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
                /* Mark the packet as arriving on the bridge interface */
-               (void) mbuf_pkthdr_setrcvif(m, bifp);
-               mbuf_pkthdr_setheader(m, frame_header);
+               (void) mbuf_pkthdr_setrcvif(m, bridge_ifp);
+               mbuf_pkthdr_setheader(m, mbuf_data(m));
 
                /*
                 * If the interface is learning, and the source
@@ -4747,18 +5152,18 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
                    mbuf_len(m) - ETHER_HDR_LEN);
                (void) mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
 
-               (void) ifnet_stat_increment_in(bifp, 1, mbuf_pkthdr_len(m), 0);
+               (void) ifnet_stat_increment_in(bridge_ifp, 1, mbuf_pkthdr_len(m), 0);
 
                BRIDGE_UNLOCK(sc);
 
 #if BRIDGE_DEBUG
-               if (if_bridge_debug & BR_DBGF_INPUT) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
                        printf("%s: %s packet for bridge\n", __func__,
-                           sc->sc_ifp->if_xname);
+                           bridge_ifp->if_xname);
                }
 #endif /* BRIDGE_DEBUG */
 
-               dlil_input_packet_list(bifp, m);
+               dlil_input_packet_list(bridge_ifp, m);
 
                return EJUSTRETURN;
        }
@@ -4776,7 +5181,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
         * bridge's own MAC address, because the bridge may be
         * using the SAME MAC address as one of its interfaces
         */
-       if (memcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) {
+       if (is_ifp_mac) {
 
 #ifdef VERY_VERY_VERY_DIAGNOSTIC
                printf("%s: not forwarding packet bound for member "
@@ -4821,27 +5226,40 @@ static void
 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
     struct mbuf *m, int runfilt)
 {
-#ifndef PFIL_HOOKS
-#pragma unused(runfilt)
-#endif
+       ifnet_t bridge_ifp;
        struct bridge_iflist *dbif, *sbif;
        struct mbuf *mc;
        struct mbuf *mc_in;
        struct ifnet *dst_if;
        int error = 0, used = 0;
-       boolean_t is_output;
+       boolean_t bridge_if_out;
        ChecksumOperation cksum_op;
+       struct mac_nat_record mnr;
+       struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
+       boolean_t translate_mac = FALSE;
+       uint32_t sc_filter_flags = 0;
 
+       bridge_ifp = sc->sc_ifp;
        if (src_if != NULL) {
-               is_output = FALSE;
+               bridge_if_out = FALSE;
                cksum_op = kChecksumOperationClear;
                sbif = bridge_lookup_member_if(sc, src_if);
+               if (sbif != NULL && mac_nat_bif != NULL && sbif != mac_nat_bif) {
+                       /* get the translation record while holding the lock */
+                       translate_mac
+                               = bridge_mac_nat_output(sc, sbif, &m, &mnr);
+                       if (m == NULL) {
+                               /* packet was deallocated */
+                               BRIDGE_UNLOCK(sc);
+                               return;
+                       }
+               }
        } else {
                /*
                 * src_if is NULL when the bridge interface calls
                 * bridge_broadcast().
                 */
-               is_output = TRUE;
+               bridge_if_out = TRUE;
                cksum_op = kChecksumOperationFinalize;
                sbif = NULL;
        }
@@ -4855,7 +5273,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
 #ifdef PFIL_HOOKS
        /* Filter on the bridge interface before broadcasting */
        if (runfilt && (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6)) {
-               if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0) {
+               if (bridge_pfil(&m, bridge_ifp, NULL, PFIL_OUT) != 0) {
                        goto out;
                }
                if (m == NULL) {
@@ -4863,7 +5281,6 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
                }
        }
 #endif /* PFIL_HOOKS */
-
        TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
                dst_if = dbif->bif_ifp;
                if (dst_if == src_if) {
@@ -4901,7 +5318,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
                } else {
                        mc = m_dup(m, M_DONTWAIT);
                        if (mc == NULL) {
-                               (void) ifnet_stat_increment_out(sc->sc_ifp,
+                               (void) ifnet_stat_increment_out(bridge_ifp,
                                    0, 0, 1);
                                continue;
                        }
@@ -4911,7 +5328,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
                 * If broadcast input is enabled, do so only if this
                 * is an input packet.
                 */
-               if (!is_output &&
+               if (!bridge_if_out &&
                    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
                        mc_in = m_dup(mc, M_DONTWAIT);
                        /* this could fail, but we continue anyways */
@@ -4933,7 +5350,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
                                mc = m_copyup(mc, i, ETHER_ALIGN);
                                if (mc == NULL) {
                                        (void) ifnet_stat_increment_out(
-                                               sc->sc_ifp, 0, 0, 1);
+                                               bridge_ifp, 0, 0, 1);
                                        if (mc_in != NULL) {
                                                m_freem(mc_in);
                                        }
@@ -4956,7 +5373,48 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
 #endif /* PFIL_HOOKS */
 
                /* out */
-               (void) bridge_enqueue(sc, NULL, dst_if, mc, cksum_op);
+               if (translate_mac && mac_nat_bif == dbif) {
+                       /* translate the packet without holding the lock */
+                       bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
+               }
+
+               sc_filter_flags = sc->sc_filter_flags;
+               if (runfilt &&
+                   PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
+                       if (used == 0) {
+                               /* Keep the layer3 header aligned */
+                               int i = min(mc->m_pkthdr.len, max_protohdr);
+                               mc = m_copyup(mc, i, ETHER_ALIGN);
+                               if (mc == NULL) {
+                                       (void) ifnet_stat_increment_out(
+                                               bridge_ifp, 0, 0, 1);
+                                       if (mc_in != NULL) {
+                                               m_freem(mc_in);
+                                               mc_in = NULL;
+                                       }
+                                       continue;
+                               }
+                       }
+                       if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
+                               if (mc_in != NULL) {
+                                       m_freem(mc_in);
+                                       mc_in = NULL;
+                               }
+                               continue;
+                       }
+                       if (mc == NULL) {
+                               if (mc_in != NULL) {
+                                       m_freem(mc_in);
+                                       mc_in = NULL;
+                               }
+                               continue;
+                       }
+               }
+
+               if (mc != NULL) {
+                       (void) bridge_enqueue(bridge_ifp,
+                           NULL, dst_if, mc, cksum_op);
+               }
 
                /* in */
                if (mc_in == NULL) {
@@ -5012,7 +5470,7 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m)
                        continue;
                }
 
-               (void) bridge_enqueue(sc, NULL, dst_if, mc,
+               (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
                    kChecksumOperationNone);
        }
 }
@@ -5067,6 +5525,9 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
                 */
                brt = zalloc_noblock(bridge_rtnode_pool);
                if (brt == NULL) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_RT_TABLE)) {
+                               printf("%s: zalloc_noblock failed\n", __func__);
+                       }
                        return ENOMEM;
                }
                bzero(brt, sizeof(struct bridge_rtnode));
@@ -5088,7 +5549,7 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
                brt->brt_dst = bif;
                bif->bif_addrcnt++;
 #if BRIDGE_DEBUG
-               if (if_bridge_debug & BR_DBGF_RT_TABLE) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_RT_TABLE)) {
                        printf("%s: added %02x:%02x:%02x:%02x:%02x:%02x "
                            "on %s count %u hashsize %u\n", __func__,
                            dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
@@ -5184,7 +5645,6 @@ bridge_aging_timer(struct bridge_softc *sc)
        BRIDGE_LOCK_ASSERT_HELD(sc);
 
        bridge_rtage(sc);
-
        if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
            (sc->sc_flags & SCF_DETACHING) == 0) {
                sc->sc_aging_timer.bdc_sc = sc;
@@ -5216,6 +5676,9 @@ bridge_rtage(struct bridge_softc *sc)
                        }
                }
        }
+       if (sc->sc_mac_nat_bif != NULL) {
+               bridge_mac_nat_age_entries(sc, now);
+       }
 }
 
 /*
@@ -5265,7 +5728,7 @@ bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
 /*
  * bridge_rtdelete:
  *
- *     Delete routes to a speicifc member interface.
+ *     Delete routes to a specific member interface.
  */
 static void
 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
@@ -5382,7 +5845,7 @@ bridge_rthash_delayed_resize(struct bridge_softc *sc)
 out:
        if (error == 0) {
 #if BRIDGE_DEBUG
-               if (if_bridge_debug & BR_DBGF_RT_TABLE) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_RT_TABLE)) {
                        printf("%s: %s new size %u\n", __func__,
                            sc->sc_ifp->if_xname, sc->sc_rthash_size);
                }
@@ -5555,7 +6018,7 @@ bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
                dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
                if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
 #if BRIDGE_DEBUG
-                       if (if_bridge_debug & BR_DBGF_RT_TABLE) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_RT_TABLE)) {
                                printf("%s: %s EEXIST "
                                    "%02x:%02x:%02x:%02x:%02x:%02x\n",
                                    __func__, sc->sc_ifp->if_xname,
@@ -5578,7 +6041,7 @@ bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
        } while (lbrt != NULL);
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_RT_TABLE) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_RT_TABLE)) {
                printf("%s: %s impossible %02x:%02x:%02x:%02x:%02x:%02x\n",
                    __func__, sc->sc_ifp->if_xname,
                    brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
@@ -6004,6 +6467,7 @@ bad:
        *mp = NULL;
        return error;
 }
+#endif /* PFIL_HOOKS */
 
 /*
  * Perform basic checks on header size since
@@ -6037,7 +6501,7 @@ bridge_ip_checkbasic(struct mbuf **mp)
                        ipstat.ips_toosmall++;
                        goto bad;
                }
-       } else if (__predict_false(m->m_len < sizeof(struct ip))) {
+       } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
                if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
                        ipstat.ips_toosmall++;
                        goto bad;
@@ -6048,12 +6512,12 @@ bridge_ip_checkbasic(struct mbuf **mp)
                goto bad;
        }
 
-       if (ip->ip_v != IPVERSION) {
+       if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
                ipstat.ips_badvers++;
                goto bad;
        }
-       hlen = ip->ip_hl << 2;
-       if (hlen < sizeof(struct ip)) {  /* minimum header length */
+       hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+       if (hlen < (int)sizeof(struct ip)) {  /* minimum header length */
                ipstat.ips_badhlen++;
                goto bad;
        }
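
The IP_VHL_V()/IP_VHL_HL() macros used above unpack the combined
version/header-length byte that xnu's struct ip carries in ip_vhl. A
minimal sketch of what they compute (the definitions below mirror the ones
in netinet/ip.h; the sample value is illustrative):

	#define IP_VHL_V(vhl)   ((vhl) >> 4)    /* version: high nibble */
	#define IP_VHL_HL(vhl)  ((vhl) & 0x0f)  /* header words: low nibble */

	/* example: ip_vhl == 0x45 -> version 4, header length 5 << 2 == 20 bytes */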
@@ -6140,7 +6604,7 @@ bridge_ip6_checkbasic(struct mbuf **mp)
                        in6_ifstat_inc(inifp, ifs6_in_hdrerr);
                        goto bad;
                }
-       } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
+       } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
                struct ifnet *inifp = m->m_pkthdr.rcvif;
                if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
                        ip6stat.ip6s_toosmall++;
@@ -6167,6 +6631,7 @@ bad:
 }
 #endif /* INET6 */
 
+#ifdef PFIL_HOOKS
 /*
  * bridge_fragment:
  *
@@ -6299,18 +6764,19 @@ bridge_detach(ifnet_t ifp)
  *
  *     Invoke the input BPF callback if enabled
  */
-__private_extern__ errno_t
-bridge_bpf_input(ifnet_t ifp, struct mbuf *m)
+static errno_t
+bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
 {
        struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       bpf_packet_func     input_func = sc->sc_bpf_input;
 
-       if (sc->sc_bpf_input) {
+       if (input_func != NULL) {
                if (mbuf_pkthdr_rcvif(m) != ifp) {
-                       printf("%s: rcvif: 0x%llx != ifp 0x%llx\n", __func__,
+                       printf("%s.%d: rcvif: 0x%llx != ifp 0x%llx\n", func, line,
                            (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
                            (uint64_t)VM_KERNEL_ADDRPERM(ifp));
                }
-               (*sc->sc_bpf_input)(ifp, m);
+               (*input_func)(ifp, m);
        }
        return 0;
 }
@@ -6320,13 +6786,14 @@ bridge_bpf_input(ifnet_t ifp, struct mbuf *m)
  *
  *     Invoke the output BPF callback if enabled
  */
-__private_extern__ errno_t
+static errno_t
 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
 {
        struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+       bpf_packet_func     output_func = sc->sc_bpf_output;
 
-       if (sc->sc_bpf_output) {
-               (*sc->sc_bpf_output)(ifp, m);
+       if (output_func != NULL) {
+               (*output_func)(ifp, m);
        }
        return 0;
 }
@@ -6346,7 +6813,7 @@ bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
        } event;
 
 #if BRIDGE_DEBUG
-       if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+       if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
                printf("%s: %s event_code %u - %s\n", __func__, ifp->if_xname,
                    event_code, dlil_kev_dl_code_str(event_code));
        }
@@ -6364,12 +6831,13 @@ bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
        ifnet_event(ifp, &event.header);
 }
 
-#define BRIDGE_HF_DROP(reason, func, line) { \
-       bridge_hostfilter_stats.reason++; \
-       if (if_bridge_debug & BR_DBGF_HOSTFILTER) \
-               printf("%s.%d" #reason, func, line); \
-       error = EINVAL; \
-}
+#define BRIDGE_HF_DROP(reason, func, line) do {                \
+               bridge_hostfilter_stats.reason++;               \
+               if (IF_BRIDGE_DEBUG(BR_DBGF_HOSTFILTER)) {      \
+                       printf("%s.%d " #reason "\n", func, line); \
+               }                                               \
+               error = EINVAL;                                 \
+       } while (0)
 
 /*
  * Make sure this is a DHCP or Bootp request that match the host filter
@@ -6423,24 +6891,13 @@ done:
 }
 
 static int
-bridge_host_filter(struct bridge_iflist *bif, struct mbuf *m)
+bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
 {
        int error = EINVAL;
        struct ether_header *eh;
        static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
+       mbuf_t m = *data;
 
-       /*
-        * Check the Ethernet header is large enough
-        */
-       if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
-               BRIDGE_HF_DROP(brhf_ether_too_small, __func__, __LINE__);
-               goto done;
-       }
-       if (mbuf_len(m) < sizeof(struct ether_header) &&
-           mbuf_pullup(&m, sizeof(struct ether_header)) != 0) {
-               BRIDGE_HF_DROP(brhf_ether_pullup_failed, __func__, __LINE__);
-               goto done;
-       }
        eh = mtod(m, struct ether_header *);
 
        /*
@@ -6468,11 +6925,13 @@ bridge_host_filter(struct bridge_iflist *bif, struct mbuf *m)
                        BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
                        goto done;
                }
-               if (mbuf_len(m) < minlen && mbuf_pullup(&m, minlen) != 0) {
+               if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
                        BRIDGE_HF_DROP(brhf_arp_pullup_failed,
                            __func__, __LINE__);
                        goto done;
                }
+               m = *data;
+
                /*
                 * Verify this is an ethernet/ip arp
                 */
@@ -6528,9 +6987,6 @@ bridge_host_filter(struct bridge_iflist *bif, struct mbuf *m)
                        BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
                        goto done;
                }
-               /*
-                *
-                */
                bridge_hostfilter_stats.brhf_arp_ok += 1;
                error = 0;
        } else if (eh->ether_type == htons(ETHERTYPE_IP)) {
@@ -6629,7 +7085,7 @@ bridge_host_filter(struct bridge_iflist *bif, struct mbuf *m)
        }
 done:
        if (error != 0) {
-               if (if_bridge_debug & BR_DBGF_HOSTFILTER) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_HOSTFILTER)) {
                        if (m) {
                                printf_mbuf_data(m, 0,
                                    sizeof(struct ether_header) +
@@ -6644,3 +7100,1453 @@ done:
        }
        return error;
 }
+
+/*
+ * MAC NAT
+ */
+
+static errno_t
+bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       errno_t         error = 0;
+
+       BRIDGE_LOCK_ASSERT_HELD(sc);
+
+       if (sc->sc_mac_nat_bif != NULL) {
+               if (sc->sc_mac_nat_bif != bif) {
+                       error = EBUSY;
+               }
+               goto done;
+       }
+       sc->sc_mac_nat_bif = bif;
+       bif->bif_ifflags |= IFBIF_MAC_NAT;
+       bridge_mac_nat_populate_entries(sc);
+
+done:
+       return error;
+}
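
For example (interface names illustrative): with en0 already configured as
the MAC NAT member, a request to enable MAC NAT on en1 returns EBUSY, while
re-enabling it on en0 itself leaves error at 0 and succeeds as a no-op.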
+
+static void
+bridge_mac_nat_disable(struct bridge_softc *sc)
+{
+       struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
+
+       assert(mac_nat_bif != NULL);
+       bridge_mac_nat_flush_entries(sc, mac_nat_bif);
+       mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
+       sc->sc_mac_nat_bif = NULL;
+       return;
+}
+
+static void
+mac_nat_entry_print2(struct mac_nat_entry *mne,
+    char *ifname, const char *msg1, const char *msg2)
+{
+       int             af;
+       char            etopbuf[24];
+       char            ntopbuf[MAX_IPv6_STR_LEN];
+       const char      *space;
+
+       af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
+       ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
+       (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
+       if (msg2 == NULL) {
+               msg2 = "";
+               space = "";
+       } else {
+               space = " ";
+       }
+       printf("%s %s%s%s %p (%s, %s, %s)\n",
+           ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
+           ntopbuf, etopbuf);
+}
+
+static void
+mac_nat_entry_print(struct mac_nat_entry *mne,
+    char *ifname, const char *msg)
+{
+       mac_nat_entry_print2(mne, ifname, msg, NULL);
+}
+
+static struct mac_nat_entry *
+bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
+{
+       struct mac_nat_entry    *mne;
+       struct mac_nat_entry    *ret_mne = NULL;
+
+       if (af == AF_INET) {
+               in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
+
+               LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
+                       if (mne->mne_ip.s_addr == s_addr) {
+                               if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                                       mac_nat_entry_print(mne, sc->sc_if_xname,
+                                           "found");
+                               }
+                               ret_mne = mne;
+                               break;
+                       }
+               }
+       } else {
+               const struct in6_addr *ip6 = (const struct in6_addr *)ip;
+
+               LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
+                       if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
+                               if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                                       mac_nat_entry_print(mne, sc->sc_if_xname,
+                                           "found");
+                               }
+                               ret_mne = mne;
+                               break;
+                       }
+               }
+       }
+       return ret_mne;
+}
+
+static void
+bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
+    struct mac_nat_entry *mne, const char *reason)
+{
+       LIST_REMOVE(mne, mne_list);
+       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+               mac_nat_entry_print(mne, sc->sc_if_xname, reason);
+       }
+       zfree(bridge_mne_pool, mne);
+       sc->sc_mne_count--;
+}
+
+static struct mac_nat_entry *
+bridge_create_mac_nat_entry(struct bridge_softc *sc,
+    struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
+{
+       struct mac_nat_entry_list *list;
+       struct mac_nat_entry *mne;
+
+       if (sc->sc_mne_count >= sc->sc_mne_max) {
+               sc->sc_mne_allocation_failures++;
+               return NULL;
+       }
+       mne = zalloc_noblock(bridge_mne_pool);
+       if (mne == NULL) {
+               sc->sc_mne_allocation_failures++;
+               return NULL;
+       }
+       sc->sc_mne_count++;
+       bzero(mne, sizeof(*mne));
+       bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
+       mne->mne_bif = bif;
+       if (af == AF_INET) {
+               bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
+               list = &sc->sc_mne_list;
+       } else {
+               bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
+               mne->mne_flags |= MNE_FLAGS_IPV6;
+               list = &sc->sc_mne_list_v6;
+       }
+       LIST_INSERT_HEAD(list, mne, mne_list);
+       mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
+       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+               mac_nat_entry_print(mne, sc->sc_if_xname, "created");
+       }
+       return mne;
+}
+
+static struct mac_nat_entry *
+bridge_update_mac_nat_entry(struct bridge_softc *sc,
+    struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
+{
+       struct mac_nat_entry *mne;
+
+       mne = bridge_lookup_mac_nat_entry(sc, af, ip);
+       if (mne != NULL) {
+               struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
+
+               if (mne->mne_bif == mac_nat_bif) {
+                       /* the MAC NAT interface takes precedence */
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               if (mne->mne_bif != bif) {
+                                       mac_nat_entry_print2(mne,
+                                           sc->sc_if_xname, "reject",
+                                           bif->bif_ifp->if_xname);
+                               }
+                       }
+               } else if (mne->mne_bif != bif) {
+                       const char *old_if = mne->mne_bif->bif_ifp->if_xname;
+
+                       mne->mne_bif = bif;
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               mac_nat_entry_print2(mne,
+                                   sc->sc_if_xname, "replaced",
+                                   old_if);
+                       }
+                       bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
+               }
+               mne->mne_expire = (unsigned long)net_uptime() +
+                   sc->sc_brttimeout;
+       } else {
+               mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
+       }
+       return mne;
+}
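
For example (addresses illustrative): if an internal member sources traffic
from an IP address that the MAC NAT interface itself owns, the existing
entry for the MAC NAT interface is kept and the claim is ignored, logged as
"reject" when BR_DBGF_MAC_NAT debugging is enabled; between two ordinary
members, the most recent sender wins and the entry is logged as "replaced".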
+
+static void
+bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
+    struct mac_nat_entry_list *list, struct bridge_iflist *bif)
+{
+       struct mac_nat_entry *mne;
+       struct mac_nat_entry *tmne;
+
+       LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
+               if (bif != NULL && mne->mne_bif != bif) {
+                       continue;
+               }
+               bridge_destroy_mac_nat_entry(sc, mne, "flushed");
+       }
+}
+
+/*
+ * bridge_mac_nat_flush_entries:
+ *
+ * Flush MAC NAT entries for the specified member. Flush all entries if
+ * the member is the one that requires MAC NAT, otherwise just flush the
+ * ones for the specified member.
+ */
+static void
+bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
+{
+       struct bridge_iflist *flush_bif;
+
+       flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
+       bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
+       bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
+}
+
+static void
+bridge_mac_nat_populate_entries(struct bridge_softc *sc)
+{
+       errno_t                 error;
+       ifnet_t                 ifp;
+       ifaddr_t                *list;
+       struct bridge_iflist    *mac_nat_bif = sc->sc_mac_nat_bif;
+
+       assert(mac_nat_bif != NULL);
+       ifp = mac_nat_bif->bif_ifp;
+       error = ifnet_get_address_list(ifp, &list);
+       if (error != 0) {
+               printf("%s: ifnet_get_address_list(%s) failed %d\n",
+                   __func__, ifp->if_xname, error);
+               return;
+       }
+       for (ifaddr_t *scan = list; *scan != NULL; scan++) {
+               sa_family_t     af;
+               void            *ip;
+
+               union {
+                       struct sockaddr         sa;
+                       struct sockaddr_in      sin;
+                       struct sockaddr_in6     sin6;
+               } u;
+               af = ifaddr_address_family(*scan);
+               switch (af) {
+               case AF_INET:
+               case AF_INET6:
+                       error = ifaddr_address(*scan, &u.sa, sizeof(u));
+                       if (error != 0) {
+                               printf("%s: ifaddr_address failed %d\n",
+                                   __func__, error);
+                               break;
+                       }
+                       if (af == AF_INET) {
+                               ip = (void *)&u.sin.sin_addr;
+                       } else {
+                               if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
+                                       /* remove scope ID */
+                                       u.sin6.sin6_addr.s6_addr16[1] = 0;
+                               }
+                               ip = (void *)&u.sin6.sin6_addr;
+                       }
+                       bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
+                           (uint8_t *)IF_LLADDR(ifp));
+                       break;
+               default:
+                       break;
+               }
+       }
+       ifnet_free_address_list(list);
+       return;
+}
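
Clearing s6_addr16[1] above reflects the KAME convention xnu uses
internally, where the scope ID of a link-local address is embedded in the
second 16-bit word of the address; zeroing it stores the address in the
form it has on the wire, so later lookups against received packets match.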
+
+static void
+bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
+    struct mac_nat_entry_list *list, unsigned long now)
+{
+       struct mac_nat_entry *mne;
+       struct mac_nat_entry *tmne;
+
+       LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
+               if (now >= mne->mne_expire) {
+                       bridge_destroy_mac_nat_entry(sc, mne, "aged out");
+               }
+       }
+}
+
+static void
+bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
+{
+       if (sc->sc_mac_nat_bif == NULL) {
+               return;
+       }
+       bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
+       bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
+}
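
As a worked example (values illustrative): an entry created at uptime 1000
with sc_brttimeout set to 1200 seconds gets mne_expire = 2200, and the
first aging pass that runs with now >= 2200 destroys it with reason
"aged out".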
+
+static const char *
+get_in_out_string(boolean_t is_output)
+{
+       return is_output ? "OUT" : "IN";
+}
+
+/*
+ * is_valid_arp_packet:
+ *     Verify that this is a valid ARP packet.
+ *
+ *     Returns TRUE if the packet is valid, FALSE otherwise.
+ */
+static boolean_t
+is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
+    struct ether_header **eh_p, struct ether_arp **ea_p)
+{
+       struct ether_arp *ea;
+       struct ether_header *eh;
+       size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
+       boolean_t is_valid = FALSE;
+       int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
+
+       if (mbuf_pkthdr_len(*data) < minlen) {
+               if (IF_BRIDGE_DEBUG(flags)) {
+                       printf("%s: ARP %s short frame %lu < %lu\n",
+                           __func__,
+                           get_in_out_string(is_output),
+                           mbuf_pkthdr_len(*data), minlen);
+               }
+               goto done;
+       }
+       if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
+               if (IF_BRIDGE_DEBUG(flags)) {
+                       printf("%s: ARP %s size %lu mbuf_pullup fail\n",
+                           __func__,
+                           get_in_out_string(is_output),
+                           minlen);
+               }
+               *data = NULL;
+               goto done;
+       }
+
+       /* validate ARP packet */
+       eh = mtod(*data, struct ether_header *);
+       ea = (struct ether_arp *)(eh + 1);
+       if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
+               if (IF_BRIDGE_DEBUG(flags)) {
+                       printf("%s: ARP %s htype not ethernet\n",
+                           __func__,
+                           get_in_out_string(is_output));
+               }
+               goto done;
+       }
+       if (ea->arp_hln != ETHER_ADDR_LEN) {
+               if (IF_BRIDGE_DEBUG(flags)) {
+                       printf("%s: ARP %s hlen not ethernet\n",
+                           __func__,
+                           get_in_out_string(is_output));
+               }
+               goto done;
+       }
+       if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
+               if (IF_BRIDGE_DEBUG(flags)) {
+                       printf("%s: ARP %s ptype not IP\n",
+                           __func__,
+                           get_in_out_string(is_output));
+               }
+               goto done;
+       }
+       if (ea->arp_pln != sizeof(struct in_addr)) {
+               if (IF_BRIDGE_DEBUG(flags)) {
+                       printf("%s: ARP %s plen not IP\n",
+                           __func__,
+                           get_in_out_string(is_output));
+               }
+               goto done;
+       }
+       is_valid = TRUE;
+       *ea_p = ea;
+       *eh_p = eh;
+done:
+       return is_valid;
+}
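
For reference, a frame that passes every check above carries these fixed
field values; a minimal user-space sketch of filling them in (a sketch
built on the standard BSD ARP definitions, not code from this change):

	#include <string.h>
	#include <netinet/in.h>
	#include <netinet/if_ether.h>   /* struct ether_arp, ARPHRD_*, ARPOP_* */

	static void
	fill_ipv4_arp(struct ether_arp *ea)
	{
		memset(ea, 0, sizeof(*ea));
		ea->arp_hrd = htons(ARPHRD_ETHER);      /* hardware type: Ethernet (1) */
		ea->arp_pro = htons(ETHERTYPE_IP);      /* protocol type: IPv4 (0x0800) */
		ea->arp_hln = ETHER_ADDR_LEN;           /* 6-byte hardware addresses */
		ea->arp_pln = sizeof(struct in_addr);   /* 4-byte protocol addresses */
		ea->arp_op  = htons(ARPOP_REQUEST);     /* or ARPOP_REPLY */
	}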
+
+static struct mac_nat_entry *
+bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
+{
+       struct ether_arp        *ea;
+       struct ether_header     *eh;
+       struct mac_nat_entry    *mne = NULL;
+       u_short                 op;
+       struct in_addr          tpa;
+
+       if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
+               goto done;
+       }
+       op = ntohs(ea->arp_op);
+       switch (op) {
+       case ARPOP_REQUEST:
+       case ARPOP_REPLY:
+               /* only care about REQUEST and REPLY */
+               break;
+       default:
+               goto done;
+       }
+
+       /* check the target IP address for a NAT entry */
+       bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
+       if (tpa.s_addr != 0) {
+               mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
+       }
+       if (mne != NULL) {
+               if (op == ARPOP_REPLY) {
+                       /* translate the MAC address */
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               char    mac_src[24];
+                               char    mac_dst[24];
+
+                               ether_ntop(mac_src, sizeof(mac_src),
+                                   ea->arp_tha);
+                               ether_ntop(mac_dst, sizeof(mac_dst),
+                                   mne->mne_mac);
+                               printf("%s %s ARP %s -> %s\n",
+                                   sc->sc_if_xname,
+                                   mne->mne_bif->bif_ifp->if_xname,
+                                   mac_src, mac_dst);
+                       }
+                       bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
+               }
+       } else {
+               /* handle conflicting ARP (sender matches mne) */
+               struct in_addr spa;
+
+               bcopy(ea->arp_spa, &spa, sizeof(spa));
+               if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
+                       /* check the source IP for a NAT entry */
+                       mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
+               }
+       }
+
+done:
+       return mne;
+}
+
+static boolean_t
+bridge_mac_nat_arp_output(struct bridge_softc *sc,
+    struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
+{
+       struct ether_arp        *ea;
+       struct ether_header     *eh;
+       struct in_addr          ip;
+       struct mac_nat_entry    *mne = NULL;
+       u_short                 op;
+       boolean_t               translate = FALSE;
+
+       if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
+               goto done;
+       }
+       op = ntohs(ea->arp_op);
+       switch (op) {
+       case ARPOP_REQUEST:
+       case ARPOP_REPLY:
+               /* only care about REQUEST and REPLY */
+               break;
+       default:
+               goto done;
+       }
+
+       bcopy(ea->arp_spa, &ip, sizeof(ip));
+       if (ip.s_addr == 0) {
+               goto done;
+       }
+       /* XXX validate IP address: no multicast/broadcast */
+       mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
+       if (mnr != NULL && mne != NULL) {
+               /* record the offset to do the replacement */
+               translate = TRUE;
+               mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
+       }
+
+done:
+       return translate;
+}
+
+#define ETHER_IPV4_HEADER_LEN   (sizeof(struct ether_header) + \
+                                sizeof(struct ip))
+static struct ether_header *
+get_ether_ip_header(mbuf_t *data, boolean_t is_output)
+{
+       struct ether_header     *eh = NULL;
+       int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
+       size_t          minlen = ETHER_IPV4_HEADER_LEN;
+
+       if (mbuf_pkthdr_len(*data) < minlen) {
+               if (IF_BRIDGE_DEBUG(flags)) {
+                       printf("%s: IP %s short frame %lu < %lu\n",
+                           __func__,
+                           get_in_out_string(is_output),
+                           mbuf_pkthdr_len(*data), minlen);
+               }
+               goto done;
+       }
+       if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
+               if (IF_BRIDGE_DEBUG(flags)) {
+                       printf("%s: IP %s size %lu mbuf_pullup fail\n",
+                           __func__,
+                           get_in_out_string(is_output),
+                           minlen);
+               }
+               *data = NULL;
+               goto done;
+       }
+       eh = mtod(*data, struct ether_header *);
+done:
+       return eh;
+}
+
+static struct mac_nat_entry *
+bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
+{
+       struct in_addr          dst;
+       struct ether_header     *eh;
+       struct ip               *iphdr;
+       struct mac_nat_entry    *mne = NULL;
+
+       eh = get_ether_ip_header(data, FALSE);
+       if (eh == NULL) {
+               goto done;
+       }
+       iphdr = (struct ip *)(void *)(eh + 1);
+       bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
+       /* XXX validate IP address */
+       if (dst.s_addr == 0) {
+               goto done;
+       }
+       mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
+done:
+       return mne;
+}
+
+static void
+bridge_mac_nat_udp_output(struct bridge_softc *sc,
+    struct bridge_iflist *bif, mbuf_t m,
+    uint8_t ip_header_len, struct mac_nat_record *mnr)
+{
+       uint16_t        dp_flags;
+       errno_t         error;
+       size_t          offset;
+       struct udphdr   udphdr;
+
+       /* copy the UDP header */
+       offset = sizeof(struct ether_header) + ip_header_len;
+       error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
+       if (error != 0) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                       printf("%s: mbuf_copydata udphdr failed %d\n",
+                           __func__, error);
+               }
+               return;
+       }
+       if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
+           ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
+               /* not a BOOTP/DHCP packet */
+               return;
+       }
+       /* check whether the broadcast bit is already set */
+       offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
+       error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
+       if (error != 0) {
+               if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                       printf("%s: mbuf_copydata dp_flags failed %d\n",
+                           __func__, error);
+               }
+               return;
+       }
+       if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
+               /* it's already set, nothing to do */
+               return;
+       }
+       /* broadcast bit needs to be set */
+       mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
+       mnr->mnr_ip_header_len = ip_header_len;
+       if (udphdr.uh_sum != 0) {
+               uint16_t        delta;
+
+               /* adjust checksum to take modified dp_flags into account */
+               delta = dp_flags - mnr->mnr_ip_dhcp_flags;
+               mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
+       }
+       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+               printf("%s %s DHCP dp_flags 0x%x UDP cksum 0x%x\n",
+                   sc->sc_if_xname,
+                   bif->bif_ifp->if_xname,
+                   ntohs(mnr->mnr_ip_dhcp_flags),
+                   ntohs(mnr->mnr_ip_udp_csum));
+       }
+       return;
+}
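
The single-subtraction delta above approximates the incremental checksum
update of RFC 1624 (equation 3); a self-contained sketch of the fully
folded one's-complement form, for comparison (illustrative, not part of
this change):

	#include <stdint.h>

	/* RFC 1624: HC' = ~(~HC + ~m + m'), all one's-complement sums */
	static uint16_t
	cksum_adjust_16(uint16_t cksum, uint16_t old16, uint16_t new16)
	{
		uint32_t sum;

		sum  = (uint16_t)~cksum;
		sum += (uint16_t)~old16;
		sum += new16;
		sum = (sum & 0xffff) + (sum >> 16);     /* fold the carries */
		sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)~sum;
	}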
+
+static boolean_t
+bridge_mac_nat_ip_output(struct bridge_softc *sc,
+    struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
+{
+       struct ether_header     *eh;
+       struct in_addr          ip;
+       struct ip               *iphdr;
+       uint8_t                 ip_header_len;
+       struct mac_nat_entry    *mne = NULL;
+       boolean_t               translate = FALSE;
+
+       eh = get_ether_ip_header(data, TRUE);
+       if (eh == NULL) {
+               goto done;
+       }
+       iphdr = (struct ip *)(void *)(eh + 1);
+       ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
+       if (ip_header_len < sizeof(*iphdr)) {
+               /* bogus IP header */
+               goto done;
+       }
+       bcopy(&iphdr->ip_src, &ip, sizeof(ip));
+       /* XXX validate the source address */
+       if (ip.s_addr != 0) {
+               mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
+                   eh->ether_shost);
+       }
+       if (mnr != NULL) {
+               if (iphdr->ip_p == IPPROTO_UDP) {
+                       /* handle DHCP must broadcast */
+                       bridge_mac_nat_udp_output(sc, bif, *data,
+                           ip_header_len, mnr);
+               }
+               translate = TRUE;
+       }
+done:
+       return translate;
+}
+
+#define ETHER_IPV6_HEADER_LEN   (sizeof(struct ether_header) + \
+                                sizeof(struct ip6_hdr))
+static struct ether_header *
+get_ether_ipv6_header(mbuf_t *data, boolean_t is_output)
+{
+       struct ether_header     *eh = NULL;
+       int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
+       size_t          minlen = ETHER_IPV6_HEADER_LEN;
+
+       if (mbuf_pkthdr_len(*data) < minlen) {
+               if (IF_BRIDGE_DEBUG(flags)) {
+                       printf("%s: IP %s short frame %lu < %lu\n",
+                           __func__,
+                           get_in_out_string(is_output),
+                           mbuf_pkthdr_len(*data), minlen);
+               }
+               goto done;
+       }
+       if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
+               if (IF_BRIDGE_DEBUG(flags)) {
+                       printf("%s: IP %s size %lu mbuf_pullup fail\n",
+                           __func__,
+                           get_in_out_string(is_output),
+                           minlen);
+               }
+               *data = NULL;
+               goto done;
+       }
+       eh = mtod(*data, struct ether_header *);
+done:
+       return eh;
+}
+
+#if 0
+static void
+bridge_mac_nat_icmpv6_input(struct bridge_softc *sc, mbuf_t *data,
+    struct ether_header *eh, struct ip6_hdr *hdr)
+{
+#pragma unused(sc)
+#pragma unused(data)
+#pragma unused(eh)
+#pragma unused(hdr)
+       return;
+}
+#endif
+
+#include <netinet/icmp6.h>
+#include <netinet6/nd6.h>
+
+#define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
+
+static void
+bridge_mac_nat_icmpv6_output(struct bridge_softc *sc, struct bridge_iflist *bif,
+    mbuf_t *data, struct ether_header *eh,
+    struct ip6_hdr *ip6h, struct in6_addr *saddrp, struct mac_nat_record *mnr)
+{
+       struct icmp6_hdr *icmp6;
+       unsigned int    icmp6len;
+       int             lladdrlen = 0;
+       char            *lladdr = NULL;
+       mbuf_t          m = *data;
+       unsigned int    off = sizeof(*ip6h);
+
+       icmp6len = m->m_pkthdr.len - sizeof(*eh) - off;
+       if (icmp6len < sizeof(*icmp6)) {
+               printf("%s: short packet %d < %lu\n", __func__,
+                   icmp6len, sizeof(*icmp6));
+               return;
+       }
+       icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
+       switch (icmp6->icmp6_type) {
+       case ND_NEIGHBOR_SOLICIT: {
+               struct nd_neighbor_solicit *nd_ns;
+               union nd_opts ndopts;
+               boolean_t is_dad_probe;
+               struct in6_addr taddr;
+
+               if (icmp6len < sizeof(*nd_ns)) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               printf("%s: short nd_ns %d < %lu\n", __func__,
+                                   icmp6len, sizeof(*nd_ns));
+                       }
+                       return;
+               }
+
+               nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
+               bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
+               if (IN6_IS_ADDR_MULTICAST(&taddr) ||
+                   IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               printf("%s: invalid target ignored\n", __func__);
+                       }
+                       return;
+               }
+               /* parse options */
+               nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
+               if (nd6_options(&ndopts) < 0) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               printf("%s: invalid ND6 NS option\n", __func__);
+                       }
+                       return;
+               }
+               if (ndopts.nd_opts_src_lladdr != NULL) {
+                       lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
+                       lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
+               }
+               is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
+               if (lladdr != NULL) {
+                       if (is_dad_probe) {
+                               printf("%s: bad ND6 DAD packet\n", __func__);
+                               return;
+                       }
+                       if (lladdrlen != ETHER_ND_LLADDR_LEN) {
+                               if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                                       printf("%s: source lladdrlen %d != %lu\n",
+                                           __func__,
+                                           lladdrlen, ETHER_ND_LLADDR_LEN);
+                               }
+                               return;
+                       }
+                       mnr->mnr_ip6_lladdr_offset = (void *)lladdr -
+                           (void *)eh;
+                       mnr->mnr_ip6_icmp6_len = icmp6len;
+                       mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
+                       mnr->mnr_ip6_header_len = off;
+               }
+               if (is_dad_probe) {
+                       /* node is trying to use taddr; create an mne using taddr */
+                       *saddrp = taddr;
+               }
+               break;
+       }
+       case ND_NEIGHBOR_ADVERT: {
+               struct nd_neighbor_advert *nd_na;
+               union nd_opts ndopts;
+               struct in6_addr taddr;
+
+
+
+               if (icmp6len < sizeof(*nd_na)) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               printf("%s: short nd_na %d < %lu\n", __func__,
+                                   icmp6len, sizeof(*nd_na));
+                       }
+                       return;
+               }
+
+               bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
+               if (IN6_IS_ADDR_MULTICAST(&taddr) ||
+                   IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               printf("%s: invalid target ignored\n", __func__);
+                       }
+                       return;
+               }
+               /* parse options */
+               nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
+               if (nd6_options(&ndopts) < 0) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               printf("%s: invalid ND6 NA option\n", __func__);
+                       }
+                       return;
+               }
+               if (ndopts.nd_opts_tgt_lladdr == NULL) {
+                       /* no target link-layer address option, nothing to do */
+                       return;
+               }
+               lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
+               lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
+               if (lladdrlen != ETHER_ND_LLADDR_LEN) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               printf("%s: target lladdrlen %d != %lu\n",
+                                   __func__, lladdrlen, ETHER_ND_LLADDR_LEN);
+                       }
+                       return;
+               }
+               mnr->mnr_ip6_lladdr_offset = (void *)lladdr - (void *)eh;
+               mnr->mnr_ip6_icmp6_len = icmp6len;
+               mnr->mnr_ip6_header_len = off;
+               mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
+               break;
+       }
+       case ND_ROUTER_SOLICIT: {
+               struct nd_router_solicit *nd_rs;
+               union nd_opts ndopts;
+
+               if (icmp6len < sizeof(*nd_rs)) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               printf("%s: short nd_rs %d < %lu\n", __func__,
+                                   icmp6len, sizeof(*nd_rs));
+                       }
+                       return;
+               }
+               nd_rs = (struct nd_router_solicit *)(void *)icmp6;
+
+               /* parse options */
+               nd6_option_init(nd_rs + 1, icmp6len - sizeof(*nd_rs), &ndopts);
+               if (nd6_options(&ndopts) < 0) {
+                       if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                               printf("%s: invalid ND6 RS option\n", __func__);
+                       }
+                       return;
+               }
+               if (ndopts.nd_opts_src_lladdr != NULL) {
+                       lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
+                       lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
+               }
+               if (lladdr != NULL) {
+                       if (lladdrlen != ETHER_ND_LLADDR_LEN) {
+                               if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+                                       printf("%s: source lladdrlen %d != %lu\n",
+                                           __func__,
+                                           lladdrlen, ETHER_ND_LLADDR_LEN);
+                               }
+                               return;
+                       }
+                       mnr->mnr_ip6_lladdr_offset = (void *)lladdr -
+                           (void *)eh;
+                       mnr->mnr_ip6_icmp6_len = icmp6len;
+                       mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
+                       mnr->mnr_ip6_header_len = off;
+               }
+               break;
+       }
+       default:
+               break;
+       }
+       if (mnr->mnr_ip6_lladdr_offset != 0 &&
+           IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+               const char *str;
+
+               switch (mnr->mnr_ip6_icmp6_type) {
+               case ND_ROUTER_SOLICIT:
+                       str = "ROUTER SOLICIT";
+                       break;
+               case ND_NEIGHBOR_ADVERT:
+                       str = "NEIGHBOR ADVERT";
+                       break;
+               case ND_NEIGHBOR_SOLICIT:
+                       str = "NEIGHBOR SOLICIT";
+                       break;
+               default:
+                       str = "";
+                       break;
+               }
+               printf("%s %s %s ip6len %d icmp6len %d lladdr offset %d\n",
+                   sc->sc_if_xname, bif->bif_ifp->if_xname, str,
+                   mnr->mnr_ip6_header_len,
+                   mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
+       }
+}
+
+static struct mac_nat_entry *
+bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
+{
+       struct in6_addr         dst;
+       struct ether_header     *eh;
+       struct ip6_hdr          *ip6h;
+       struct mac_nat_entry    *mne = NULL;
+
+       eh = get_ether_ipv6_header(data, FALSE);
+       if (eh == NULL) {
+               goto done;
+       }
+       ip6h = (struct ip6_hdr *)(void *)(eh + 1);
+#if 0
+       if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
+               bridge_mac_nat_icmpv6_input(sc, data, eh, ip6h);
+       }
+#endif
+       bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
+       /* XXX validate IPv6 address */
+       if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
+               goto done;
+       }
+       mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
+
+done:
+       return mne;
+}
+
+static boolean_t
+bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
+    struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
+{
+       struct ether_header     *eh;
+       struct ip6_hdr          *ip6h;
+       struct in6_addr         saddr;
+       boolean_t               translate;
+
+       translate = (bif != sc->sc_mac_nat_bif);
+       eh = get_ether_ipv6_header(data, TRUE);
+       if (eh == NULL) {
+               translate = FALSE;
+               goto done;
+       }
+       ip6h = (struct ip6_hdr *)(void *)(eh + 1);
+       bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
+       if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
+               bridge_mac_nat_icmpv6_output(sc, bif, data,
+                   eh, ip6h, &saddr, mnr);
+       }
+       if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
+               goto done;
+       }
+       (void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
+           eh->ether_shost);
+
+done:
+       return translate;
+}
+
+/*
+ * bridge_mac_nat_input:
+ * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
+ * This interface is the "external" interface with respect to NAT.
+ * The interface can only send and receive frames using a single MAC
+ * address (e.g. a Wi-Fi STA interface).
+ *
+ * When a packet arrives on the external interface, look up the destination
+ * IP address in the mac_nat_entry table. On a match, the destination MAC
+ * address is rewritten to the entry's MAC address (for unicast packets),
+ * and *is_input is set to TRUE if the entry belongs to the MAC NAT
+ * interface itself, FALSE otherwise.
+ *
+ * Returns:
+ * The internal interface to direct the packet to, or NULL if the packet
+ * should not be redirected.
+ *
+ * *data may be updated to point at a different mbuf chain, or set to NULL
+ * if the chain was deallocated during processing.
+ */
+static ifnet_t
+bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
+    boolean_t *is_input)
+{
+       ifnet_t                 dst_if = NULL;
+       struct ether_header     *eh;
+       uint16_t                ether_type;
+       boolean_t               is_unicast;
+       mbuf_t                  m = *data;
+       struct mac_nat_entry    *mne = NULL;
+
+       BRIDGE_LOCK_ASSERT_HELD(sc);
+       *is_input = FALSE;
+       assert(sc->sc_mac_nat_bif != NULL);
+       is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
+       eh = mtod(m, struct ether_header *);
+       ether_type = ntohs(eh->ether_type);
+       switch (ether_type) {
+       case ETHERTYPE_ARP:
+               mne = bridge_mac_nat_arp_input(sc, data);
+               break;
+       case ETHERTYPE_IP:
+               if (is_unicast) {
+                       mne = bridge_mac_nat_ip_input(sc, data);
+               }
+               break;
+       case ETHERTYPE_IPV6:
+               if (is_unicast) {
+                       mne = bridge_mac_nat_ipv6_input(sc, data);
+               }
+               break;
+       default:
+               break;
+       }
+       if (mne != NULL) {
+               if (is_unicast) {
+                       if (m != *data) {
+                               /* it may have changed */
+                               eh = mtod(*data, struct ether_header *);
+                       }
+                       bcopy(mne->mne_mac, eh->ether_dhost,
+                           sizeof(eh->ether_dhost));
+               }
+               dst_if = mne->mne_bif->bif_ifp;
+               *is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
+       }
+       return dst_if;
+}
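
As a concrete illustration of the input path: a unicast ARP reply arriving
on the Wi-Fi STA member whose target IP matches an entry for an internal
host has its arp_tha rewritten by bridge_mac_nat_arp_input(), its
ether_dhost rewritten to mne_mac above, and comes back with dst_if pointing
at the internal member and *is_input set to FALSE.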
+
+/*
+ * bridge_mac_nat_output:
+ * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
+ * from the interface 'bif'.
+ *
+ * Create a mac_nat_entry containing the source IP address and MAC address
+ * from the packet. Populate a mac_nat_record with information detailing
+ * how to translate the packet. Translation takes place later when
+ * the bridge lock is no longer held.
+ *
+ * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT interface is
+ * generating an output packet. No translation is required in this case;
+ * we just record the IP address used, to prevent another bif from
+ * claiming our IP address.
+ *
+ * Returns:
+ * TRUE if the packet should be translated (*mnr updated as well),
+ * FALSE otherwise.
+ *
+ * *data may be updated to point at a different mbuf chain or NULL if
+ * the chain was deallocated during processing.
+ */
+
+static boolean_t
+bridge_mac_nat_output(struct bridge_softc *sc,
+    struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
+{
+       struct ether_header     *eh;
+       uint16_t                ether_type;
+       boolean_t               translate = FALSE;
+
+       BRIDGE_LOCK_ASSERT_HELD(sc);
+       assert(sc->sc_mac_nat_bif != NULL);
+
+       eh = mtod(*data, struct ether_header *);
+       ether_type = ntohs(eh->ether_type);
+       if (mnr != NULL) {
+               bzero(mnr, sizeof(*mnr));
+               mnr->mnr_ether_type = ether_type;
+       }
+       switch (ether_type) {
+       case ETHERTYPE_ARP:
+               translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
+               break;
+       case ETHERTYPE_IP:
+               translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
+               break;
+       case ETHERTYPE_IPV6:
+               translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
+               break;
+       default:
+               break;
+       }
+       return translate;
+}
+
+static void
+bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
+    const caddr_t eaddr)
+{
+       errno_t                 error;
+
+       if (mnr->mnr_arp_offset == 0) {
+               return;
+       }
+       /* replace the source hardware address */
+       error = mbuf_copyback(*data, mnr->mnr_arp_offset,
+           ETHER_ADDR_LEN, eaddr,
+           MBUF_DONTWAIT);
+       if (error != 0) {
+               printf("%s: mbuf_copyback failed\n",
+                   __func__);
+               m_freem(*data);
+               *data = NULL;
+       }
+       return;
+}
+
+static void
+bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
+{
+       errno_t         error;
+       size_t          offset;
+
+       if (mnr->mnr_ip_header_len == 0) {
+               return;
+       }
+       /* update the UDP checksum */
+       offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
+       error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
+           sizeof(mnr->mnr_ip_udp_csum),
+           &mnr->mnr_ip_udp_csum,
+           MBUF_DONTWAIT);
+       if (error != 0) {
+               printf("%s: mbuf_copyback uh_sum failed\n",
+                   __func__);
+               m_freem(*data);
+               *data = NULL;
+               return;
+       }
+       /* update the DHCP must broadcast flag */
+       offset += sizeof(struct udphdr);
+       error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
+           sizeof(mnr->mnr_ip_dhcp_flags),
+           &mnr->mnr_ip_dhcp_flags,
+           MBUF_DONTWAIT);
+       if (error != 0) {
+               printf("%s: mbuf_copyback dp_flags failed\n",
+                   __func__);
+               m_freem(*data);
+               *data = NULL;
+       }
+}
+
+static void
+bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
+    const caddr_t eaddr)
+{
+       uint16_t        cksum;
+       errno_t         error;
+       mbuf_t          m = *data;
+
+       if (mnr->mnr_ip6_header_len == 0) {
+               return;
+       }
+       switch (mnr->mnr_ip6_icmp6_type) {
+       case ND_ROUTER_SOLICIT:
+       case ND_NEIGHBOR_SOLICIT:
+       case ND_NEIGHBOR_ADVERT:
+               if (mnr->mnr_ip6_lladdr_offset == 0) {
+                       /* nothing to do */
+                       return;
+               }
+               break;
+       default:
+               return;
+       }
+
+       /*
+        * replace the lladdr
+        */
+       error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
+           ETHER_ADDR_LEN, eaddr,
+           MBUF_DONTWAIT);
+       if (error != 0) {
+               printf("%s: mbuf_copyback lladdr failed\n",
+                   __func__);
+               m_freem(m);
+               *data = NULL;
+               return;
+       }
+
+       /*
+        * recompute the icmp6 checksum
+        */
+
+       /* skip past the ethernet header */
+       mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
+           mbuf_len(m) - ETHER_HDR_LEN);
+       mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
+
+#define CKSUM_OFFSET_ICMP6      offsetof(struct icmp6_hdr, icmp6_cksum)
+       /* set the checksum to zero */
+       cksum = 0;
+       error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
+           sizeof(cksum), &cksum, MBUF_DONTWAIT);
+       if (error != 0) {
+               printf("%s: mbuf_copyback cksum=0 failed\n",
+                   __func__);
+               m_freem(m);
+               *data = NULL;
+               return;
+       }
+       /* compute and set the new checksum */
+       cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
+           mnr->mnr_ip6_icmp6_len);
+       error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
+           sizeof(cksum), &cksum, MBUF_DONTWAIT);
+       if (error != 0) {
+               printf("%s: mbuf_copyback cksum failed\n",
+                   __func__);
+               m_freem(m);
+               *data = NULL;
+               return;
+       }
+       /* restore the ethernet header */
+       mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
+           mbuf_len(m) + ETHER_HDR_LEN);
+       mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
+       return;
+}
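
The zero-then-recompute sequence above follows the standard ICMPv6 rule:
the icmp6_cksum field must be zero while the checksum is computed over the
IPv6 pseudo-header plus the ICMPv6 message, which is what in6_cksum() is
handed once the Ethernet header has been temporarily trimmed away.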
+
+static void
+bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
+    const caddr_t eaddr)
+{
+       struct ether_header     *eh;
+
+       /* replace the source ethernet address with the single MAC */
+       eh = mtod(*data, struct ether_header *);
+       bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
+       switch (mnr->mnr_ether_type) {
+       case ETHERTYPE_ARP:
+               bridge_mac_nat_arp_translate(data, mnr, eaddr);
+               break;
+
+       case ETHERTYPE_IP:
+               bridge_mac_nat_ip_translate(data, mnr);
+               break;
+
+       case ETHERTYPE_IPV6:
+               bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
+               break;
+
+       default:
+               break;
+       }
+       return;
+}
+
+/*
+ * bridge packet filtering
+ */
+
+/*
+ * The PF routines expect to be called as if from ip_input(), so we
+ * need to do, and then undo, some of the same processing here.
+ *
+ * XXX : this is heavily inspired by bridge_pfil()
+ */
+static
+int
+bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags, int input)
+{
+       /*
+        * XXX : mpetit : heavily inspired by bridge_pfil()
+        */
+
+       int snap, error, i, hlen;
+       struct ether_header *eh1, eh2;
+       struct ip *ip;
+       struct llc llc1;
+       u_int16_t ether_type;
+
+       snap = 0;
+       error = -1;     /* default to an error unless explicitly cleared below */
+
+       if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
+               return 0; /* filtering is disabled */
+       }
+       i = min((*mp)->m_pkthdr.len, max_protohdr);
+       if ((*mp)->m_len < i) {
+               *mp = m_pullup(*mp, i);
+               if (*mp == NULL) {
+                       printf("%s: m_pullup failed\n", __func__);
+                       return -1;
+               }
+       }
+
+       eh1 = mtod(*mp, struct ether_header *);
+       ether_type = ntohs(eh1->ether_type);
+
+       /*
+        * Check for SNAP/LLC.
+        */
+       if (ether_type < ETHERMTU) {
+               struct llc *llc2 = (struct llc *)(eh1 + 1);
+
+               if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
+                   llc2->llc_dsap == LLC_SNAP_LSAP &&
+                   llc2->llc_ssap == LLC_SNAP_LSAP &&
+                   llc2->llc_control == LLC_UI) {
+                       ether_type = htons(llc2->llc_un.type_snap.ether_type);
+                       snap = 1;
+               }
+       }
+
+       /*
+        * If we're trying to filter bridge traffic, don't look at anything
+        * other than IP and ARP traffic.  If the filter doesn't understand
+        * IPv6, don't allow IPv6 through the bridge either.  This is lame
+        * since if we really wanted, say, an AppleTalk filter, we are hosed,
+        * but of course we don't have an AppleTalk filter to begin with.
+        * (Note that since pf doesn't understand ARP it will pass *ALL*
+        * ARP traffic.)
+        */
+       switch (ether_type) {
+       case ETHERTYPE_ARP:
+       case ETHERTYPE_REVARP:
+               return 0;         /* Automatically pass */
+
+       case ETHERTYPE_IP:
+       case ETHERTYPE_IPV6:
+               break;
+       default:
+               /*
+                * Check whether the user wants to pass non-IP
+                * packets; pf cannot inspect these, so they would
+                * be passed unconditionally.  With IFBF_FILT_ONLYIP
+                * set we drop them instead.
+                */
+               if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
+                       goto bad;
+               }
+               break;
+       }
+
+       /* Strip off the Ethernet header and keep a copy. */
+       m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
+       m_adj(*mp, ETHER_HDR_LEN);
+
+       /* Strip off snap header, if present */
+       if (snap) {
+               m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
+               m_adj(*mp, sizeof(struct llc));
+       }
+
+       /*
+        * Check the IP header for alignment and errors
+        */
+       switch (ether_type) {
+       case ETHERTYPE_IP:
+               error = bridge_ip_checkbasic(mp);
+               break;
+       case ETHERTYPE_IPV6:
+               error = bridge_ip6_checkbasic(mp);
+               break;
+       default:
+               error = 0;
+               break;
+       }
+       if (error) {
+               goto bad;
+       }
+
+       error = 0;
+
+       /*
+        * Run the packet through pf rules
+        */
+       switch (ether_type) {
+       case ETHERTYPE_IP:
+               /*
+                * Before calling the firewall, swap fields to host
+                * order the same way ip_input() does; we assume the
+                * header is contiguous here.
+                */
+               ip = mtod(*mp, struct ip *);
+
+               ip->ip_len = ntohs(ip->ip_len);
+               ip->ip_off = ntohs(ip->ip_off);
+
+               if (ifp != NULL) {
+                       error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
+               }
+
+               if (*mp == NULL || error != 0) { /* filter may consume */
+                       break;
+               }
+
+               /* Recalculate the ip checksum and restore byte ordering */
+               ip = mtod(*mp, struct ip *);
+               hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+               if (hlen < (int)sizeof(struct ip)) {
+                       goto bad;
+               }
+               if (hlen > (*mp)->m_len) {
+                       if ((*mp = m_pullup(*mp, hlen)) == 0) {
+                               goto bad;
+                       }
+                       ip = mtod(*mp, struct ip *);
+                       if (ip == NULL) {
+                               goto bad;
+                       }
+               }
+               ip->ip_len = htons(ip->ip_len);
+               ip->ip_off = htons(ip->ip_off);
+               ip->ip_sum = 0;
+               if (hlen == sizeof(struct ip)) {
+                       ip->ip_sum = in_cksum_hdr(ip);
+               } else {
+                       ip->ip_sum = in_cksum(*mp, hlen);
+               }
+               break;
+
+       case ETHERTYPE_IPV6:
+               if (ifp != NULL) {
+                       error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
+               }
+
+               if (*mp == NULL || error != 0) { /* filter may consume */
+                       break;
+               }
+               break;
+       default:
+               error = 0;
+               break;
+       }
+
+       if (*mp == NULL) {
+               return error;
+       }
+       if (error != 0) {
+               goto bad;
+       }
+
+       error = -1;
+
+       /*
+        * Finally, put everything back the way it was and return
+        */
+       if (snap) {
+               M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
+               if (*mp == NULL) {
+                       return error;
+               }
+               bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
+       }
+
+       M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
+       if (*mp == NULL) {
+               return error;
+       }
+       bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
+
+       return 0;
+
+bad:
+       m_freem(*mp);
+       *mp = NULL;
+       return error;
+}
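
The ip_len/ip_off byte swapping around pf_af_hook() above follows the
historical convention that pf, like ip_input(), sees those fields in
host order.  A minimal sketch of that convention, where `filter' is a
stand-in for pf_af_hook() (not a real API) and the header checksum is
invalidated for recomputation afterwards:

#include <netinet/in.h>
#include <netinet/ip.h>

static void
call_filter_host_order(struct ip *ip, void (*filter)(struct ip *))
{
	ip->ip_len = ntohs(ip->ip_len); /* to host order for the hook */
	ip->ip_off = ntohs(ip->ip_off);

	filter(ip);                     /* stand-in for pf_af_hook() */

	ip->ip_len = htons(ip->ip_len); /* restore wire byte order */
	ip->ip_off = htons(ip->ip_off);
	ip->ip_sum = 0;                 /* header checksum is now stale */
}
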
index ae7754b7b8def1557b6e3242f5334fa6917f57a3..48a6555fc9eabce769527363dba1ce363c0cd063 100644
 
 #include <net/if.h>
 #include <net/ethernet.h>
+#include <netinet/in.h>
 
 /*
  * Commands used in the SIOCSDRVSPEC ioctl.  Note the lookup of the
 #define BRDGSPROTO              31      /* set protocol (ifbrparam) */
 #define BRDGSTXHC               32      /* set tx hold count (ifbrparam) */
 #define BRDGSIFAMAX             33      /* set max interface addrs (ifbreq) */
-#define BRDGGHOSTFILTER         34      /* set max interface addrs (ifbrhostfilter) */
-#define BRDGSHOSTFILTER         35      /* set max interface addrs (ifbrhostfilter) */
+#define BRDGGHOSTFILTER         34      /* get host filter (ifbrhostfilter) */
+#define BRDGSHOSTFILTER         35      /* set host filter (ifbrhostfilter) */
+#define BRDGGMACNATLIST         36      /* get MAC NAT list */
+
 
 /*
  * Generic bridge control request.
@@ -175,7 +178,7 @@ struct ifbreq {
 
 #pragma pack()
 
-/* BRDGGIFFLAGS, BRDGSIFFLAGS */
+/* BRDGGIFFLGS, BRDGSIFFLGS */
 #define IFBIF_LEARNING          0x0001  /* if can learn */
 #define IFBIF_DISCOVER          0x0002  /* if sends packets w/ unknown dest. */
 #define IFBIF_STP               0x0004  /* if participates in spanning tree */
@@ -188,10 +191,13 @@ struct ifbreq {
 #define IFBIF_BSTP_ADMEDGE      0x0200  /* member stp admin edge enabled */
 #define IFBIF_BSTP_ADMCOST      0x0400  /* member stp admin path cost */
 #define IFBIF_PRIVATE           0x0800  /* if is a private segment */
+#define IFBIF_MAC_NAT           0x8000  /* member requires MAC NAT */
 
 #define IFBIFBITS       "\020\001LEARNING\002DISCOVER\003STP\004SPAN" \
-                       "\005STICKY\014PRIVATE\006EDGE\007AUTOEDGE\010PTP" \
-                       "\011AUTOPTP"
+       "\005STICKY\006EDGE\007AUTOEDGE\010PTP"                       \
+       "\011AUTOPTP\014PRIVATE"                                      \
+       "\020MACNAT"
+
 #define IFBIFMASK       ~(IFBIF_BSTP_EDGE|IFBIF_BSTP_AUTOEDGE|IFBIF_BSTP_PTP| \
                        IFBIF_BSTP_AUTOPTP|IFBIF_BSTP_ADMEDGE| \
                        IFBIF_BSTP_ADMCOST)     /* not saved */
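
The IFBIFBITS string uses the kernel's historical "%b" bit-name
format: the first byte is the numeric print base (\020 = 16, hex) and
each following octal escape names that bit, counted from 1 at the
least significant bit.  So "\020MACNAT" labels bit 16, matching
IFBIF_MAC_NAT = 0x8000, and "\014PRIVATE" labels bit 12 (0x0800).  A
hedged user-space decoder sketch (print_bits is an illustrative name):

#include <stdio.h>

static void
print_bits(unsigned int v, const char *bits)
{
	int base = *bits++;
	int any = 0;

	printf(base == 16 ? "0x%x" : "%u", v);
	while (*bits) {
		int bit = *bits++;
		const char *name = bits;

		while (*bits > ' ') {   /* name runs to next control byte */
			bits++;
		}
		if (v & (1u << (bit - 1))) {
			printf("%c%.*s", any ? ',' : '<',
			    (int)(bits - name), name);
			any = 1;
		}
	}
	if (any) {
		printf(">");
	}
	printf("\n");
}

With the masks above, print_bits(0x8001, IFBIFBITS) would print
0x8001<LEARNING,MACNAT>.
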
@@ -201,23 +207,14 @@ struct ifbreq {
 #define IFBF_FLUSHALL           0x01    /* flush all addresses */
 
 /* BRDGSFILT */
-#define IFBF_FILT_USEIPF        0x00000001 /* run pfil hooks on the bridge
+#define IFBF_FILT_USEIPF        0x00000001 /* run pf hooks on the bridge
                                            *  interface */
-#define IFBF_FILT_MEMBER        0x00000002 /* run pfil hooks on the member
+#define IFBF_FILT_MEMBER        0x00000002 /* run pf hooks on the member
                                            *  interfaces */
 #define IFBF_FILT_ONLYIP        0x00000004 /* only pass IP[46] packets when
-                                           *  pfil is enabled */
+                                           *  pf is enabled */
 #define IFBF_FILT_MASK          0x00000007 /* mask of valid values */
 
-
-/* APPLE MODIFICATION <jhw@apple.com>: Default is to pass non-IP packets. */
-#define IFBF_FILT_DEFAULT       ( IFBF_FILT_USEIPF | IFBF_FILT_MEMBER )
-#if 0
-#define IFBF_FILT_DEFAULT       (IFBF_FILT_USEIPF | \
-IFBF_FILT_MEMBER | \
-IFBF_FILT_ONLYIP)
-#endif
-
 /*
  * Interface list structure.
  */
@@ -551,5 +548,58 @@ extern u_int8_t bstp_etheraddr[ETHER_ADDR_LEN];
 int     bridgeattach(int);
 
 #endif /* XNU_KERNEL_PRIVATE */
+
+
+/*
+ * MAC NAT entry list
+ */
+
+#pragma pack(4)
+
+union ifbrip {
+       struct in_addr  ifbrip_addr;
+       struct in6_addr ifbrip_addr6;
+};
+
+struct ifbrmne {
+       char            ifbmne_ifname[IFNAMSIZ]; /* member if name */
+       uint64_t        ifbmne_expire;           /* expiration time */
+       uint8_t         ifbmne_mac[ETHER_ADDR_LEN];/* MAC address */
+       uint8_t         ifbmne_reserved;
+       uint8_t         ifbmne_af;              /* AF_INET or AF_INET6 */
+       union ifbrip    ifbmne_ip;
+};
+#define ifbmne_ip_addr  ifbmne_ip.ifbrip_addr
+#define ifbmne_ip6_addr ifbmne_ip.ifbrip_addr6
+
+#ifndef XNU_KERNEL_PRIVATE
+
+struct ifbrmnelist {
+       uint32_t        ifbml_len;      /* buffer size (multiple of elsize) */
+       uint16_t        ifbml_elsize;   /* sizeof(struct ifbrmne) */
+       uint16_t        ifbml_pad;
+       caddr_t         ifbml_buf;
+};
+
+#else /* XNU_KERNEL_PRIVATE */
+
+struct ifbrmnelist32 {
+       uint32_t        ifbml_len;      /* buffer size */
+       uint16_t        ifbml_elsize;   /* sizeof(struct ifbrmne) */
+       uint16_t        ifbml_pad;
+       user32_addr_t   ifbml_buf;
+};
+
+struct ifbrmnelist64 {
+       uint32_t        ifbml_len;      /* buffer size */
+       uint16_t        ifbml_elsize;   /* sizeof(struct ifbrmne) */
+       uint16_t        ifbml_pad;
+       user64_addr_t   ifbml_buf;
+};
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#pragma pack()
+
 #endif /* PRIVATE */
 #endif /* !_NET_IF_BRIDGEVAR_H_ */
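
The new BRDGGMACNATLIST command follows the same SIOCGDRVSPEC pattern
as the other bridge ioctls above.  A hedged user-space sketch:
if_bridgevar.h is a private header, so this builds only where it is
available, the 64-entry buffer is an arbitrary choice, and the
expectation that ifbml_len comes back updated to the bytes copied out
is an assumption.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <net/if_bridgevar.h>   /* private: ifbrmne, ifbrmnelist */

int
main(void)
{
	struct ifbrmne entries[64];     /* arbitrary sketch capacity */
	struct ifbrmnelist mnl;
	struct ifdrv ifd;
	int s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s < 0) {
		return 1;
	}

	memset(&mnl, 0, sizeof(mnl));
	mnl.ifbml_len = sizeof(entries);
	mnl.ifbml_elsize = sizeof(entries[0]);
	mnl.ifbml_buf = (caddr_t)entries;

	memset(&ifd, 0, sizeof(ifd));
	strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
	ifd.ifd_cmd = BRDGGMACNATLIST;
	ifd.ifd_len = sizeof(mnl);
	ifd.ifd_data = &mnl;

	if (ioctl(s, SIOCGDRVSPEC, &ifd) == 0) {
		printf("%u bytes of MAC NAT entries\n", mnl.ifbml_len);
	}
	close(s);
	return 0;
}
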
index da7f7579dc745e2aa3ba8f9b3cd3dbb1c72b3341..291aea2d75b5fe95c92cccb27d5725bfa23a6777 100644
@@ -49,6 +49,13 @@ struct ifnet_interface_advisory;
 
 #include <sys/_types/_sa_family_t.h>
 
+#ifndef PRIVATE
+#include <Availability.h>
+#define __NKE_API_DEPRECATED __API_DEPRECATED("Network Kernel Extension KPI is deprecated", macos(10.4, 10.15.4))
+#else
+#define __NKE_API_DEPRECATED
+#endif /* PRIVATE */
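
For code building outside the kernel/private configuration, each
annotated declaration now carries an availability attribute, so a
third-party NKE gets a deprecation diagnostic at compile time.
Roughly, and as an illustration only (ifnet_example is a hypothetical
function, and the exact expansion belongs to <Availability.h>):

extern errno_t ifnet_example(ifnet_t interface)
__attribute__((availability(macosx, introduced=10.4, deprecated=10.15.4,
    message="Network Kernel Extension KPI is deprecated")));
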
+
 #ifdef XNU_KERNEL_PRIVATE
 #if CONFIG_EMBEDDED
 #define KPI_INTERFACE_EMBEDDED 1
@@ -1259,7 +1266,8 @@ extern errno_t ifnet_allocate_internal(const struct ifnet_init_params *init,
        ifnet_allocate_internal((init), (interface))
 #else
 extern errno_t ifnet_allocate(const struct ifnet_init_params *init,
-    ifnet_t *interface);
+    ifnet_t *interface)
+__NKE_API_DEPRECATED;
 #endif /* KERNEL_PRIVATE */
 
 #ifdef KERNEL_PRIVATE
@@ -1664,7 +1672,8 @@ extern errno_t ifnet_disable_output(ifnet_t interface);
  *       @param interface The interface to increment the reference count of.
  *       @result May return EINVAL if the interface is not valid.
  */
-extern errno_t ifnet_reference(ifnet_t interface);
+extern errno_t ifnet_reference(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_release
@@ -1674,7 +1683,8 @@ extern errno_t ifnet_reference(ifnet_t interface);
  *               and possibly free.
  *       @result May return EINVAL if the interface is not valid.
  */
-extern errno_t ifnet_release(ifnet_t interface);
+extern errno_t ifnet_release(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_attach
@@ -1695,7 +1705,8 @@ extern errno_t ifnet_release(ifnet_t interface);
  *               interface.
  */
 extern errno_t ifnet_attach(ifnet_t interface,
-    const struct sockaddr_dl *ll_addr);
+    const struct sockaddr_dl *ll_addr)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_detach
@@ -1721,7 +1732,8 @@ extern errno_t ifnet_attach(ifnet_t interface,
  *       @param interface The interface to detach.
  *       @result 0 on success, otherwise errno error.
  */
-extern errno_t ifnet_detach(ifnet_t interface);
+extern errno_t ifnet_detach(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_interface_family_find
@@ -1740,7 +1752,8 @@ extern errno_t ifnet_detach(ifnet_t interface);
  *               is rebooted.
  *       @result 0 on success, otherwise errno error.
  */
-extern errno_t ifnet_interface_family_find(const char *module_string, ifnet_family_t *family_id);
+extern errno_t ifnet_interface_family_find(const char *module_string, ifnet_family_t *family_id)
+__NKE_API_DEPRECATED;
 
 /*
  * Interface manipulation.
@@ -1752,7 +1765,8 @@ extern errno_t ifnet_interface_family_find(const char *module_string, ifnet_fami
  *       @param interface Interface to retrieve the storage from.
  *       @result Driver's private storage.
  */
-extern void *ifnet_softc(ifnet_t interface);
+extern void *ifnet_softc(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_name
@@ -1760,7 +1774,8 @@ extern void *ifnet_softc(ifnet_t interface);
  *       @param interface Interface to retrieve the name from.
  *       @result Pointer to the name.
  */
-extern const char *ifnet_name(ifnet_t interface);
+extern const char *ifnet_name(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_family
@@ -1768,7 +1783,8 @@ extern const char *ifnet_name(ifnet_t interface);
  *       @param interface Interface to retrieve the family from.
  *       @result Interface family type.
  */
-extern ifnet_family_t ifnet_family(ifnet_t interface);
+extern ifnet_family_t ifnet_family(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 /*
@@ -1786,7 +1802,9 @@ extern ifnet_subfamily_t ifnet_subfamily(ifnet_t interface);
  *       @param interface Interface to retrieve the unit number from.
  *       @result Unit number.
  */
-extern u_int32_t ifnet_unit(ifnet_t interface);
+extern u_int32_t ifnet_unit(ifnet_t interface)
+__NKE_API_DEPRECATED;
+
 
 /*!
  *       @function ifnet_index
@@ -1798,7 +1816,8 @@ extern u_int32_t ifnet_unit(ifnet_t interface);
  *       @param interface Interface to retrieve the index of.
  *       @result Index.
  */
-extern u_int32_t ifnet_index(ifnet_t interface);
+extern u_int32_t ifnet_index(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_flags
@@ -1813,7 +1832,8 @@ extern u_int32_t ifnet_index(ifnet_t interface);
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_set_flags(ifnet_t interface, u_int16_t new_flags,
-    u_int16_t mask);
+    u_int16_t mask)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_flags
@@ -1821,8 +1841,8 @@ extern errno_t ifnet_set_flags(ifnet_t interface, u_int16_t new_flags,
  *       @param interface Interface to retrieve the flags from.
  *       @result Flags. These flags are defined in net/if.h
  */
-extern u_int16_t ifnet_flags(ifnet_t interface);
-
+extern u_int16_t ifnet_flags(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 /*
@@ -2016,7 +2036,8 @@ extern errno_t ifnet_inet6_defrouter_llreachinfo(ifnet_t interface,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_set_capabilities_supported(ifnet_t interface, u_int32_t new_caps,
-    u_int32_t mask);
+    u_int32_t mask)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_capabilities_supported
@@ -2024,7 +2045,8 @@ extern errno_t ifnet_set_capabilities_supported(ifnet_t interface, u_int32_t new
  *       @param interface Interface to retrieve the capabilities from.
  *       @result Flags. Capabilities flags are defined in net/if.h
  */
-extern u_int32_t ifnet_capabilities_supported(ifnet_t interface);
+extern u_int32_t ifnet_capabilities_supported(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_capabilities_enabled
@@ -2056,7 +2078,8 @@ extern u_int32_t ifnet_capabilities_supported(ifnet_t interface);
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_set_capabilities_enabled(ifnet_t interface, u_int32_t new_caps,
-    u_int32_t mask);
+    u_int32_t mask)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_capabilities_enabled
@@ -2064,8 +2087,8 @@ extern errno_t ifnet_set_capabilities_enabled(ifnet_t interface, u_int32_t new_c
  *       @param interface Interface to retrieve the capabilities from.
  *       @result Flags. Capabilities flags are defined in net/if.h
  */
-extern u_int32_t ifnet_capabilities_enabled(ifnet_t interface);
-
+extern u_int32_t ifnet_capabilities_enabled(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_offload
@@ -2083,7 +2106,8 @@ extern u_int32_t ifnet_capabilities_enabled(ifnet_t interface);
  *               the device supports.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload);
+extern errno_t ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_offload
@@ -2092,7 +2116,8 @@ extern errno_t ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload);
  *       @param interface Interface to retrieve the offload from.
  *       @result Abilities flags, see ifnet_offload_t.
  */
-extern ifnet_offload_t ifnet_offload(ifnet_t interface);
+extern ifnet_offload_t ifnet_offload(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_tso_mtu
@@ -2105,7 +2130,8 @@ extern ifnet_offload_t ifnet_offload(ifnet_t interface);
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_set_tso_mtu(ifnet_t interface, sa_family_t family,
-    u_int32_t mtuLen);
+    u_int32_t mtuLen)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_get_tso_mtu
@@ -2119,7 +2145,8 @@ extern errno_t ifnet_set_tso_mtu(ifnet_t interface, sa_family_t family,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_get_tso_mtu(ifnet_t interface, sa_family_t family,
-    u_int32_t *mtuLen);
+    u_int32_t *mtuLen)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @enum Interface wake properties
@@ -2139,7 +2166,8 @@ enum {
  *       @param mask Mask of the properties to set of unset.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_set_wake_flags(ifnet_t interface, u_int32_t properties, u_int32_t mask);
+extern errno_t ifnet_set_wake_flags(ifnet_t interface, u_int32_t properties, u_int32_t mask)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_get_wake_flags
@@ -2147,7 +2175,8 @@ extern errno_t ifnet_set_wake_flags(ifnet_t interface, u_int32_t properties, u_i
  *       @param interface The interface.
  *       @result The wake properties
  */
-extern u_int32_t ifnet_get_wake_flags(ifnet_t interface);
+extern u_int32_t ifnet_get_wake_flags(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_link_mib_data
@@ -2166,7 +2195,8 @@ extern u_int32_t ifnet_get_wake_flags(ifnet_t interface);
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_set_link_mib_data(ifnet_t interface, void *mibData,
-    u_int32_t mibLen);
+    u_int32_t mibLen)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_get_link_mib_data
@@ -2183,7 +2213,8 @@ extern errno_t ifnet_set_link_mib_data(ifnet_t interface, void *mibData,
  *               no data.
  */
 extern errno_t ifnet_get_link_mib_data(ifnet_t interface, void *mibData,
-    u_int32_t *mibLen);
+    u_int32_t *mibLen)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_get_link_mib_data_length
@@ -2192,7 +2223,8 @@ extern errno_t ifnet_get_link_mib_data(ifnet_t interface, void *mibData,
  *       @result Returns the number of bytes of mib data associated with the
  *               interface.
  */
-extern u_int32_t ifnet_get_link_mib_data_length(ifnet_t interface);
+extern u_int32_t ifnet_get_link_mib_data_length(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_attach_protocol
@@ -2205,7 +2237,8 @@ extern u_int32_t ifnet_get_link_mib_data_length(ifnet_t interface);
  */
 extern errno_t ifnet_attach_protocol(ifnet_t interface,
     protocol_family_t protocol_family,
-    const struct ifnet_attach_proto_param *proto_details);
+    const struct ifnet_attach_proto_param *proto_details)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_attach_protocol_v2
@@ -2220,7 +2253,8 @@ extern errno_t ifnet_attach_protocol(ifnet_t interface,
  */
 extern errno_t ifnet_attach_protocol_v2(ifnet_t interface,
     protocol_family_t protocol_family,
-    const struct ifnet_attach_proto_param_v2 *proto_details);
+    const struct ifnet_attach_proto_param_v2 *proto_details)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_detach_protocol
@@ -2231,7 +2265,8 @@ extern errno_t ifnet_attach_protocol_v2(ifnet_t interface,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_detach_protocol(ifnet_t interface,
-    protocol_family_t protocol_family);
+    protocol_family_t protocol_family)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_output
@@ -2257,7 +2292,8 @@ extern errno_t ifnet_detach_protocol(ifnet_t interface,
  */
 extern errno_t ifnet_output(ifnet_t interface,
     protocol_family_t protocol_family, mbuf_t packet, void *route,
-    const struct sockaddr *dest);
+    const struct sockaddr *dest)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_output_raw
@@ -2277,7 +2313,8 @@ extern errno_t ifnet_output(ifnet_t interface,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_output_raw(ifnet_t interface,
-    protocol_family_t protocol_family, mbuf_t packet);
+    protocol_family_t protocol_family, mbuf_t packet)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_input
@@ -2294,7 +2331,8 @@ extern errno_t ifnet_output_raw(ifnet_t interface,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_input(ifnet_t interface, mbuf_t first_packet,
-    const struct ifnet_stat_increment_param *stats);
+    const struct ifnet_stat_increment_param *stats)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 /*
@@ -2335,7 +2373,8 @@ extern errno_t ifnet_input_extended(ifnet_t interface, mbuf_t first_packet,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_ioctl(ifnet_t interface, protocol_family_t protocol,
-    unsigned long ioctl_code, void *ioctl_arg);
+    unsigned long ioctl_code, void *ioctl_arg)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_event
@@ -2345,7 +2384,8 @@ extern errno_t ifnet_ioctl(ifnet_t interface, protocol_family_t protocol,
  *               event.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_event(ifnet_t interface, struct kern_event_msg *event_ptr);
+extern errno_t ifnet_event(ifnet_t interface, struct kern_event_msg *event_ptr)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_mtu
@@ -2360,21 +2400,24 @@ extern errno_t ifnet_event(ifnet_t interface, struct kern_event_msg *event_ptr);
  *       @param mtu The new MTU.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_set_mtu(ifnet_t interface, u_int32_t mtu);
+extern errno_t ifnet_set_mtu(ifnet_t interface, u_int32_t mtu)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_mtu
  *       @param interface The interface.
  *       @result The MTU.
  */
-extern u_int32_t ifnet_mtu(ifnet_t interface);
+extern u_int32_t ifnet_mtu(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_type
  *       @param interface The interface.
  *       @result The type. See net/if_types.h.
  */
-extern u_int8_t ifnet_type(ifnet_t interface);
+extern u_int8_t ifnet_type(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_addrlen
@@ -2386,14 +2429,16 @@ extern u_int8_t ifnet_type(ifnet_t interface);
  *       @param addrlen The new address length.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_set_addrlen(ifnet_t interface, u_int8_t addrlen);
+extern errno_t ifnet_set_addrlen(ifnet_t interface, u_int8_t addrlen)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_addrlen
  *       @param interface The interface.
  *       @result The address length.
  */
-extern u_int8_t ifnet_addrlen(ifnet_t interface);
+extern u_int8_t ifnet_addrlen(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_hdrlen
@@ -2405,14 +2450,16 @@ extern u_int8_t ifnet_addrlen(ifnet_t interface);
  *       @param hdrlen The new header length.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_set_hdrlen(ifnet_t interface, u_int8_t hdrlen);
+extern errno_t ifnet_set_hdrlen(ifnet_t interface, u_int8_t hdrlen)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_hdrlen
  *       @param interface The interface.
  *       @result The header length.
  */
-extern u_int8_t ifnet_hdrlen(ifnet_t interface);
+extern u_int8_t ifnet_hdrlen(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_metric
@@ -2424,14 +2471,16 @@ extern u_int8_t ifnet_hdrlen(ifnet_t interface);
  *       @param metric The new metric.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_set_metric(ifnet_t interface, u_int32_t metric);
+extern errno_t ifnet_set_metric(ifnet_t interface, u_int32_t metric)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_metric
  *       @param interface The interface.
  *       @result The metric.
  */
-extern u_int32_t ifnet_metric(ifnet_t interface);
+extern u_int32_t ifnet_metric(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_baudrate
@@ -2443,14 +2492,16 @@ extern u_int32_t ifnet_metric(ifnet_t interface);
  *       @param baudrate The new baudrate.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_set_baudrate(ifnet_t interface, u_int64_t baudrate);
+extern errno_t ifnet_set_baudrate(ifnet_t interface, u_int64_t baudrate)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_baudrate
  *       @param interface The interface.
  *       @result The baudrate.
  */
-extern u_int64_t ifnet_baudrate(ifnet_t interface);
+extern u_int64_t ifnet_baudrate(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 typedef struct if_bandwidths if_bandwidths_t;
@@ -2535,7 +2586,8 @@ extern errno_t ifnet_latencies(ifnet_t interface, if_latencies_t *output_lt,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_stat_increment(ifnet_t interface,
-    const struct ifnet_stat_increment_param *counts);
+    const struct ifnet_stat_increment_param *counts)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_stat_increment_in
@@ -2555,7 +2607,8 @@ extern errno_t ifnet_stat_increment(ifnet_t interface,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_stat_increment_in(ifnet_t interface,
-    u_int32_t packets_in, u_int32_t bytes_in, u_int32_t errors_in);
+    u_int32_t packets_in, u_int32_t bytes_in, u_int32_t errors_in)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_stat_increment_out
@@ -2574,7 +2627,8 @@ extern errno_t ifnet_stat_increment_in(ifnet_t interface,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_stat_increment_out(ifnet_t interface,
-    u_int32_t packets_out, u_int32_t bytes_out, u_int32_t errors_out);
+    u_int32_t packets_out, u_int32_t bytes_out, u_int32_t errors_out)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_stat
@@ -2590,7 +2644,8 @@ extern errno_t ifnet_stat_increment_out(ifnet_t interface,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_set_stat(ifnet_t interface,
-    const struct ifnet_stats_param *stats);
+    const struct ifnet_stats_param *stats)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_stat
@@ -2599,7 +2654,8 @@ extern errno_t ifnet_set_stat(ifnet_t interface,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_stat(ifnet_t interface,
-    struct ifnet_stats_param *out_stats);
+    struct ifnet_stats_param *out_stats)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_promiscuous
@@ -2616,7 +2672,8 @@ extern errno_t ifnet_stat(ifnet_t interface,
  *               zero, promiscuous mode will be disabled.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_set_promiscuous(ifnet_t interface, int on);
+extern errno_t ifnet_set_promiscuous(ifnet_t interface, int on)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_touch_lastchange
@@ -2624,7 +2681,8 @@ extern errno_t ifnet_set_promiscuous(ifnet_t interface, int on);
  *       @param interface The interface.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_touch_lastchange(ifnet_t interface);
+extern errno_t ifnet_touch_lastchange(ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_lastchange
@@ -2632,7 +2690,8 @@ extern errno_t ifnet_touch_lastchange(ifnet_t interface);
  *       @param last_change A timeval struct to copy the last time changed in
  *               to.
  */
-extern errno_t ifnet_lastchange(ifnet_t interface, struct timeval *last_change);
+extern errno_t ifnet_lastchange(ifnet_t interface, struct timeval *last_change)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_get_address_list
@@ -2647,7 +2706,8 @@ extern errno_t ifnet_lastchange(ifnet_t interface, struct timeval *last_change);
  *       @param addresses A pointer to a NULL terminated array of ifaddr_ts.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses);
+extern errno_t ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_get_address_list_family
@@ -2665,7 +2725,8 @@ extern errno_t ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses);
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_get_address_list_family(ifnet_t interface,
-    ifaddr_t **addresses, sa_family_t family);
+    ifaddr_t **addresses, sa_family_t family)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 /*!
@@ -2693,7 +2754,8 @@ __private_extern__ errno_t ifnet_get_address_list_family_internal(ifnet_t,
  *               memory used for the array of references.
  *       @param addresses An array of ifaddr_ts.
  */
-extern void ifnet_free_address_list(ifaddr_t *addresses);
+extern void ifnet_free_address_list(ifaddr_t *addresses)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_set_lladdr
@@ -2705,7 +2767,8 @@ extern void ifnet_free_address_list(ifaddr_t *addresses);
  *       @param lladdr_len The length, in bytes, of the link layer address.
  */
 extern errno_t ifnet_set_lladdr(ifnet_t interface, const void *lladdr,
-    size_t lladdr_len);
+    size_t lladdr_len)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_lladdr_copy_bytes
@@ -2717,7 +2780,8 @@ extern errno_t ifnet_set_lladdr(ifnet_t interface, const void *lladdr,
  *               length of the link-layer address.
  */
 extern errno_t ifnet_lladdr_copy_bytes(ifnet_t interface, void *lladdr,
-    size_t length);
+    size_t length)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 /*!
@@ -2739,6 +2803,7 @@ extern errno_t ifnet_guarded_lladdr_copy_bytes(ifnet_t interface, void *lladdr,
  *       @param interface The interface the link-layer address is on.
  */
 extern void *ifnet_lladdr(ifnet_t interface);
+
 #endif /* KERNEL_PRIVATE */
 
 /*!
@@ -2751,7 +2816,8 @@ extern void *ifnet_lladdr(ifnet_t interface);
  *       @param out_len On return, the length of the broadcast address.
  */
 extern errno_t ifnet_llbroadcast_copy_bytes(ifnet_t interface, void *addr,
-    size_t bufferlen, size_t *out_len);
+    size_t bufferlen, size_t *out_len)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 /*!
@@ -2767,7 +2833,8 @@ extern errno_t ifnet_llbroadcast_copy_bytes(ifnet_t interface, void *addr,
  *       @param type The link-layer address type.
  */
 extern errno_t ifnet_set_lladdr_and_type(ifnet_t interface, const void *lladdr,
-    size_t length, u_char type);
+    size_t length, u_char type)
+__NKE_API_DEPRECATED;
 #endif /* KERNEL_PRIVATE */
 
 /*!
@@ -2785,7 +2852,8 @@ extern errno_t ifnet_set_lladdr_and_type(ifnet_t interface, const void *lladdr,
  *               indicate other failures.
  */
 extern errno_t ifnet_resolve_multicast(ifnet_t ifp,
-    const struct sockaddr *proto_addr, struct sockaddr *ll_addr, size_t ll_len);
+    const struct sockaddr *proto_addr, struct sockaddr *ll_addr, size_t ll_len)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_add_multicast
@@ -2802,7 +2870,8 @@ extern errno_t ifnet_resolve_multicast(ifnet_t ifp,
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_add_multicast(ifnet_t interface,
-    const struct sockaddr *maddr, ifmultiaddr_t *multicast);
+    const struct sockaddr *maddr, ifmultiaddr_t *multicast)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_remove_multicast
@@ -2828,7 +2897,8 @@ extern errno_t ifnet_add_multicast(ifnet_t interface,
  *       @param multicast The multicast to be removed.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_remove_multicast(ifmultiaddr_t multicast);
+extern errno_t ifnet_remove_multicast(ifmultiaddr_t multicast)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_get_multicast_list
@@ -2844,7 +2914,8 @@ extern errno_t ifnet_remove_multicast(ifmultiaddr_t multicast);
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_get_multicast_list(ifnet_t interface,
-    ifmultiaddr_t **addresses);
+    ifmultiaddr_t **addresses)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_free_multicast_list
@@ -2853,7 +2924,8 @@ extern errno_t ifnet_get_multicast_list(ifnet_t interface,
  *               multicast address and frees the array.
  *       @param multicasts An array of references to the multicast addresses.
  */
-extern void ifnet_free_multicast_list(ifmultiaddr_t *multicasts);
+extern void ifnet_free_multicast_list(ifmultiaddr_t *multicasts)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_find_by_name
@@ -2866,7 +2938,8 @@ extern void ifnet_free_multicast_list(ifmultiaddr_t *multicasts);
  *               filled in if a matching interface is found.
  *       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_find_by_name(const char *ifname, ifnet_t *interface);
+extern errno_t ifnet_find_by_name(const char *ifname, ifnet_t *interface)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifnet_list_get
@@ -2883,7 +2956,8 @@ extern errno_t ifnet_find_by_name(const char *ifname, ifnet_t *interface);
  *       @result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_list_get(ifnet_family_t family, ifnet_t **interfaces,
-    u_int32_t *count);
+    u_int32_t *count)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 /*!
@@ -2903,6 +2977,7 @@ extern errno_t ifnet_list_get(ifnet_family_t family, ifnet_t **interfaces,
  */
 extern errno_t ifnet_list_get_all(ifnet_family_t family, ifnet_t **interfaces,
     u_int32_t *count);
+
 #endif /* KERNEL_PRIVATE */
 
 /*!
@@ -2914,7 +2989,8 @@ extern errno_t ifnet_list_get_all(ifnet_family_t family, ifnet_t **interfaces,
  *               ifnet_list_free.
  *       @param interfaces An array of interface references from ifnet_list_get.
  */
-extern void ifnet_list_free(ifnet_t *interfaces);
+extern void ifnet_list_free(ifnet_t *interfaces)
+__NKE_API_DEPRECATED;
 
 /******************************************************************************/
 /* ifaddr_t accessors                                                         */
@@ -2927,7 +3003,8 @@ extern void ifnet_list_free(ifnet_t *interfaces);
  *       @param ifaddr The interface address.
  *       @result 0 upon success
  */
-extern errno_t ifaddr_reference(ifaddr_t ifaddr);
+extern errno_t ifaddr_reference(ifaddr_t ifaddr)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifaddr_release
@@ -2936,7 +3013,8 @@ extern errno_t ifaddr_reference(ifaddr_t ifaddr);
  *       @param ifaddr The interface address.
  *       @result 0 upon success
  */
-extern errno_t ifaddr_release(ifaddr_t ifaddr);
+extern errno_t ifaddr_release(ifaddr_t ifaddr)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifaddr_address
@@ -2947,7 +3025,8 @@ extern errno_t ifaddr_release(ifaddr_t ifaddr);
  *       @result 0 upon success
  */
 extern errno_t ifaddr_address(ifaddr_t ifaddr, struct sockaddr *out_addr,
-    u_int32_t addr_size);
+    u_int32_t addr_size)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifaddr_address
@@ -2955,7 +3034,8 @@ extern errno_t ifaddr_address(ifaddr_t ifaddr, struct sockaddr *out_addr,
  *       @param ifaddr The interface address.
  *       @result 0 on failure, address family on success.
  */
-extern sa_family_t ifaddr_address_family(ifaddr_t ifaddr);
+extern sa_family_t ifaddr_address_family(ifaddr_t ifaddr)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifaddr_dstaddress
@@ -2966,7 +3046,8 @@ extern sa_family_t ifaddr_address_family(ifaddr_t ifaddr);
  *       @result 0 upon success
  */
 extern errno_t ifaddr_dstaddress(ifaddr_t ifaddr, struct sockaddr *out_dstaddr,
-    u_int32_t dstaddr_size);
+    u_int32_t dstaddr_size)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifaddr_netmask
@@ -2977,7 +3058,8 @@ extern errno_t ifaddr_dstaddress(ifaddr_t ifaddr, struct sockaddr *out_dstaddr,
  *       @result 0 upon success
  */
 extern errno_t ifaddr_netmask(ifaddr_t ifaddr, struct sockaddr *out_netmask,
-    u_int32_t netmask_size);
+    u_int32_t netmask_size)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifaddr_ifnet
@@ -2989,7 +3071,8 @@ extern errno_t ifaddr_netmask(ifaddr_t ifaddr, struct sockaddr *out_netmask,
  *       @param ifaddr The interface address.
  *       @result A reference to the interface the address is attached to.
  */
-extern ifnet_t ifaddr_ifnet(ifaddr_t ifaddr);
+extern ifnet_t ifaddr_ifnet(ifaddr_t ifaddr)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifaddr_withaddr
@@ -2999,7 +3082,8 @@ extern ifnet_t ifaddr_ifnet(ifaddr_t ifaddr);
  *       @param address The address to search for.
  *       @result A reference to the interface address.
  */
-extern ifaddr_t ifaddr_withaddr(const struct sockaddr *address);
+extern ifaddr_t ifaddr_withaddr(const struct sockaddr *address)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifaddr_withdstaddr
@@ -3010,8 +3094,8 @@ extern ifaddr_t ifaddr_withaddr(const struct sockaddr *address);
  *       @param destination The destination to search for.
  *       @result A reference to the interface address.
  */
-extern ifaddr_t ifaddr_withdstaddr(const struct sockaddr *destination);
-
+extern ifaddr_t ifaddr_withdstaddr(const struct sockaddr *destination)
+__NKE_API_DEPRECATED;
 /*!
  *       @function ifaddr_withnet
  *       @discussion Returns an interface address for the interface with the
@@ -3021,7 +3105,8 @@ extern ifaddr_t ifaddr_withdstaddr(const struct sockaddr *destination);
  *       @param net The network to search for.
  *       @result A reference to the interface address.
  */
-extern ifaddr_t ifaddr_withnet(const struct sockaddr *net);
+extern ifaddr_t ifaddr_withnet(const struct sockaddr *net)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifaddr_withroute
@@ -3035,7 +3120,8 @@ extern ifaddr_t ifaddr_withnet(const struct sockaddr *net);
  *       @result A reference to the interface address.
  */
 extern ifaddr_t ifaddr_withroute(int flags, const struct sockaddr *destination,
-    const struct sockaddr *gateway);
+    const struct sockaddr *gateway)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifaddr_findbestforaddr
@@ -3048,7 +3134,8 @@ extern ifaddr_t ifaddr_withroute(int flags, const struct sockaddr *destination,
  *       @result A reference to the interface address.
  */
 extern ifaddr_t ifaddr_findbestforaddr(const struct sockaddr *addr,
-    ifnet_t interface);
+    ifnet_t interface)
+__NKE_API_DEPRECATED;
 
 /******************************************************************************/
 /* ifmultiaddr_t accessors                                                    */
@@ -3061,7 +3148,8 @@ extern ifaddr_t ifaddr_findbestforaddr(const struct sockaddr *addr,
  *       @param ifmaddr The interface multicast address.
  *       @result 0 on success. Only error will be EINVAL if ifmaddr is not valid.
  */
-extern errno_t ifmaddr_reference(ifmultiaddr_t ifmaddr);
+extern errno_t ifmaddr_reference(ifmultiaddr_t ifmaddr)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifmaddr_release
@@ -3072,7 +3160,8 @@ extern errno_t ifmaddr_reference(ifmultiaddr_t ifmaddr);
  *       @param ifmaddr The interface multicast address.
  *       @result 0 on success. Only error will be EINVAL if ifmaddr is not valid.
  */
-extern errno_t ifmaddr_release(ifmultiaddr_t ifmaddr);
+extern errno_t ifmaddr_release(ifmultiaddr_t ifmaddr)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifmaddr_address
@@ -3082,7 +3171,8 @@ extern errno_t ifmaddr_release(ifmultiaddr_t ifmaddr);
  *       @result 0 on success.
  */
 extern errno_t ifmaddr_address(ifmultiaddr_t ifmaddr,
-    struct sockaddr *out_multicast, u_int32_t addr_size);
+    struct sockaddr *out_multicast, u_int32_t addr_size)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifmaddr_lladdress
@@ -3093,7 +3183,8 @@ extern errno_t ifmaddr_address(ifmultiaddr_t ifmaddr,
  *       @result 0 on success.
  */
 extern errno_t ifmaddr_lladdress(ifmultiaddr_t ifmaddr,
-    struct sockaddr *out_link_layer_multicast, u_int32_t addr_size);
+    struct sockaddr *out_link_layer_multicast, u_int32_t addr_size)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function ifmaddr_ifnet
@@ -3106,7 +3197,8 @@ extern errno_t ifmaddr_lladdress(ifmultiaddr_t ifmaddr,
  *       @param ifmaddr The interface multicast address.
  *       @result A reference to the interface.
  */
-extern ifnet_t ifmaddr_ifnet(ifmultiaddr_t ifmaddr);
+extern ifnet_t ifmaddr_ifnet(ifmultiaddr_t ifmaddr)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 /******************************************************************************/
index dd16bd7d426af63d726138d450878c4e4a1d7bfe..819112d7a45276305957f0d429e15f2c3989a66e 100644
 #include <sys/kernel_types.h>
 #include <net/kpi_interface.h>
 
+#ifndef PRIVATE
+#include <Availability.h>
+#define __NKE_API_DEPRECATED __API_DEPRECATED("Network Kernel Extension KPI is deprecated", macos(10.4, 10.15.4))
+#else
+#define __NKE_API_DEPRECATED
+#endif /* PRIVATE */
+
 struct kev_msg;
 
 __BEGIN_DECLS
@@ -212,7 +219,8 @@ extern errno_t iflt_attach_internal(ifnet_t interface, const struct iff_filter *
        iflt_attach_internal((interface), (filter), (filter_ref))
 #else
 extern errno_t iflt_attach(ifnet_t interface, const struct iff_filter *filter,
-    interface_filter_t *filter_ref);
+    interface_filter_t *filter_ref)
+__NKE_API_DEPRECATED;
 #endif /* KERNEL_PRIVATE */
 
 /*!
@@ -220,7 +228,8 @@ extern errno_t iflt_attach(ifnet_t interface, const struct iff_filter *filter,
  *       @discussion Detaches an interface filter from an interface.
  *       @param filter_ref The reference to the filter from iflt_attach.
  */
-extern void iflt_detach(interface_filter_t filter_ref);
+extern void iflt_detach(interface_filter_t filter_ref)
+__NKE_API_DEPRECATED;
 
 __END_DECLS
 #endif /* __KPI_INTERFACEFILTER__ */
index f8b2ee8a469ebe9bd40de4718b5754671c6fbbd5..f7ba31c2dd5fce8e1a065819b40c9bd65462c3c5 100644
 #include <sys/kernel_types.h>
 #include <net/kpi_interface.h>
 
+#ifndef PRIVATE
+#include <Availability.h>
+#define __NKE_API_DEPRECATED __API_DEPRECATED("Network Kernel Extension KPI is deprecated", macos(10.4, 10.15.4))
+#else
+#define __NKE_API_DEPRECATED
+#endif /* PRIVATE */
+
 __BEGIN_DECLS
 
 /******************************************************************************/
@@ -103,7 +110,8 @@ extern void proto_unregister_input(protocol_family_t protocol);
  *       @result A errno error on failure. Unless proto_input returns zero,
  *               the caller is responsible for freeing the mbuf.
  */
-extern errno_t proto_input(protocol_family_t protocol, mbuf_t packet);
+extern errno_t proto_input(protocol_family_t protocol, mbuf_t packet)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function proto_inject
@@ -115,7 +123,8 @@ extern errno_t proto_input(protocol_family_t protocol, mbuf_t packet);
  *       @result A errno error on failure. Unless proto_inject returns zero,
  *               the caller is responsible for freeing the mbuf.
  */
-extern errno_t proto_inject(protocol_family_t protocol, mbuf_t packet);
+extern errno_t proto_inject(protocol_family_t protocol, mbuf_t packet)
+__NKE_API_DEPRECATED;
 
 
 /******************************************************************************/
@@ -164,7 +173,8 @@ typedef void (*proto_unplumb_handler)(ifnet_t ifp, protocol_family_t protocol);
  */
 extern errno_t proto_register_plumber(protocol_family_t proto_fam,
     ifnet_family_t if_fam, proto_plumb_handler plumb,
-    proto_unplumb_handler unplumb);
+    proto_unplumb_handler unplumb)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function proto_unregister_plumber
@@ -174,7 +184,8 @@ extern errno_t proto_register_plumber(protocol_family_t proto_fam,
  *       @param if_fam The interface family these plumbing functions handle.
  */
 extern void proto_unregister_plumber(protocol_family_t proto_fam,
-    ifnet_family_t if_fam);
+    ifnet_family_t if_fam)
+__NKE_API_DEPRECATED;
 
 #ifdef BSD_KERNEL_PRIVATE
 /*
index 2133f31f3601376d03be95e0c2f2eedc550860c0..d410484cac3196610fc9dfed59718cf23b5e9c6e 100644
@@ -286,7 +286,8 @@ struct necp_socket_info {
        errno_t cred_result;
        unsigned has_client : 1;
        unsigned is_platform_binary : 1;
-       unsigned __pad_bits : 6;
+       unsigned used_responsible_pid : 1;
+       unsigned __pad_bits : 5;
 };
 
 static  lck_grp_attr_t  *necp_kernel_policy_grp_attr    = NULL;
@@ -956,7 +957,8 @@ necp_session_set_session_priority(struct necp_session *session, struct necp_sess
 
        // Enforce special session priorities with entitlements
        if (requested_session_priority == NECP_SESSION_PRIORITY_CONTROL ||
-           requested_session_priority == NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL) {
+           requested_session_priority == NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL ||
+           requested_session_priority == NECP_SESSION_PRIORITY_HIGH_RESTRICTED) {
                errno_t cred_result = priv_check_cred(kauth_cred_get(), PRIV_NET_PRIVILEGED_NECP_POLICIES, 0);
                if (cred_result != 0) {
                        NECPLOG(LOG_ERR, "Session does not hold necessary entitlement to claim priority level %d", requested_session_priority);
@@ -1788,6 +1790,10 @@ necp_policy_result_is_valid(u_int8_t *buffer, u_int32_t length)
        u_int32_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(buffer, length);
        switch (type) {
        case NECP_POLICY_RESULT_PASS:
+               if (parameter_length == 0 || parameter_length == sizeof(u_int32_t)) {
+                       validated = TRUE;
+               }
+               break;
        case NECP_POLICY_RESULT_DROP:
        case NECP_POLICY_RESULT_ROUTE_RULES:
        case NECP_POLICY_RESULT_SCOPED_DIRECT:
@@ -3507,6 +3513,12 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
        ultimate_result = necp_policy_get_result_type(policy);
        switch (ultimate_result) {
        case NECP_POLICY_RESULT_PASS: {
+               u_int32_t pass_flags = 0;
+               if (necp_policy_result_get_parameter_length_from_buffer(policy->result, policy->result_size) > 0) {
+                       if (necp_policy_get_result_parameter(policy, (u_int8_t *)&pass_flags, sizeof(pass_flags))) {
+                               ultimate_result_parameter.pass_flags = pass_flags;
+                       }
+               }
                if (socket_only_conditions) {         // socket_ip_conditions can be TRUE or FALSE
                        socket_layer_non_id_conditions = TRUE;
                        ip_output_layer_id_condition = TRUE;
@@ -4016,7 +4028,7 @@ necp_get_result_description(char *result_string, necp_kernel_policy_result resul
                break;
        }
        case NECP_KERNEL_POLICY_RESULT_PASS: {
-               snprintf(result_string, MAX_RESULT_STRING_LEN, "Pass");
+               snprintf(result_string, MAX_RESULT_STRING_LEN, "Pass (%X)", result_parameter.pass_flags);
                break;
        }
        case NECP_KERNEL_POLICY_RESULT_SKIP: {
@@ -5928,7 +5940,7 @@ necp_get_parent_cred_result(proc_t proc, struct necp_socket_info *info)
 
 #define NECP_KERNEL_ADDRESS_TYPE_CONDITIONS (NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX | NECP_KERNEL_CONDITION_LOCAL_EMPTY | NECP_KERNEL_CONDITION_REMOTE_EMPTY | NECP_KERNEL_CONDITION_LOCAL_NETWORKS)
 static void
-necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_application_uuid, char *account, char *domain, pid_t pid, uid_t uid, u_int16_t protocol, u_int32_t bound_interface_index, u_int32_t traffic_class, union necp_sockaddr_union *local_addr, union necp_sockaddr_union *remote_addr, u_int16_t local_port, u_int16_t remote_port, bool has_client, proc_t proc, u_int32_t drop_order, u_int32_t client_flags, struct necp_socket_info *info)
+necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_application_uuid, uuid_t responsible_application_uuid, char *account, char *domain, pid_t pid, uid_t uid, u_int16_t protocol, u_int32_t bound_interface_index, u_int32_t traffic_class, union necp_sockaddr_union *local_addr, union necp_sockaddr_union *remote_addr, u_int16_t local_port, u_int16_t remote_port, bool has_client, proc_t proc, u_int32_t drop_order, u_int32_t client_flags, struct necp_socket_info *info)
 {
        memset(info, 0, sizeof(struct necp_socket_info));
 
@@ -5971,6 +5983,15 @@ necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_applic
                }
        }
 
+       if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_APP_ID && !uuid_is_null(responsible_application_uuid)) {
+               struct necp_uuid_id_mapping *existing_mapping = necp_uuid_lookup_app_id_locked(responsible_application_uuid);
+               if (existing_mapping != NULL) {
+                       info->real_application_id = info->application_id;
+                       info->application_id = existing_mapping->id;
+                       info->used_responsible_pid = true;
+               }
+       }
+
        if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_ACCOUNT_ID && account != NULL) {
                struct necp_string_id_mapping *existing_mapping = necp_lookup_string_to_id_locked(&necp_account_id_list, account);
                if (existing_mapping) {
@@ -6046,7 +6067,8 @@ necp_application_find_policy_match_internal(proc_t proc,
     struct necp_client_endpoint *returned_v4_gateway,
     struct necp_client_endpoint *returned_v6_gateway,
     struct rtentry **returned_route, bool ignore_address,
-    bool has_client)
+    bool has_client,
+    uuid_t *returned_override_euuid)
 {
        int error = 0;
        size_t offset = 0;
@@ -6090,6 +6112,8 @@ necp_application_find_policy_match_internal(proc_t proc,
        uuid_clear(real_application_uuid);
        proc_getexecutableuuid(proc, real_application_uuid, sizeof(real_application_uuid));
        uuid_copy(application_uuid, real_application_uuid);
+       uuid_t responsible_application_uuid;
+       uuid_clear(responsible_application_uuid);
 
        char *domain = NULL;
        char *account = NULL;
@@ -6108,6 +6132,11 @@ necp_application_find_policy_match_internal(proc_t proc,
        bool has_checked_delegation_entitlement = FALSE;
        bool has_delegation_entitlement = FALSE;
 
+#if defined(XNU_TARGET_OS_OSX)
+       proc_t effective_proc = proc;
+       bool release_eproc = false;
+#endif /* defined(XNU_TARGET_OS_OSX) */
+
        if (returned_result == NULL) {
                return EINVAL;
        }
@@ -6120,6 +6149,10 @@ necp_application_find_policy_match_internal(proc_t proc,
                memset(returned_v6_gateway, 0, sizeof(struct necp_client_endpoint));
        }
 
+       if (returned_override_euuid != NULL) {
+               uuid_clear(*returned_override_euuid);
+       }
+
        memset(returned_result, 0, sizeof(struct necp_aggregate_result));
 
        u_int32_t drop_order = necp_process_drop_order(proc_ucred(proc));
@@ -6331,17 +6364,40 @@ necp_application_find_policy_match_internal(proc_t proc,
                return 0;
        }
 
+#if defined(XNU_TARGET_OS_OSX)
+       if (proc_pid(effective_proc) != pid) {
+               proc_t found_proc = proc_find(pid);
+               if (found_proc != PROC_NULL) {
+                       effective_proc = found_proc;
+                       release_eproc = true;
+               }
+       }
+       if (effective_proc->p_responsible_pid > 0 && effective_proc->p_responsible_pid != pid) {
+               proc_t responsible_proc = proc_find(effective_proc->p_responsible_pid);
+               if (responsible_proc != PROC_NULL) {
+                       proc_getexecutableuuid(responsible_proc, responsible_application_uuid, sizeof(responsible_application_uuid));
+                       proc_rele(responsible_proc);
+               }
+       }
+       if (release_eproc && effective_proc != PROC_NULL) {
+               proc_rele(effective_proc);
+       }
+#endif /* defined(XNU_TARGET_OS_OSX) */
+
        // Lock
        lck_rw_lock_shared(&necp_kernel_policy_lock);
 
        u_int32_t route_rule_id_array[MAX_AGGREGATE_ROUTE_RULES];
        size_t route_rule_id_array_count = 0;
-       necp_application_fillout_info_locked(application_uuid, real_application_uuid, account, domain, pid, uid, protocol, bound_interface_index, traffic_class, &local_addr, &remote_addr, local_port, remote_port, has_client, proc, drop_order, client_flags, &info);
+       necp_application_fillout_info_locked(application_uuid, real_application_uuid, responsible_application_uuid, account, domain, pid, uid, protocol, bound_interface_index, traffic_class, &local_addr, &remote_addr, local_port, remote_port, has_client, proc, drop_order, client_flags, &info);
        matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_app_layer_map, &info, &filter_control_unit, route_rule_id_array, &route_rule_id_array_count, MAX_AGGREGATE_ROUTE_RULES, &service_action, &service, netagent_ids, netagent_use_flags, NECP_MAX_NETAGENTS, required_agent_types, num_required_agent_types, proc, NULL, NULL, &drop_dest_policy_result, &drop_all_bypass);
        if (matched_policy) {
                returned_result->policy_id = matched_policy->id;
                returned_result->routing_result = matched_policy->result;
                memcpy(&returned_result->routing_result_parameter, &matched_policy->result_parameter, sizeof(returned_result->routing_result_parameter));
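+               // When the match was driven by the responsible app and the policy
+               // did not condition on the real application ID, report the
+               // responsible app's UUID as the effective-UUID override.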
+               if (returned_override_euuid != NULL && info.used_responsible_pid && !(matched_policy->condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID)) {
+                       uuid_copy(*returned_override_euuid, responsible_application_uuid);
+               }
        } else {
                bool drop_all = false;
                if (necp_drop_all_order > 0 || info.drop_order > 0 || drop_dest_policy_result == NECP_KERNEL_POLICY_RESULT_DROP) {
@@ -7184,12 +7240,27 @@ necp_socket_fillout_info_locked(struct inpcb *inp, struct sockaddr *override_loc
        }
 
        if (inp->inp_flags2 & INP2_WANT_APP_POLICY && necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_APP_ID) {
+               u_int32_t responsible_application_id = 0;
+
                struct necp_uuid_id_mapping *existing_mapping = necp_uuid_lookup_app_id_locked(((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid));
                if (existing_mapping) {
                        info->application_id = existing_mapping->id;
                }
 
-               if (!(so->so_flags & SOF_DELEGATED)) {
+#if defined(XNU_TARGET_OS_OSX)
+               if (so->so_rpid > 0) {
+                       existing_mapping = necp_uuid_lookup_app_id_locked(so->so_ruuid);
+                       if (existing_mapping != NULL) {
+                               responsible_application_id = existing_mapping->id;
+                       }
+               }
+#endif
+
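+               // A responsible process takes precedence: it becomes the primary
+               // application ID and the flow's own ID is kept as the real one.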
+               if (responsible_application_id > 0) {
+                       info->real_application_id = info->application_id;
+                       info->application_id = responsible_application_id;
+                       info->used_responsible_pid = true;
+               } else if (!(so->so_flags & SOF_DELEGATED)) {
                        info->real_application_id = info->application_id;
                } else if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID) {
                        struct necp_uuid_id_mapping *real_existing_mapping = necp_uuid_lookup_app_id_locked(so->last_uuid);
@@ -7438,7 +7509,7 @@ necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy
                                if (policy_search_array[i]->result == NECP_KERNEL_POLICY_RESULT_SKIP) {
                                        skip_order = policy_search_array[i]->result_parameter.skip_policy_order;
                                        skip_session_order = policy_search_array[i]->session_order + 1;
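+                                       // Only the first SKIP policy encountered is recorded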
-                                       if (skip_policy_id) {
+                                       if (skip_policy_id && *skip_policy_id == NECP_KERNEL_POLICY_ID_NONE) {
                                                *skip_policy_id = policy_search_array[i]->id;
                                        }
                                        continue;
@@ -7588,6 +7659,11 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
 
        // Check for loopback exception
        if (necp_socket_bypass(override_local_addr, override_remote_addr, inp)) {
+               if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED) {
+                       // If the previous policy result was "socket scoped", un-scope the socket.
+                       inp->inp_flags &= ~INP_BOUND_IF;
+                       inp->inp_boundifp = NULL;
+               }
                // Mark socket as a pass
                inp->inp_policyresult.policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH;
                inp->inp_policyresult.skip_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH;
@@ -7604,7 +7680,6 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
        lck_rw_lock_shared(&necp_kernel_policy_lock);
 
        necp_socket_fillout_info_locked(inp, override_local_addr, override_remote_addr, override_bound_interface, drop_order, &info);
-       inp->inp_policyresult.app_id = info.application_id;
 
        // Check info
        u_int32_t flowhash = necp_socket_calc_flowhash_locked(&info);
@@ -7619,8 +7694,10 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
                return inp->inp_policyresult.policy_id;
        }
 
+       inp->inp_policyresult.app_id = info.application_id;
+
        // Match socket to policy
-       necp_kernel_policy_id skip_policy_id;
+       necp_kernel_policy_id skip_policy_id = NECP_KERNEL_POLICY_ID_NONE;
        u_int32_t route_rule_id_array[MAX_AGGREGATE_ROUTE_RULES];
        size_t route_rule_id_array_count = 0;
        matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, &filter_control_unit, route_rule_id_array, &route_rule_id_array_count, MAX_AGGREGATE_ROUTE_RULES, &service_action, &service, netagent_ids, NULL, NECP_MAX_NETAGENTS, NULL, 0, current_proc(), &skip_policy_id, inp->inp_route.ro_rt, &drop_dest_policy_result, &drop_all_bypass);
@@ -7721,6 +7798,10 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
                inp->inp_policyresult.results.result = matched_policy->result;
                memcpy(&inp->inp_policyresult.results.result_parameter, &matched_policy->result_parameter, sizeof(matched_policy->result_parameter));
 
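+               // If the policy matched on the real application ID, record the
+               // real app's ID on the socket rather than the responsible app's.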
+               if (info.used_responsible_pid && (matched_policy->condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID)) {
+                       inp->inp_policyresult.app_id = info.real_application_id;
+               }
+
                if (necp_socket_is_connected(inp) &&
                    (matched_policy->result == NECP_KERNEL_POLICY_RESULT_DROP ||
                    (matched_policy->result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && !necp_socket_uses_interface(inp, matched_policy->result_parameter.tunnel_interface_index)))) {
@@ -9210,7 +9291,7 @@ necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel
        } else {
                packet->m_pkthdr.necp_mtag.necp_route_rule_id = inp->inp_policyresult.results.route_rule_id;
        }
-       packet->m_pkthdr.necp_mtag.necp_app_id = inp->inp_policyresult.app_id;
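+       // App IDs above UINT16_MAX are stored in the mbuf tag with the offset
+       // removed; necp_get_app_uuid_from_packet() restores it before lookup.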
+       packet->m_pkthdr.necp_mtag.necp_app_id = (inp->inp_policyresult.app_id > UINT16_MAX ? (inp->inp_policyresult.app_id - UINT16_MAX) : inp->inp_policyresult.app_id);
 
        if (skip_policy_id != NECP_KERNEL_POLICY_ID_NONE &&
            skip_policy_id != NECP_KERNEL_POLICY_ID_NO_MATCH) {
@@ -9344,7 +9425,8 @@ necp_get_app_uuid_from_packet(struct mbuf *packet,
        bool found_mapping = FALSE;
        if (packet->m_pkthdr.necp_mtag.necp_app_id != 0) {
                lck_rw_lock_shared(&necp_kernel_policy_lock);
-               struct necp_uuid_id_mapping *entry = necp_uuid_lookup_uuid_with_app_id_locked(packet->m_pkthdr.necp_mtag.necp_app_id);
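+               // Restore the UINT16_MAX offset removed by
+               // necp_mark_packet_from_socket() before the UUID lookup.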
+               necp_app_id app_id = (packet->m_pkthdr.necp_mtag.necp_app_id < UINT16_MAX ? (packet->m_pkthdr.necp_mtag.necp_app_id + UINT16_MAX) : packet->m_pkthdr.necp_mtag.necp_app_id);
+               struct necp_uuid_id_mapping *entry = necp_uuid_lookup_uuid_with_app_id_locked(app_id);
                if (entry != NULL) {
                        uuid_copy(app_uuid, entry->uuid);
                        found_mapping = true;
@@ -9746,6 +9828,11 @@ sysctl_handle_necp_drop_dest_level SYSCTL_HANDLER_ARGS
                case NECP_SESSION_PRIORITY_CONTROL:
                case NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL:
                case NECP_SESSION_PRIORITY_HIGH:
+               case NECP_SESSION_PRIORITY_HIGH_1:
+               case NECP_SESSION_PRIORITY_HIGH_2:
+               case NECP_SESSION_PRIORITY_HIGH_3:
+               case NECP_SESSION_PRIORITY_HIGH_4:
+               case NECP_SESSION_PRIORITY_HIGH_RESTRICTED:
                case NECP_SESSION_PRIORITY_DEFAULT:
                case NECP_SESSION_PRIORITY_LOW:
                        if (tmp_drop_dest_policy.entry_count == 0) {
index 5bf7a9de16d7d41e23dcaff800e1b13d839b2080..f658ad1ed3a40e7a4d4e62238b8ea99f53c45c78 100644 (file)
@@ -169,9 +169,14 @@ struct necp_packet_header {
 #define NECP_POLICY_RESULT_USE_NETAGENT                 14      // netagent uuid_t
 #define NECP_POLICY_RESULT_NETAGENT_SCOPED              15      // netagent uuid_t
 #define NECP_POLICY_RESULT_SCOPED_DIRECT                16      // N/A, scopes to primary physical interface
-#define NECP_POLICY_RESULT_ALLOW_UNENTITLED                             17              // N/A
+#define NECP_POLICY_RESULT_ALLOW_UNENTITLED             17      // N/A
 
-#define NECP_POLICY_RESULT_MAX                                  NECP_POLICY_RESULT_ALLOW_UNENTITLED
+#define NECP_POLICY_RESULT_MAX                          NECP_POLICY_RESULT_ALLOW_UNENTITLED
+
+/*
+ * PASS Result Flags
+ */
+#define NECP_POLICY_PASS_NO_SKIP_IPSEC                  0x01
 
 /*
  * Route Rules
@@ -238,11 +243,15 @@ struct necp_policy_condition_agent_type {
 
 #define NECP_SESSION_PRIORITY_UNKNOWN                   0
 #define NECP_SESSION_PRIORITY_CONTROL                   1
-#define NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL 2
-#define NECP_SESSION_PRIORITY_HIGH                              3
-#define NECP_SESSION_PRIORITY_DEFAULT                   4
-#define NECP_SESSION_PRIORITY_LOW                               5
-
+#define NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL         2
+#define NECP_SESSION_PRIORITY_HIGH                      3
+#define NECP_SESSION_PRIORITY_HIGH_1                    4
+#define NECP_SESSION_PRIORITY_HIGH_2                    5
+#define NECP_SESSION_PRIORITY_HIGH_3                    6
+#define NECP_SESSION_PRIORITY_HIGH_4                    7
+#define NECP_SESSION_PRIORITY_HIGH_RESTRICTED           8
+#define NECP_SESSION_PRIORITY_DEFAULT                   9
+#define NECP_SESSION_PRIORITY_LOW                       10
 #define NECP_SESSION_NUM_PRIORITIES                             NECP_SESSION_PRIORITY_LOW
 
 typedef u_int32_t necp_policy_id;
@@ -685,6 +694,8 @@ struct necp_client_result_interface {
        u_int32_t index;
 };
 
+#define NECP_USES_INTERFACE_OPTIONS_FOR_BROWSE 1
+
 struct necp_client_interface_option {
        u_int32_t interface_index;
        u_int32_t interface_generation;
@@ -846,7 +857,8 @@ extern int necp_application_find_policy_match_internal(proc_t proc, u_int8_t *pa
     struct necp_client_endpoint *returned_v4_gateway,
     struct necp_client_endpoint *returned_v6_gateway,
     struct rtentry **returned_route, bool ignore_address,
-    bool has_client);
+    bool has_client,
+    uuid_t *returned_override_euuid);
 /*
  * TLV utilities
  *
@@ -922,7 +934,9 @@ typedef u_int32_t necp_app_id;
 #define NECP_KERNEL_POLICY_RESULT_USE_NETAGENT                  NECP_POLICY_RESULT_USE_NETAGENT
 #define NECP_KERNEL_POLICY_RESULT_NETAGENT_SCOPED               NECP_POLICY_RESULT_NETAGENT_SCOPED
 #define NECP_KERNEL_POLICY_RESULT_SCOPED_DIRECT                 NECP_POLICY_RESULT_SCOPED_DIRECT
-#define NECP_KERNEL_POLICY_RESULT_ALLOW_UNENTITLED                                  NECP_POLICY_RESULT_ALLOW_UNENTITLED
+#define NECP_KERNEL_POLICY_RESULT_ALLOW_UNENTITLED              NECP_POLICY_RESULT_ALLOW_UNENTITLED
+
+#define NECP_KERNEL_POLICY_PASS_NO_SKIP_IPSEC                   NECP_POLICY_PASS_NO_SKIP_IPSEC
 
 typedef struct {
        u_int32_t identifier;
@@ -937,6 +951,7 @@ typedef union {
        u_int32_t                                       skip_policy_order;
        u_int32_t                                       route_rule_id;
        u_int32_t                                       netagent_id;
+       u_int32_t                                       pass_flags;
        necp_kernel_policy_service      service;
 } necp_kernel_policy_result_parameter;
 
index 0e3dd1f47a405cf155d0a246b9cfcf4dcb24197a..0b3ef6782936a213c787e89565fea1a25a2aeaa0 100644 (file)
@@ -382,6 +382,8 @@ struct necp_client {
 
        void *agent_handle;
 
+       uuid_t override_euuid;
+
 
        size_t parameters_length;
        u_int8_t parameters[0];
@@ -1482,7 +1484,7 @@ necp_client_flow_is_viable(proc_t proc, struct necp_client *client,
            &result, &flow->necp_flow_flags, NULL,
            flow->interface_index,
            &flow->local_addr, &flow->remote_addr, NULL, NULL,
-           NULL, ignore_address, true);
+           NULL, ignore_address, true, NULL);
 
        // Check for blocking agents
        for (int i = 0; i < NECP_MAX_NETAGENTS; i++) {
@@ -1634,6 +1636,60 @@ necp_client_mark_all_nonsocket_flows_as_invalid(struct necp_client *client)
        client->interface_option_count = 0;
 }
 
+static inline bool
+necp_netagent_is_required(const struct necp_client_parsed_parameters *parameters,
+    uuid_t *netagent_uuid)
+{
+       // Specific use agents only apply when required
+       bool required = false;
+       if (parameters != NULL) {
+               // Check required agent UUIDs
+               for (int i = 0; i < NECP_MAX_AGENT_PARAMETERS; i++) {
+                       if (uuid_is_null(parameters->required_netagents[i])) {
+                               break;
+                       }
+                       if (uuid_compare(parameters->required_netagents[i], *netagent_uuid) == 0) {
+                               required = true;
+                               break;
+                       }
+               }
+
+               if (!required) {
+                       // Check required agent types
+                       bool fetched_type = false;
+                       char netagent_domain[NETAGENT_DOMAINSIZE];
+                       char netagent_type[NETAGENT_TYPESIZE];
+                       memset(&netagent_domain, 0, NETAGENT_DOMAINSIZE);
+                       memset(&netagent_type, 0, NETAGENT_TYPESIZE);
+
+                       for (int i = 0; i < NECP_MAX_AGENT_PARAMETERS; i++) {
+                               if (strlen(parameters->required_netagent_types[i].netagent_domain) == 0 ||
+                                   strlen(parameters->required_netagent_types[i].netagent_type) == 0) {
+                                       break;
+                               }
+
+                               if (!fetched_type) {
+                                       if (netagent_get_agent_domain_and_type(*netagent_uuid, netagent_domain, netagent_type)) {
+                                               fetched_type = true;
+                                       } else {
+                                               break;
+                                       }
+                               }
+
+                               if ((strlen(parameters->required_netagent_types[i].netagent_domain) == 0 ||
+                                   strncmp(netagent_domain, parameters->required_netagent_types[i].netagent_domain, NETAGENT_DOMAINSIZE) == 0) &&
+                                   (strlen(parameters->required_netagent_types[i].netagent_type) == 0 ||
+                                   strncmp(netagent_type, parameters->required_netagent_types[i].netagent_type, NETAGENT_TYPESIZE) == 0)) {
+                                       required = true;
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       return required;
+}
+
 static bool
 necp_netagent_applies_to_client(struct necp_client *client,
     const struct necp_client_parsed_parameters *parameters,
@@ -1701,53 +1757,7 @@ necp_netagent_applies_to_client(struct necp_client *client,
 
        if (flags & NETAGENT_FLAG_SPECIFIC_USE_ONLY) {
                // Specific use agents only apply when required
-               bool required = FALSE;
-               if (parameters != NULL) {
-                       // Check required agent UUIDs
-                       for (int i = 0; i < NECP_MAX_AGENT_PARAMETERS; i++) {
-                               if (uuid_is_null(parameters->required_netagents[i])) {
-                                       break;
-                               }
-                               if (uuid_compare(parameters->required_netagents[i], *netagent_uuid) == 0) {
-                                       required = TRUE;
-                                       break;
-                               }
-                       }
-
-                       if (!required) {
-                               // Check required agent types
-                               bool fetched_type = FALSE;
-                               char netagent_domain[NETAGENT_DOMAINSIZE];
-                               char netagent_type[NETAGENT_TYPESIZE];
-                               memset(&netagent_domain, 0, NETAGENT_DOMAINSIZE);
-                               memset(&netagent_type, 0, NETAGENT_TYPESIZE);
-
-                               for (int i = 0; i < NECP_MAX_AGENT_PARAMETERS; i++) {
-                                       if (strlen(parameters->required_netagent_types[i].netagent_domain) == 0 ||
-                                           strlen(parameters->required_netagent_types[i].netagent_type) == 0) {
-                                               break;
-                                       }
-
-                                       if (!fetched_type) {
-                                               if (netagent_get_agent_domain_and_type(*netagent_uuid, netagent_domain, netagent_type)) {
-                                                       fetched_type = TRUE;
-                                               } else {
-                                                       break;
-                                               }
-                                       }
-
-                                       if ((strlen(parameters->required_netagent_types[i].netagent_domain) == 0 ||
-                                           strncmp(netagent_domain, parameters->required_netagent_types[i].netagent_domain, NETAGENT_DOMAINSIZE) == 0) &&
-                                           (strlen(parameters->required_netagent_types[i].netagent_type) == 0 ||
-                                           strncmp(netagent_type, parameters->required_netagent_types[i].netagent_type, NETAGENT_TYPESIZE) == 0)) {
-                                               required = TRUE;
-                                               break;
-                                       }
-                               }
-                       }
-               }
-
-               applies = required;
+               applies = necp_netagent_is_required(parameters, netagent_uuid);
        } else {
                applies = TRUE;
        }
@@ -1773,6 +1783,32 @@ necp_client_add_agent_interface_options(struct necp_client *client,
        }
 }
 
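+// Add an interface option for the first agent on the interface that is
+// registered, active, and supports browsing. Specific-use-only agents
+// qualify only when required by the client's parameters.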
+static void
+necp_client_add_browse_interface_options(struct necp_client *client,
+    const struct necp_client_parsed_parameters *parsed_parameters,
+    ifnet_t ifp)
+{
+       if (ifp != NULL && ifp->if_agentids != NULL) {
+               for (u_int32_t i = 0; i < ifp->if_agentcount; i++) {
+                       if (uuid_is_null(ifp->if_agentids[i])) {
+                               continue;
+                       }
+
+                       u_int32_t flags = netagent_get_flags(ifp->if_agentids[i]);
+                       if ((flags & NETAGENT_FLAG_REGISTERED) &&
+                           (flags & NETAGENT_FLAG_ACTIVE) &&
+                           (flags & NETAGENT_FLAG_SUPPORTS_BROWSE) &&
+                           (!(flags & NETAGENT_FLAG_SPECIFIC_USE_ONLY) ||
+                           necp_netagent_is_required(parsed_parameters, &ifp->if_agentids[i]))) {
+                               necp_client_add_interface_option_if_needed(client, ifp->if_index, ifnet_get_generation(ifp), &ifp->if_agentids[i]);
+
+                               // Finding one is enough
+                               break;
+                       }
+               }
+       }
+}
+
 static inline bool
 necp_client_address_is_valid(struct sockaddr *address)
 {
@@ -2418,7 +2454,7 @@ necp_client_lookup_bb_radio_manager(struct necp_client *client,
        }
 
        error = necp_application_find_policy_match_internal(proc, client->parameters, (u_int32_t)client->parameters_length,
-           &result, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, true, true);
+           &result, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, true, true, NULL);
 
        proc_rele(proc);
        proc = PROC_NULL;
@@ -3069,7 +3105,8 @@ necp_calculate_client_result(proc_t proc,
     u_int32_t *flags,
     u_int32_t *reason,
     struct necp_client_endpoint *v4_gateway,
-    struct necp_client_endpoint *v6_gateway)
+    struct necp_client_endpoint *v6_gateway,
+    uuid_t *override_euuid)
 {
        struct rtentry *route = NULL;
 
@@ -3087,7 +3124,8 @@ necp_calculate_client_result(proc_t proc,
                    result, flags, reason, matching_if_index,
                    NULL, NULL,
                    v4_gateway, v6_gateway,
-                   &route, false, true);
+                   &route, false, true,
+                   override_euuid);
                if (error != 0) {
                        if (route != NULL) {
                                rtfree(route);
@@ -3220,14 +3258,16 @@ necp_update_client_result(proc_t proc,
        // Calculate the policy result
        struct necp_client_endpoint v4_gateway = {};
        struct necp_client_endpoint v6_gateway = {};
-       if (!necp_calculate_client_result(proc, client, parsed_parameters, &result, &flags, &reason, &v4_gateway, &v6_gateway)) {
+       uuid_t override_euuid;
+       uuid_clear(override_euuid);
+       if (!necp_calculate_client_result(proc, client, parsed_parameters, &result, &flags, &reason, &v4_gateway, &v6_gateway, &override_euuid)) {
                FREE(parsed_parameters, M_NECP);
                return FALSE;
        }
 
        if (necp_update_parsed_parameters(parsed_parameters, &result)) {
                // Changed the parameters based on result, try again (only once)
-               if (!necp_calculate_client_result(proc, client, parsed_parameters, &result, &flags, &reason, &v4_gateway, &v6_gateway)) {
+               if (!necp_calculate_client_result(proc, client, parsed_parameters, &result, &flags, &reason, &v4_gateway, &v6_gateway, &override_euuid)) {
                        FREE(parsed_parameters, M_NECP);
                        return FALSE;
                }
@@ -3242,8 +3282,10 @@ necp_update_client_result(proc_t proc,
 
        // Save the last policy id on the client
        client->policy_id = result.policy_id;
+       uuid_copy(client->override_euuid, override_euuid);
 
        if ((parsed_parameters->flags & NECP_CLIENT_PARAMETER_FLAG_MULTIPATH) ||
+           (parsed_parameters->flags & NECP_CLIENT_PARAMETER_FLAG_BROWSE) ||
            ((parsed_parameters->flags & NECP_CLIENT_PARAMETER_FLAG_LISTENER) &&
            result.routing_result != NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED)) {
                client->allow_multiple_flows = TRUE;
@@ -3485,6 +3527,21 @@ necp_update_client_result(proc_t proc,
                                }
                        }
                }
+       } else if (parsed_parameters->flags & NECP_CLIENT_PARAMETER_FLAG_BROWSE) {
+               if (result.routing_result == NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED) {
+                       if (direct_interface != NULL) {
+                               // Add browse option if it has an agent
+                               necp_client_add_browse_interface_options(client, parsed_parameters, direct_interface);
+                       }
+               } else {
+                       // Get browse interface options from global list
+                       struct ifnet *browse_interface = NULL;
+                       TAILQ_FOREACH(browse_interface, &ifnet_head, if_link) {
+                               if (necp_ifnet_matches_parameters(browse_interface, parsed_parameters, 0, NULL, true, false)) {
+                                       necp_client_add_browse_interface_options(client, parsed_parameters, browse_interface);
+                               }
+                       }
+               }
        }
 
        // Add agents
@@ -5277,7 +5334,11 @@ necp_client_copy_parameters_locked(struct necp_client *client,
        }
        parameters->ethertype = parsed_parameters.ethertype;
        parameters->traffic_class = parsed_parameters.traffic_class;
-       uuid_copy(parameters->euuid, parsed_parameters.effective_uuid);
+       if (uuid_is_null(client->override_euuid)) {
+               uuid_copy(parameters->euuid, parsed_parameters.effective_uuid);
+       } else {
+               uuid_copy(parameters->euuid, client->override_euuid);
+       }
        parameters->is_listener = (parsed_parameters.flags & NECP_CLIENT_PARAMETER_FLAG_LISTENER) ? 1 : 0;
        parameters->is_interpose = (parsed_parameters.flags & NECP_CLIENT_PARAMETER_FLAG_INTERPOSE) ? 1 : 0;
        parameters->is_custom_ether = (parsed_parameters.flags & NECP_CLIENT_PARAMETER_FLAG_CUSTOM_ETHER) ? 1 : 0;
@@ -6188,7 +6249,7 @@ necp_match_policy(struct proc *p, struct necp_match_policy_args *uap, int32_t *r
        }
 
        error = necp_application_find_policy_match_internal(p, parameters, uap->parameters_size,
-           &returned_result, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, false, false);
+           &returned_result, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, false, false, NULL);
        if (error) {
                goto done;
        }
index a7d27aed899dd106b032e356c60ca1d761f50fd3..0072bfaca98d98f484d66816d09954948c14c070 100644 (file)
@@ -732,6 +732,11 @@ netagent_handle_register_inner(struct netagent_session *session, struct netagent
 {
        lck_rw_lock_exclusive(&netagent_lock);
 
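+       // A session may own at most one registered agent; reject duplicates.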
+       if (session->wrapper != NULL) {
+               lck_rw_done(&netagent_lock);
+               return EINVAL;
+       }
+
        new_wrapper->control_unit = session->control_unit;
        new_wrapper->event_handler = session->event_handler;
        new_wrapper->event_context = session->event_context;
@@ -757,6 +762,7 @@ netagent_register(netagent_session_t _session, struct netagent *agent)
 {
        int data_size = 0;
        struct netagent_wrapper *new_wrapper = NULL;
+       uuid_t registered_uuid;
 
        struct netagent_session *session = (struct netagent_session *)_session;
        if (session == NULL) {
@@ -790,6 +796,8 @@ netagent_register(netagent_session_t _session, struct netagent *agent)
        memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size);
        __nochk_memcpy(&new_wrapper->netagent, agent, sizeof(struct netagent) + data_size);
 
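+       // Capture the UUID before registering: on success new_wrapper is owned
+       // by the session and may be replaced or freed once the lock is dropped.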
+       uuid_copy(registered_uuid, new_wrapper->netagent.netagent_uuid);
+
        int error = netagent_handle_register_inner(session, new_wrapper);
        if (error != 0) {
                FREE(new_wrapper, M_NETAGENT);
@@ -797,7 +805,7 @@ netagent_register(netagent_session_t _session, struct netagent *agent)
        }
 
        NETAGENTLOG0(LOG_DEBUG, "Registered new agent");
-       netagent_post_event(new_wrapper->netagent.netagent_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
+       netagent_post_event(registered_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
 
        return 0;
 }
@@ -810,6 +818,7 @@ netagent_handle_register_setopt(struct netagent_session *session, u_int8_t *payl
        struct netagent_wrapper *new_wrapper = NULL;
        u_int32_t response_error = 0;
        struct netagent *register_netagent = (struct netagent *)(void *)payload;
+       uuid_t registered_uuid;
 
        if (session == NULL) {
                NETAGENTLOG0(LOG_ERR, "Failed to find session");
@@ -859,6 +868,8 @@ netagent_handle_register_setopt(struct netagent_session *session, u_int8_t *payl
        memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size);
        __nochk_memcpy(&new_wrapper->netagent, register_netagent, sizeof(struct netagent) + data_size);
 
+       uuid_copy(registered_uuid, new_wrapper->netagent.netagent_uuid);
+
        response_error = netagent_handle_register_inner(session, new_wrapper);
        if (response_error != 0) {
                FREE(new_wrapper, M_NETAGENT);
@@ -866,7 +877,7 @@ netagent_handle_register_setopt(struct netagent_session *session, u_int8_t *payl
        }
 
        NETAGENTLOG0(LOG_DEBUG, "Registered new agent");
-       netagent_post_event(new_wrapper->netagent.netagent_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
+       netagent_post_event(registered_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
 
 done:
        return response_error;
@@ -880,8 +891,7 @@ netagent_handle_register_message(struct netagent_session *session, u_int32_t mes
        int data_size = 0;
        struct netagent_wrapper *new_wrapper = NULL;
        u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
-       uuid_t netagent_uuid;
-       uuid_clear(netagent_uuid);
+       uuid_t registered_uuid;
 
        if (session == NULL) {
                NETAGENTLOG0(LOG_ERR, "Failed to find session");
@@ -928,11 +938,19 @@ netagent_handle_register_message(struct netagent_session *session, u_int32_t mes
                goto fail;
        }
 
-       (void)netagent_handle_register_inner(session, new_wrapper);
+       uuid_copy(registered_uuid, new_wrapper->netagent.netagent_uuid);
+
+       error = netagent_handle_register_inner(session, new_wrapper);
+       if (error) {
+               NETAGENTLOG(LOG_ERR, "Failed to register agent: %d", error);
+               FREE(new_wrapper, M_NETAGENT);
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
 
        NETAGENTLOG0(LOG_DEBUG, "Registered new agent");
        netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_REGISTER, message_id);
-       netagent_post_event(new_wrapper->netagent.netagent_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
+       netagent_post_event(registered_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
        return;
 fail:
        netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_REGISTER, message_id, response_error);
@@ -1102,6 +1120,8 @@ netagent_update(netagent_session_t _session, struct netagent *agent)
        u_int8_t agent_changed;
        int data_size = 0;
        struct netagent_wrapper *new_wrapper = NULL;
+       bool should_update_immediately;
+       uuid_t updated_uuid;
 
        struct netagent_session *session = (struct netagent_session *)_session;
        if (session == NULL) {
@@ -1134,10 +1154,12 @@ netagent_update(netagent_session_t _session, struct netagent *agent)
        memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size);
        __nochk_memcpy(&new_wrapper->netagent, agent, sizeof(struct netagent) + data_size);
 
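+       // Capture the UUID and update flags up front; after a successful update
+       // the wrapper belongs to the session (or is freed if nothing changed).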
+       uuid_copy(updated_uuid, new_wrapper->netagent.netagent_uuid);
+       should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
+
        int error = netagent_handle_update_inner(session, new_wrapper, data_size, &agent_changed, kNetagentErrorDomainPOSIX);
        if (error == 0) {
-               bool should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
-               netagent_post_event(session->wrapper->netagent.netagent_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
+               netagent_post_event(updated_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
                if (agent_changed == FALSE) {
                        // The session wrapper does not need the "new_wrapper" as nothing changed
                        FREE(new_wrapper, M_NETAGENT);
@@ -1158,6 +1180,8 @@ netagent_handle_update_setopt(struct netagent_session *session, u_int8_t *payloa
        errno_t response_error = 0;
        struct netagent *update_netagent = (struct netagent *)(void *)payload;
        u_int8_t agent_changed;
+       bool should_update_immediately;
+       uuid_t updated_uuid;
 
        if (session == NULL) {
                NETAGENTLOG0(LOG_ERR, "Failed to find session");
@@ -1207,10 +1231,12 @@ netagent_handle_update_setopt(struct netagent_session *session, u_int8_t *payloa
        memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size);
        __nochk_memcpy(&new_wrapper->netagent, update_netagent, sizeof(struct netagent) + data_size);
 
+       uuid_copy(updated_uuid, new_wrapper->netagent.netagent_uuid);
+       should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
+
        response_error = netagent_handle_update_inner(session, new_wrapper, data_size, &agent_changed, kNetagentErrorDomainPOSIX);
        if (response_error == 0) {
-               bool should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
-               netagent_post_event(session->wrapper->netagent.netagent_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
+               netagent_post_event(updated_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
                if (agent_changed == FALSE) {
                        // The session wrapper does not need the "new_wrapper" as nothing changed
                        FREE(new_wrapper, M_NETAGENT);
@@ -1232,6 +1258,8 @@ netagent_handle_update_message(struct netagent_session *session, u_int32_t messa
        struct netagent_wrapper *new_wrapper = NULL;
        u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
        u_int8_t agent_changed;
+       uuid_t updated_uuid;
+       bool should_update_immediately;
 
        if (session == NULL) {
                NETAGENTLOG0(LOG_ERR, "Failed to find session");
@@ -1277,6 +1305,9 @@ netagent_handle_update_message(struct netagent_session *session, u_int32_t messa
                goto fail;
        }
 
+       uuid_copy(updated_uuid, new_wrapper->netagent.netagent_uuid);
+       should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
+
        response_error = netagent_handle_update_inner(session, new_wrapper, data_size, &agent_changed, kNetagentErrorDomainUserDefined);
        if (response_error != 0) {
                FREE(new_wrapper, M_NETAGENT);
@@ -1284,8 +1315,7 @@ netagent_handle_update_message(struct netagent_session *session, u_int32_t messa
        }
 
        netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_UPDATE, message_id);
-       bool should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
-       netagent_post_event(session->wrapper->netagent.netagent_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
+       netagent_post_event(updated_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
 
        if (agent_changed == FALSE) {
                // The session wrapper does not need the "new_wrapper" as nothing changed
index 29c1ee8188145cdfcc40b07172e2fad35e0b6eb3..5ab4ab0955e7eb6f443fbdea81e5b988b137f179 100644 (file)
@@ -101,6 +101,6 @@ typedef int32_t                 dhcp_lease_t;     /* relative time */
 #define DHCP_INFINITE_LEASE     ((dhcp_lease_t)-1)
 #define DHCP_INFINITE_TIME      ((dhcp_time_secs_t)-1)
 
-#define DHCP_FLAGS_BROADCAST    ((u_short)0x0001)
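+/* Per RFC 2131, the broadcast flag is the most significant bit of 'flags' */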
+#define DHCP_FLAGS_BROADCAST    ((u_int16_t)0x8000)
 
 #endif /* _NETINET_DHCP_H */
index 5a5ab0961968eff80d17434cf173062412df746c..1a405129f0bcc0cb701feb11c81583857e84813f 100644 (file)
@@ -51,6 +51,7 @@
 #include <net/route.h>
 #include <net/flowhash.h>
 #include <net/ntstat.h>
+#include <net/content_filter.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/tcp.h>
@@ -832,7 +833,7 @@ flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, si
 
 #define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
 static uint16_t
-flow_divert_trie_search(struct flow_divert_trie *trie, uint8_t *string_bytes)
+flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes)
 {
        uint16_t current = trie->root;
        uint16_t string_idx = 0;
@@ -853,7 +854,6 @@ flow_divert_trie_search(struct flow_divert_trie *trie, uint8_t *string_bytes)
                                return current; /* Got an exact match */
                        } else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
                            0 == strncmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
-                               string_bytes[string_idx] = '\0';
                                return current; /* Got an apple webclip id prefix match */
                        } else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
                                next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
@@ -953,33 +953,174 @@ flow_divert_find_proc_by_uuid(uuid_t uuid)
 }
 
 static int
-flow_divert_get_src_proc(struct socket *so, proc_t *proc)
+flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id, mbuf_t connect_packet, bool is_effective)
 {
-       int release = 0;
+       int error = 0;
+       int cdhash_error = 0;
+       unsigned char cdhash[SHA1_RESULTLEN] = { 0 };
+       audit_token_t audit_token = {};
+       const char *proc_cs_id = signing_id;
 
-       if (so->so_flags & SOF_DELEGATED) {
-               if ((*proc)->p_pid != so->e_pid) {
-                       *proc = proc_find(so->e_pid);
-                       release = 1;
-               } else if (uuid_compare((*proc)->p_uuid, so->e_uuid)) {
-                       *proc = flow_divert_find_proc_by_uuid(so->e_uuid);
-                       release = 1;
+       proc_lock(proc);
+
+       if (proc_cs_id == NULL) {
+               if (proc->p_csflags & (CS_VALID | CS_DEBUGGED)) {
+                       proc_cs_id = cs_identity_get(proc);
+               } else {
+                       FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
+               }
+       }
+
+       if (is_effective) {
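+               // Check the effective process's signing ID against the group's
+               // allowed-application trie unless the app map is disabled.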
+               lck_rw_lock_shared(&fd_cb->group->lck);
+               if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
+                       if (proc_cs_id != NULL) {
+                               uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)proc_cs_id);
+                               if (result == NULL_TRIE_IDX) {
+                                       FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
+                                       error = EPERM;
+                               } else {
+                                       FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
+                               }
+                       } else {
+                               error = EPERM;
+                       }
+               }
+               lck_rw_done(&fd_cb->group->lck);
+       }
+
+       if (error != 0) {
+               goto done;
+       }
+
+       /*
+        * If signing_id is not NULL then it came from the flow divert token and will be added
+        * as part of the token, so there is no need to add it here.
+        */
+       if (signing_id == NULL && proc_cs_id != NULL) {
+               error = flow_divert_packet_append_tlv(connect_packet,
+                   (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
+                   strlen(proc_cs_id),
+                   proc_cs_id);
+               if (error != 0) {
+                       FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
+                       goto done;
                }
-       } else if (*proc == PROC_NULL) {
-               *proc = current_proc();
        }
 
-       if (*proc != PROC_NULL) {
-               if ((*proc)->p_pid == 0) {
-                       if (release) {
-                               proc_rele(*proc);
+       cdhash_error = proc_getcdhash(proc, cdhash);
+       if (cdhash_error == 0) {
+               error = flow_divert_packet_append_tlv(connect_packet,
+                   (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
+                   sizeof(cdhash),
+                   cdhash);
+               if (error) {
+                       FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
+                       goto done;
+               }
+       } else {
+               FDLOG(LOG_ERR, fd_cb, "failed to get the cdhash: %d", cdhash_error);
+       }
+
+       task_t task = proc_task(proc);
+       if (task != TASK_NULL) {
+               mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
+               kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
+               if (rc == KERN_SUCCESS) {
+                       int append_error = flow_divert_packet_append_tlv(connect_packet,
+                           (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
+                           sizeof(audit_token_t),
+                           &audit_token);
+                       if (append_error) {
+                               FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
                        }
-                       release = 0;
-                       *proc = PROC_NULL;
                }
        }
 
-       return release;
+done:
+       proc_unlock(proc);
+
+       return error;
+}
+
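+/*
+ * Append signing ID, cdhash, and audit token TLVs for the processes behind
+ * this socket. The effective process is, in order of preference, the
+ * responsible process (macOS only), the delegated process, or the real
+ * process; a second, more direct process is appended with the APP_REAL_*
+ * TLVs when it differs from the effective one.
+ */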
+static int
+flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id, mbuf_t connect_packet)
+{
+       int error = 0;
+       proc_t effective_proc = PROC_NULL;
+       proc_t responsible_proc = PROC_NULL;
+       proc_t real_proc = proc_find(so->last_pid);
+       bool release_real_proc = true;
+
+       proc_t src_proc = PROC_NULL;
+       proc_t real_src_proc = PROC_NULL;
+
+       if (real_proc == PROC_NULL) {
+               FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
+               release_real_proc = false;
+               real_proc = proc;
+               if (real_proc == PROC_NULL) {
+                       real_proc = current_proc();
+               }
+       }
+
+       if (so->so_flags & SOF_DELEGATED) {
+               if (real_proc->p_pid != so->e_pid) {
+                       effective_proc = proc_find(so->e_pid);
+               } else if (uuid_compare(real_proc->p_uuid, so->e_uuid)) {
+                       effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
+               }
+       }
+
+#if defined(XNU_TARGET_OS_OSX)
+       lck_rw_lock_shared(&fd_cb->group->lck);
+       if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
+               if (so->so_rpid > 0) {
+                       responsible_proc = proc_find(so->so_rpid);
+               }
+       }
+       lck_rw_done(&fd_cb->group->lck);
+#endif
+
+       real_src_proc = real_proc;
+
+       if (responsible_proc != PROC_NULL) {
+               src_proc = responsible_proc;
+               if (effective_proc != PROC_NULL) {
+                       real_src_proc = effective_proc;
+               }
+       } else if (effective_proc != PROC_NULL) {
+               src_proc = effective_proc;
+       } else {
+               src_proc = real_proc;
+       }
+
+       error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
+       if (error != 0) {
+               goto done;
+       }
+
+       if (real_src_proc != PROC_NULL && real_src_proc != src_proc) {
+               error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
+               if (error != 0) {
+                       goto done;
+               }
+       }
+
+done:
+       if (responsible_proc != PROC_NULL) {
+               proc_rele(responsible_proc);
+       }
+
+       if (effective_proc != PROC_NULL) {
+               proc_rele(effective_proc);
+       }
+
+       if (real_proc != PROC_NULL && release_real_proc) {
+               proc_rele(real_proc);
+       }
+
+       return error;
 }
 
 static int
@@ -1020,20 +1161,21 @@ flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_t packet, Boolean en
 static int
 flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_t *out_connect_packet)
 {
-       int                             error                   = 0;
-       int                             flow_type               = 0;
+       int                     error                   = 0;
+       int                     flow_type               = 0;
        char                    *signing_id = NULL;
-       int                             free_signing_id = 0;
        mbuf_t                  connect_packet = NULL;
-       proc_t                  src_proc = p;
-       int                             release_proc = 0;
+       cfil_sock_id_t          cfil_sock_id            = CFIL_SOCK_ID_NONE;
+       const void              *cfil_id                = NULL;
+       size_t                  cfil_id_size            = 0;
+       struct inpcb            *inp = sotoinpcb(so);
+       struct ifnet            *ifp = NULL;
 
        error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
        if (error) {
                goto done;
        }
 
-       error = EPERM;
 
        if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
                uint32_t sid_size = 0;
@@ -1043,103 +1185,22 @@ flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr
                        if (signing_id != NULL) {
                                flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
                                FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
-                               free_signing_id = 1;
                        }
                }
        }
 
        socket_unlock(so, 0);
 
-       release_proc = flow_divert_get_src_proc(so, &src_proc);
-       if (src_proc != PROC_NULL) {
-               proc_lock(src_proc);
-               if (signing_id == NULL) {
-                       if (src_proc->p_csflags & (CS_VALID | CS_DEBUGGED)) {
-                               const char * cs_id;
-                               cs_id = cs_identity_get(src_proc);
-                               signing_id = __DECONST(char *, cs_id);
-                       } else {
-                               FDLOG0(LOG_WARNING, fd_cb, "Signature is invalid");
-                       }
-               }
-       } else {
-               FDLOG0(LOG_WARNING, fd_cb, "Failed to determine the current proc");
-       }
-
-       if (signing_id != NULL) {
-               uint16_t result = NULL_TRIE_IDX;
-               lck_rw_lock_shared(&fd_cb->group->lck);
-               if (fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP) {
-                       result = 1;
-               } else {
-                       result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (uint8_t *)signing_id);
-               }
-               lck_rw_done(&fd_cb->group->lck);
-               if (result != NULL_TRIE_IDX) {
-                       error = 0;
-                       FDLOG(LOG_INFO, fd_cb, "%s matched", signing_id);
-
-                       error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_SIGNING_ID, strlen(signing_id), signing_id);
-                       if (error == 0) {
-                               if (src_proc != PROC_NULL) {
-                                       unsigned char cdhash[SHA1_RESULTLEN];
-                                       error = proc_getcdhash(src_proc, cdhash);
-                                       if (error == 0) {
-                                               error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CDHASH, sizeof(cdhash), cdhash);
-                                               if (error) {
-                                                       FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
-                                               }
-                                       } else {
-                                               FDLOG(LOG_ERR, fd_cb, "failed to get the cdhash: %d", error);
-                                       }
-                               }
-                       } else {
-                               FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
-                       }
-               } else {
-                       FDLOG(LOG_WARNING, fd_cb, "%s did not match", signing_id);
-               }
-       } else {
-               FDLOG0(LOG_WARNING, fd_cb, "Failed to get the code signing identity");
-               if (fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP) {
-                       error = 0;
-               }
-       }
+       error = flow_divert_add_all_proc_info(fd_cb, so, p, signing_id, connect_packet);
 
-       if (error == 0 && src_proc != PROC_NULL) {
-               task_t task = proc_task(src_proc);
-               if (task != TASK_NULL) {
-                       audit_token_t audit_token;
-                       mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
-                       kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
-                       if (rc == KERN_SUCCESS) {
-                               error = flow_divert_packet_append_tlv(connect_packet,
-                                   FLOW_DIVERT_TLV_APP_AUDIT_TOKEN,
-                                   sizeof(audit_token_t),
-                                   &audit_token);
-                               if (error) {
-                                       FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", error);
-                                       error = 0; /* do not treat this as fatal error, proceed */
-                               }
-                       } else {
-                               FDLOG(LOG_ERR, fd_cb, "failed to retrieve app audit token: %d", rc);
-                       }
-               }
-       }
-
-       if (src_proc != PROC_NULL) {
-               proc_unlock(src_proc);
-               if (release_proc) {
-                       proc_rele(src_proc);
-               }
-       }
        socket_lock(so, 0);
 
-       if (free_signing_id) {
+       if (signing_id != NULL) {
                FREE(signing_id, M_TEMP);
        }
 
        if (error) {
+               FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
                goto done;
        }
 
@@ -1168,40 +1229,6 @@ flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr
                goto done;
        }
 
-       if (fd_cb->so->so_flags & SOF_DELEGATED) {
-               error = flow_divert_packet_append_tlv(connect_packet,
-                   FLOW_DIVERT_TLV_PID,
-                   sizeof(fd_cb->so->e_pid),
-                   &fd_cb->so->e_pid);
-               if (error) {
-                       goto done;
-               }
-
-               error = flow_divert_packet_append_tlv(connect_packet,
-                   FLOW_DIVERT_TLV_UUID,
-                   sizeof(fd_cb->so->e_uuid),
-                   &fd_cb->so->e_uuid);
-               if (error) {
-                       goto done;
-               }
-       } else {
-               error = flow_divert_packet_append_tlv(connect_packet,
-                   FLOW_DIVERT_TLV_PID,
-                   sizeof(fd_cb->so->e_pid),
-                   &fd_cb->so->last_pid);
-               if (error) {
-                       goto done;
-               }
-
-               error = flow_divert_packet_append_tlv(connect_packet,
-                   FLOW_DIVERT_TLV_UUID,
-                   sizeof(fd_cb->so->e_uuid),
-                   &fd_cb->so->last_uuid);
-               if (error) {
-                       goto done;
-               }
-       }
-
        if (fd_cb->connect_token != NULL) {
                unsigned int token_len = m_length(fd_cb->connect_token);
                mbuf_concatenate(connect_packet, fd_cb->connect_token);
@@ -1225,7 +1252,6 @@ flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr
                error = EALREADY;
                goto done;
        } else {
-               struct inpcb *inp = sotoinpcb(so);
                if (flow_divert_has_pcb_local_address(inp)) {
                        error = flow_divert_inp_to_sockaddr(inp, &fd_cb->local_address);
                        if (error) {
@@ -1244,6 +1270,21 @@ flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr
                }
        }
 
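+       // Report the interface this flow is constrained to: the bound
+       // interface when one is set, otherwise the last output interface.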
+       if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp != NULL) {
+               ifp = inp->inp_boundifp;
+       } else if (inp->inp_last_outifp != NULL) {
+               ifp = inp->inp_last_outifp;
+       }
+
+       if (ifp != NULL) {
+               uint32_t flow_if_index = ifp->if_index;
+               error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
+                   sizeof(flow_if_index), &flow_if_index);
+               if (error) {
+                       goto done;
+               }
+       }
+
        if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
                uint32_t flags = FLOW_DIVERT_TOKEN_FLAG_TFO;
                error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
@@ -1252,6 +1293,22 @@ flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr
                }
        }
 
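+       // Identify this flow to any content filter: prefer the content-filter
+       // socket ID, falling back to the NECP client UUID when filtering is
+       // being skipped for this socket.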
+       cfil_sock_id = cfil_sock_id_from_socket(so);
+       if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
+               cfil_id = &cfil_sock_id;
+               cfil_id_size = sizeof(cfil_sock_id);
+       } else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
+               cfil_id = &inp->necp_client_uuid;
+               cfil_id_size = sizeof(inp->necp_client_uuid);
+       }
+
+       if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
+               error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, cfil_id_size, cfil_id);
+               if (error) {
+                       goto done;
+               }
+       }
+
 done:
        if (!error) {
                *out_connect_packet = connect_packet;
@@ -2071,15 +2128,14 @@ flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t off
                                flow_divert_disconnect_socket(fd_cb->so);
                        } else if (!(fd_cb->so->so_state & SS_CANTRCVMORE)) {
                                if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
-                                       if (sbappendstream(&fd_cb->so->so_rcv, data)) {
-                                               fd_cb->bytes_received += data_size;
-                                               flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
-                                               fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc;
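+                                       // The mbuf chain is handed off to sbappendstream() in
+                                       // all cases, so account for the data and drop the local
+                                       // reference unconditionally.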
+                                       int appended = sbappendstream(&fd_cb->so->so_rcv, data);
+                                       fd_cb->bytes_received += data_size;
+                                       flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
+                                       fd_cb->sb_size += data_size;
+                                       if (appended) {
                                                sorwakeup(fd_cb->so);
-                                               data = NULL;
-                                       } else {
-                                               FDLOG0(LOG_ERR, fd_cb, "received data, but appendstream failed");
                                        }
+                                       data = NULL;
                                } else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
                                        struct sockaddr *append_sa;
                                        mbuf_t mctl;
@@ -2097,14 +2153,14 @@ flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t off
 
                                        mctl = flow_divert_get_control_mbuf(fd_cb);
                                        int append_error = 0;
-                                       if (sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error)) {
+                                       if (sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error) || append_error == EJUSTRETURN) {
                                                fd_cb->bytes_received += data_size;
                                                flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
-                                               fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc;
-                                               sorwakeup(fd_cb->so);
+                                               fd_cb->sb_size += data_size;
+                                               if (append_error == 0) {
+                                                       sorwakeup(fd_cb->so);
+                                               }
                                                data = NULL;
-                                       } else if (append_error != EJUSTRETURN) {
-                                               FDLOG0(LOG_ERR, fd_cb, "received data, but sbappendaddr failed");
                                        }
                                        if (!error) {
                                                FREE(append_sa, M_TEMP);
@@ -2760,8 +2816,7 @@ flow_divert_inp_to_sockaddr(const struct inpcb *inp, struct sockaddr **local_soc
 static boolean_t
 flow_divert_has_pcb_local_address(const struct inpcb *inp)
 {
-       return inp->inp_lport != 0
-              && (inp->inp_laddr.s_addr != INADDR_ANY || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr));
+       return inp->inp_lport != 0;
 }
 
 static errno_t
diff --git a/bsd/netinet/flow_divert_proto.h b/bsd/netinet/flow_divert_proto.h
index 5961653b9ef7064be4cc9176a7761341b37a69a1..705fa6b7ea97060a054316bebdaaeeac91d99cb5 100644 (file)
 #define FLOW_DIVERT_TLV_TARGET_PORT             23
 #define FLOW_DIVERT_TLV_CDHASH                  24
 #define FLOW_DIVERT_TLV_SIGNING_ID              25
-#define FLOW_DIVERT_TLV_PID                     26
-#define FLOW_DIVERT_TLV_UUID                    27
+
+
 #define FLOW_DIVERT_TLV_PREFIX_COUNT            28
 #define FLOW_DIVERT_TLV_FLAGS                   29
 #define FLOW_DIVERT_TLV_FLOW_TYPE               30
 #define FLOW_DIVERT_TLV_APP_DATA                31
 #define FLOW_DIVERT_TLV_APP_AUDIT_TOKEN         32
+#define FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID     33
+#define FLOW_DIVERT_TLV_APP_REAL_CDHASH         34
+#define FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN    35
+#define FLOW_DIVERT_TLV_CFIL_ID                 36
 
 #define FLOW_DIVERT_FLOW_TYPE_TCP               1
 #define FLOW_DIVERT_FLOW_TYPE_UDP               3
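
The flow-divert control protocol above is a sequence of type-length-value (TLV) records; FLOW_DIVERT_TLV_CFIL_ID (36) is the record the connect path now appends so content-filter state can be correlated across the divert. A rough sketch of TLV encoding follows — purely illustrative, since flow_divert_packet_append_tlv() actually writes into an mbuf chain, and the 1-byte-type / 4-byte-big-endian-length layout is an assumption here, not the verified wire format:

	#include <errno.h>
	#include <stdint.h>
	#include <string.h>
	#include <arpa/inet.h>

	/* Append one TLV record to a flat buffer (illustrative stand-in for
	 * flow_divert_packet_append_tlv(); record layout is an assumption). */
	static int
	tlv_append(uint8_t *buf, size_t buflen, size_t *off,
	    uint8_t type, uint32_t len, const void *value)
	{
		if (*off + 1 + sizeof(uint32_t) + len > buflen) {
			return ENOBUFS;                 /* record would overflow the packet */
		}
		buf[(*off)++] = type;                   /* 1-byte TLV type, e.g. 36 for CFIL_ID */
		uint32_t belen = htonl(len);
		memcpy(buf + *off, &belen, sizeof(belen));
		*off += sizeof(belen);
		memcpy(buf + *off, value, len);         /* value bytes, e.g. a uuid_t */
		*off += len;
		return 0;
	}
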
diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c
index d1627fb3948f864c91a493dcc9e701b0d27c9626..d097b293fc9f883343c355277dc6ad48a3dcaa21 100644 (file)
@@ -3502,6 +3502,7 @@ inp_update_policy(struct inpcb *inp)
        uint32_t pflags = 0;
        int32_t ogencnt;
        int err = 0;
+       uint8_t *lookup_uuid = NULL;
 
        if (!net_io_policy_uuid ||
            so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
@@ -3516,9 +3517,17 @@ inp_update_policy(struct inpcb *inp)
                return 0;
        }
 
+#if defined(XNU_TARGET_OS_OSX)
+       if (so->so_rpid > 0) {
+               lookup_uuid = so->so_ruuid;
+       }
+#endif
+       if (lookup_uuid == NULL) {
+               lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
+       }
+
        ogencnt = so->so_policy_gencnt;
-       err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
-           so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);
+       err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
 
        /*
         * Discard cached generation count if the entry is gone (ENOENT),
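
The in_pcb.c hunk above changes which UUID feeds proc_uuid_policy_lookup(): on macOS, a socket attributed to a responsible process (so_rpid > 0) is now looked up by that process's UUID (so_ruuid) before falling back to the existing delegated/last-process choice. The selection, condensed into a standalone helper for clarity (field names as in the diff; the helper itself is hypothetical):

	static uint8_t *
	policy_lookup_uuid(struct socket *so)
	{
		uint8_t *lookup_uuid = NULL;
	#if defined(XNU_TARGET_OS_OSX)
		if (so->so_rpid > 0) {
			lookup_uuid = so->so_ruuid;     /* responsible process, if attributed */
		}
	#endif
		if (lookup_uuid == NULL) {
			/* previous behavior: delegated UUID if delegated, else last UUID */
			lookup_uuid = (so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid;
		}
		return lookup_uuid;
	}
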
diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c
index 38a45abfb1b0c2cee67bf36201c51425ad60f657..3e92c7b943d51c22c2048f4f2bd989351eff726e 100644 (file)
@@ -1261,6 +1261,9 @@ sendit:
                necp_mark_packet_from_ip(m, necp_matched_policy_id);
                switch (necp_result) {
                case NECP_KERNEL_POLICY_RESULT_PASS:
+                       if (necp_result_parameter.pass_flags & NECP_KERNEL_POLICY_PASS_NO_SKIP_IPSEC) {
+                               break;
+                       }
                        /* Check if the interface is allowed */
                        if (!necp_packet_is_allowed_over_interface(m, ifp)) {
                                error = EHOSTUNREACH;
diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c
index b8fbb62d4acb1e57dcf043b4e93794d263dce447..9803eaf0916c4cd257172a7938be0422a8926e0c 100644 (file)
@@ -236,6 +236,10 @@ SYSCTL_SKMEM_TCP_INT(OID_AUTO, doautorcvbuf,
     CTLFLAG_RW | CTLFLAG_LOCKED, u_int32_t, tcp_do_autorcvbuf, 1,
     "Enable automatic socket buffer tuning");
 
+SYSCTL_SKMEM_TCP_INT(OID_AUTO, autotunereorder,
+    CTLFLAG_RW | CTLFLAG_LOCKED, u_int32_t, tcp_autotune_reorder, 1,
+    "Enable automatic socket buffer tuning even when reordering is present");
+
 SYSCTL_SKMEM_TCP_INT(OID_AUTO, autorcvbufmax,
     CTLFLAG_RW | CTLFLAG_LOCKED, u_int32_t, tcp_autorcvbuf_max, 512 * 1024,
     "Maximum receive socket buffer size");
@@ -1159,10 +1163,6 @@ tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv,
         * - the high water mark already reached the maximum
         * - the stream is in background and receive side is being
         * throttled
-        * - if there are segments in reassembly queue indicating loss,
-        * do not need to increase recv window during recovery as more
-        * data is not going to be sent. A duplicate ack sent during
-        * recovery should not change the receive window
         */
        if (tcp_do_autorcvbuf == 0 ||
            (sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
@@ -1170,7 +1170,7 @@ tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv,
            sbrcv->sb_hiwat >= rcvbuf_max ||
            (tp->t_flagsext & TF_RECV_THROTTLE) ||
            (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
-           !LIST_EMPTY(&tp->t_segq)) {
+           (!tcp_autotune_reorder && !LIST_EMPTY(&tp->t_segq))) {
                /* Can not resize the socket buffer, just return */
                goto out;
        }
@@ -1215,8 +1215,9 @@ tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv,
                if (TSTMP_GEQ(to->to_tsecr, tp->rfbuf_ts)) {
                        if (tp->rfbuf_cnt + pktlen > (sbrcv->sb_hiwat -
                            (sbrcv->sb_hiwat >> 1))) {
-                               tp->rfbuf_cnt += pktlen;
                                int32_t rcvbuf_inc, min_incr;
+
+                               tp->rfbuf_cnt += pktlen;
                                /*
                                 * Increment the receive window by a
                                 * multiple of maximum sized segments.
@@ -5401,6 +5402,11 @@ dodata:
                                memcpy(&saved_hdr, ip, ip->ip_hl << 2);
                                ip = (struct ip *)&saved_hdr[0];
                        }
+
+                       if (tcp_autotune_reorder) {
+                               tcp_sbrcv_grow(tp, &so->so_rcv, &to, tlen, TCP_AUTORCVBUF_MAX(ifp));
+                       }
+
                        memcpy(&saved_tcphdr, th, sizeof(struct tcphdr));
                        thflags = tcp_reass(tp, th, &tlen, m, ifp, &read_wakeup);
                        th = &saved_tcphdr;
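
Taken together, the tcp_input.c hunks do two things: a non-empty reassembly queue (t_segq) no longer vetoes receive-buffer autotuning unless the new knob — presumably surfaced as net.inet.tcp.autotunereorder, inferred from the OID above — is turned off, and the out-of-order path under dodata: now calls tcp_sbrcv_grow() itself so the receive window keeps growing during reordering. The revised veto, reduced to a self-contained predicate (illustrative; the real guard also checks SB_AUTOSIZE, throttling, and the hiwat ceiling):

	/* Illustrative reduction of the revised guard in tcp_sbrcv_grow(). */
	static int
	reorder_blocks_rcvbuf_growth(int autotune_reorder_enabled, int reassembly_queue_nonempty)
	{
		/* With the knob on (the default), reordering alone no longer blocks growth. */
		return !autotune_reorder_enabled && reassembly_queue_nonempty;
	}
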
diff --git a/bsd/netinet/tcp_output.c b/bsd/netinet/tcp_output.c
index 6f63e40f7b226d39429bed8aca5512f5be845e36..7c1988f1f9467987fc6e330ef940e511f3ae7205 100644 (file)
@@ -978,39 +978,47 @@ after_sack_rexmit:
         * know that foreign host supports TAO, suppress sending segment.
         */
        if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
-               if (tp->t_state != TCPS_SYN_RECEIVED || tfo_enabled(tp))
-                       flags &= ~TH_SYN;
-               off--;
-               len++;
-               if (len > 0 && tp->t_state == TCPS_SYN_SENT) {
-                       while (inp->inp_sndinprog_cnt == 0 &&
-                               tp->t_pktlist_head != NULL) {
-                               packetlist = tp->t_pktlist_head;
-                               packchain_listadd = tp->t_lastchain;
-                               packchain_sent++;
-                               TCP_PKTLIST_CLEAR(tp);
-
-                               error = tcp_ip_output(so, tp, packetlist,
-                                   packchain_listadd, tp_inp_options,
-                                   (so_options & SO_DONTROUTE),
-                                   (sack_rxmit || (sack_bytes_rxmt != 0)),
-                                   isipv6);
+               if (tp->t_state == TCPS_SYN_RECEIVED && tfo_enabled(tp) && tp->snd_nxt == tp->snd_una + 1) {
+                       /* We are sending the SYN again! */
+                       off--;
+                       len++;
+               } else {
+                       if (tp->t_state != TCPS_SYN_RECEIVED || tfo_enabled(tp)) {
+                               flags &= ~TH_SYN;
                        }
 
-                       /*
-                        * tcp was closed while we were in ip,
-                        * resume close
-                        */
-                       if (inp->inp_sndinprog_cnt == 0 &&
-                               (tp->t_flags & TF_CLOSING)) {
-                               tp->t_flags &= ~TF_CLOSING;
-                               (void) tcp_close(tp);
-                       } else {
-                               tcp_check_timer_state(tp);
+                       off--;
+                       len++;
+                       if (len > 0 && tp->t_state == TCPS_SYN_SENT) {
+                               while (inp->inp_sndinprog_cnt == 0 &&
+                                       tp->t_pktlist_head != NULL) {
+                                       packetlist = tp->t_pktlist_head;
+                                       packchain_listadd = tp->t_lastchain;
+                                       packchain_sent++;
+                                       TCP_PKTLIST_CLEAR(tp);
+
+                                       error = tcp_ip_output(so, tp, packetlist,
+                                           packchain_listadd, tp_inp_options,
+                                           (so_options & SO_DONTROUTE),
+                                           (sack_rxmit || (sack_bytes_rxmt != 0)),
+                                           isipv6);
+                               }
+
+                               /*
+                                * tcp was closed while we were in ip,
+                                * resume close
+                                */
+                               if (inp->inp_sndinprog_cnt == 0 &&
+                                       (tp->t_flags & TF_CLOSING)) {
+                                       tp->t_flags &= ~TF_CLOSING;
+                                       (void) tcp_close(tp);
+                               } else {
+                                       tcp_check_timer_state(tp);
+                               }
+                               KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END,
+                                   0,0,0,0,0);
+                               return 0;
                        }
-                       KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END,
-                           0,0,0,0,0);
-                       return 0;
                }
        }
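
The restructured tcp_output.c block above adds one early case: a TCP Fast Open server still in SYN_RECEIVED whose send state shows exactly the SYN unacknowledged (snd_nxt == snd_una + 1) must retransmit the SYN rather than strip TH_SYN and fall into the old path. Isolated as a predicate for clarity (illustrative; tfo_enabled() and the tcpcb fields are as used in the diff):

	static int
	tfo_must_resend_syn(const struct tcpcb *tp)
	{
		/* Only the SYN is outstanding: resend it instead of clearing TH_SYN. */
		return tp->t_state == TCPS_SYN_RECEIVED && tfo_enabled(tp) &&
		       tp->snd_nxt == tp->snd_una + 1;
	}
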
 
diff --git a/bsd/netinet/tcp_timer.c b/bsd/netinet/tcp_timer.c
index c19dc1706959300eba3b285aabc8066844266cd9..df95516896cbd1bfef9e9b4371d7ba4b92108e34 100644 (file)
@@ -143,7 +143,7 @@ sysctl_msec_to_ticks SYSCTL_HANDLER_ARGS
                return error;
        }
 
-       tt = s * TCP_RETRANSHZ / 1000;
+       tt = temp * TCP_RETRANSHZ / 1000;
        if (tt < 1 || tt > INT_MAX) {
                return EINVAL;
        }
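
The one-line tcp_timer.c fix above scales the freshly parsed sysctl value (temp) instead of an unrelated variable, so writes to the millisecond-based timer sysctls convert correctly. A standalone sketch of the conversion and its range check — the TCP_RETRANSHZ value below is an assumption for illustration:

	#include <errno.h>
	#include <limits.h>

	#define TCP_RETRANSHZ 1000      /* assumption: xnu's TCP timer base, ticks per second */

	/* Convert milliseconds to TCP timer ticks, rejecting zero and overflow. */
	static int
	msec_to_ticks(long msec, int *ticks_out)
	{
		long tt = msec * TCP_RETRANSHZ / 1000;
		if (tt < 1 || tt > INT_MAX) {
			return EINVAL;
		}
		*ticks_out = (int)tt;
		return 0;
	}
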
diff --git a/bsd/netinet6/ip6_output.c b/bsd/netinet6/ip6_output.c
index b87f6ba7bf5b9d9ac648ff4cc90af78b1beb610b..1d2f1a9a4dff8e3396108fadb172451d348fb4d9 100644 (file)
@@ -580,6 +580,9 @@ loopit:
 
                switch (necp_result) {
                case NECP_KERNEL_POLICY_RESULT_PASS:
+                       if (necp_result_parameter.pass_flags & NECP_KERNEL_POLICY_PASS_NO_SKIP_IPSEC) {
+                               break;
+                       }
                        goto skip_ipsec;
                case NECP_KERNEL_POLICY_RESULT_DROP:
                        error = EHOSTUNREACH;
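
This ip6_output.c hunk is the IPv6 twin of the ip_output.c change further up: a PASS verdict from NECP normally bypasses IPsec, and the new NECP_KERNEL_POLICY_PASS_NO_SKIP_IPSEC pass flag lets a policy opt the packet back into IPsec processing. The shared shape, sketched (the drop label name below is abbreviated for illustration):

	switch (necp_result) {
	case NECP_KERNEL_POLICY_RESULT_PASS:
		if (necp_result_parameter.pass_flags & NECP_KERNEL_POLICY_PASS_NO_SKIP_IPSEC) {
			break;                  /* PASS, but still run the IPsec path */
		}
		goto skip_ipsec;                /* ordinary PASS: bypass IPsec */
	case NECP_KERNEL_POLICY_RESULT_DROP:
		error = EHOSTUNREACH;
		goto drop;                      /* illustrative label */
	}
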
diff --git a/bsd/netinet6/nd6_prproxy.c b/bsd/netinet6/nd6_prproxy.c
index c3cb5ecd4180c3af3661dd5e21ff90e240dbb6a8..100b4c482e79cde4e7f0758eb0f68149f3d7a337 100644 (file)
@@ -279,7 +279,7 @@ nd6_ndprl_free(struct nd6_prproxy_prelist *ndprl)
  * Apply routing function on the affected upstream and downstream prefixes,
  * i.e. either set or clear RTF_PROXY on the cloning prefix route; all route
  * entries that were cloned off these prefixes will be blown away.  Caller
- * must have acquried proxy6_lock and must not be holding nd6_mutex.
+ * must have acquired proxy6_lock and must not be holding nd6_mutex.
  */
 static void
 nd6_prproxy_prelist_setroute(boolean_t enable,
diff --git a/bsd/netkey/key.c b/bsd/netkey/key.c
index b7c473e6abd980a64fedc51a616ccda20ffbabab..87d8808975ecba901ff29c62280bd3308cd20070 100644 (file)
@@ -174,6 +174,7 @@ __private_extern__ u_int64_t natt_now = 0;
 static LIST_HEAD(_sptree, secpolicy) sptree[IPSEC_DIR_MAX];     /* SPD */
 static LIST_HEAD(_sahtree, secashead) sahtree;                  /* SAD */
 static LIST_HEAD(_regtree, secreg) regtree[SADB_SATYPE_MAX + 1];
+static LIST_HEAD(_custom_sahtree, secashead) custom_sahtree;
 /* registered list */
 
 #define SPIHASHSIZE     128
@@ -470,11 +471,11 @@ static struct mbuf *key_setdumpsp(struct secpolicy *,
     u_int8_t, u_int32_t, u_int32_t);
 static u_int key_getspreqmsglen(struct secpolicy *);
 static int key_spdexpire(struct secpolicy *);
-static struct secashead *key_newsah(struct secasindex *, ifnet_t, u_int, u_int8_t);
+static struct secashead *key_newsah(struct secasindex *, ifnet_t, u_int, u_int8_t, u_int16_t);
 static struct secasvar *key_newsav(struct mbuf *,
     const struct sadb_msghdr *, struct secashead *, int *,
     struct socket *);
-static struct secashead *key_getsah(struct secasindex *);
+static struct secashead *key_getsah(struct secasindex *, u_int16_t);
 static struct secasvar *key_checkspidup(struct secasindex *, u_int32_t);
 static void key_setspi __P((struct secasvar *, u_int32_t));
 static struct secasvar *key_getsavbyspi(struct secashead *, u_int32_t);
@@ -640,6 +641,7 @@ key_init(struct protosw *pp, struct domain *dp)
        ipsec_policy_count = 0;
 
        LIST_INIT(&sahtree);
+       LIST_INIT(&custom_sahtree);
 
        for (i = 0; i <= SADB_SATYPE_MAX; i++) {
                LIST_INIT(&regtree[i]);
@@ -3808,7 +3810,8 @@ static struct secashead *
 key_newsah(struct secasindex *saidx,
     ifnet_t ipsec_if,
     u_int outgoing_if,
-    u_int8_t dir)
+    u_int8_t dir,
+    u_int16_t flags)
 {
        struct secashead *newsah;
 
@@ -3817,6 +3820,8 @@ key_newsah(struct secasindex *saidx,
                panic("key_newsaidx: NULL pointer is passed.\n");
        }
 
+       VERIFY(flags == SECURITY_ASSOCIATION_PFKEY || flags == SECURITY_ASSOCIATION_CUSTOM_IPSEC);
+
        newsah = keydb_newsecashead();
        if (newsah == NULL) {
                return NULL;
@@ -3854,7 +3859,13 @@ key_newsah(struct secasindex *saidx,
        newsah->dir = dir;
        /* add to saidxtree */
        newsah->state = SADB_SASTATE_MATURE;
-       LIST_INSERT_HEAD(&sahtree, newsah, chain);
+       newsah->flags = flags;
+
+       if (flags == SECURITY_ASSOCIATION_PFKEY) {
+               LIST_INSERT_HEAD(&sahtree, newsah, chain);
+       } else {
+               LIST_INSERT_HEAD(&custom_sahtree, newsah, chain);
+       }
        key_start_timehandler();
 
        return newsah;
@@ -4296,18 +4307,33 @@ key_delsav(
  *     others  : found, pointer to a SA.
  */
 static struct secashead *
-key_getsah(struct secasindex *saidx)
+key_getsah(struct secasindex *saidx, u_int16_t flags)
 {
        struct secashead *sah;
 
        LCK_MTX_ASSERT(sadb_mutex, LCK_MTX_ASSERT_OWNED);
 
-       LIST_FOREACH(sah, &sahtree, chain) {
-               if (sah->state == SADB_SASTATE_DEAD) {
-                       continue;
+       if ((flags & SECURITY_ASSOCIATION_ANY) == SECURITY_ASSOCIATION_ANY ||
+           (flags & SECURITY_ASSOCIATION_PFKEY) == SECURITY_ASSOCIATION_PFKEY) {
+               LIST_FOREACH(sah, &sahtree, chain) {
+                       if (sah->state == SADB_SASTATE_DEAD) {
+                               continue;
+                       }
+                       if (key_cmpsaidx(&sah->saidx, saidx, CMP_REQID)) {
+                               return sah;
+                       }
                }
-               if (key_cmpsaidx(&sah->saidx, saidx, CMP_REQID)) {
-                       return sah;
+       }
+
+       if ((flags & SECURITY_ASSOCIATION_ANY) == SECURITY_ASSOCIATION_ANY ||
+           (flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC) {
+               LIST_FOREACH(sah, &custom_sahtree, chain) {
+                       if (sah->state == SADB_SASTATE_DEAD) {
+                               continue;
+                       }
+                       if (key_cmpsaidx(&sah->saidx, saidx, 0)) {
+                               return sah;
+                       }
                }
        }
 
@@ -4322,9 +4348,9 @@ key_newsah2(struct secasindex *saidx,
 
        LCK_MTX_ASSERT(sadb_mutex, LCK_MTX_ASSERT_OWNED);
 
-       sah = key_getsah(saidx);
+       sah = key_getsah(saidx, SECURITY_ASSOCIATION_ANY);
        if (!sah) {
-               return key_newsah(saidx, NULL, 0, dir);
+               return key_newsah(saidx, NULL, 0, dir, SECURITY_ASSOCIATION_PFKEY);
        }
        return sah;
 }
@@ -7024,15 +7050,21 @@ key_getspi(
        }
 
        /* get a SA index */
-       if ((newsah = key_getsah(&saidx)) == NULL) {
+       if ((newsah = key_getsah(&saidx, SECURITY_ASSOCIATION_ANY)) == NULL) {
                /* create a new SA index: key_addspi is always used for inbound spi */
-               if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_INBOUND)) == NULL) {
+               if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_INBOUND, SECURITY_ASSOCIATION_PFKEY)) == NULL) {
                        lck_mtx_unlock(sadb_mutex);
                        ipseclog((LOG_DEBUG, "key_getspi: No more memory.\n"));
                        return key_senderror(so, m, ENOBUFS);
                }
        }
 
+       if ((newsah->flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC) {
+               lck_mtx_unlock(sadb_mutex);
+               ipseclog((LOG_ERR, "key_getspi: custom ipsec exists\n"));
+               return key_senderror(so, m, EEXIST);
+       }
+
        /* get a new SA */
        /* XXX rewrite */
        newsav = key_newsav(m, mhp, newsah, &error, so);
@@ -7348,7 +7380,7 @@ key_update(
        lck_mtx_lock(sadb_mutex);
 
        /* get a SA header */
-       if ((sah = key_getsah(&saidx)) == NULL) {
+       if ((sah = key_getsah(&saidx, SECURITY_ASSOCIATION_PFKEY)) == NULL) {
                lck_mtx_unlock(sadb_mutex);
                ipseclog((LOG_DEBUG, "key_update: no SA index found.\n"));
                return key_senderror(so, m, ENOENT);
@@ -7546,14 +7578,20 @@ key_migrate(struct socket *so,
        /* Find or create new SAH */
        KEY_SETSECASIDX(proto, sah->saidx.mode, sah->saidx.reqid, src1 + 1, dst1 + 1, ipsec_if1 ? ipsec_if1->if_index : 0, &saidx1);
 
-       if ((newsah = key_getsah(&saidx1)) == NULL) {
-               if ((newsah = key_newsah(&saidx1, ipsec_if1, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_MIGRATE_IPSECIF), sah->dir)) == NULL) {
+       if ((newsah = key_getsah(&saidx1, SECURITY_ASSOCIATION_ANY)) == NULL) {
+               if ((newsah = key_newsah(&saidx1, ipsec_if1, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_MIGRATE_IPSECIF), sah->dir, SECURITY_ASSOCIATION_PFKEY)) == NULL) {
                        lck_mtx_unlock(sadb_mutex);
                        ipseclog((LOG_DEBUG, "key_migrate: No more memory.\n"));
                        return key_senderror(so, m, ENOBUFS);
                }
        }
 
+       if ((newsah->flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC) {
+               lck_mtx_unlock(sadb_mutex);
+               ipseclog((LOG_ERR, "key_migrate: custom ipsec exists\n"));
+               return key_senderror(so, m, EEXIST);
+       }
+
        /* Migrate SAV in to new SAH */
        if (key_migratesav(sav, newsah) != 0) {
                lck_mtx_unlock(sadb_mutex);
@@ -7738,9 +7776,9 @@ key_add(
        lck_mtx_lock(sadb_mutex);
 
        /* get a SA header */
-       if ((newsah = key_getsah(&saidx)) == NULL) {
+       if ((newsah = key_getsah(&saidx, SECURITY_ASSOCIATION_ANY)) == NULL) {
                /* create a new SA header: key_addspi is always used for outbound spi */
-               if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_OUTBOUND)) == NULL) {
+               if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_OUTBOUND, SECURITY_ASSOCIATION_PFKEY)) == NULL) {
                        lck_mtx_unlock(sadb_mutex);
                        ipseclog((LOG_DEBUG, "key_add: No more memory.\n"));
                        bzero_keys(mhp);
@@ -7748,6 +7786,13 @@ key_add(
                }
        }
 
+       if ((newsah->flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC) {
+               lck_mtx_unlock(sadb_mutex);
+               ipseclog((LOG_ERR, "key_add: custom ipsec exists\n"));
+               bzero_keys(mhp);
+               return key_senderror(so, m, EEXIST);
+       }
+
        /* set spidx if there */
        /* XXX rewrite */
        error = key_setident(newsah, m, mhp);
@@ -10885,3 +10930,115 @@ key_fill_offload_frames_for_savs(ifnet_t ifp,
 
        return frame_index;
 }
+
+#pragma mark Custom IPsec
+
+__private_extern__ bool
+key_custom_ipsec_token_is_valid(void *ipsec_token)
+{
+       if (ipsec_token == NULL) {
+               return false;
+       }
+
+       struct secashead *sah = (struct secashead *)ipsec_token;
+
+       return (sah->flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC;
+}
+
+__private_extern__ int
+key_reserve_custom_ipsec(void **ipsec_token, union sockaddr_in_4_6 *src, union sockaddr_in_4_6 *dst,
+    u_int8_t proto)
+{
+       if (src == NULL || dst == NULL) {
+               ipseclog((LOG_ERR, "register custom ipsec: invalid address\n"));
+               return EINVAL;
+       }
+
+       if (src->sa.sa_family != dst->sa.sa_family) {
+               ipseclog((LOG_ERR, "register custom ipsec: address family mismatched\n"));
+               return EINVAL;
+       }
+
+       if (src->sa.sa_len != dst->sa.sa_len) {
+               ipseclog((LOG_ERR, "register custom ipsec: address struct size mismatched\n"));
+               return EINVAL;
+       }
+
+       if (ipsec_token == NULL) {
+               ipseclog((LOG_ERR, "register custom ipsec: invalid ipsec token\n"));
+               return EINVAL;
+       }
+
+       switch (src->sa.sa_family) {
+       case AF_INET:
+               if (src->sa.sa_len != sizeof(struct sockaddr_in)) {
+                       ipseclog((LOG_ERR, "register custom esp: invalid address length\n"));
+                       return EINVAL;
+               }
+               break;
+       case AF_INET6:
+               if (src->sa.sa_len != sizeof(struct sockaddr_in6)) {
+                       ipseclog((LOG_ERR, "register custom esp: invalid address length\n"));
+                       return EINVAL;
+               }
+               break;
+       default:
+               ipseclog((LOG_ERR, "register custom esp: invalid address length\n"));
+               return EAFNOSUPPORT;
+       }
+
+       if (proto != IPPROTO_ESP && proto != IPPROTO_AH) {
+               ipseclog((LOG_ERR, "register custom esp: invalid proto %u\n", proto));
+               return EINVAL;
+       }
+
+       struct secasindex saidx = {};
+       KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, &src->sa, &dst->sa, 0, &saidx);
+
+       lck_mtx_lock(sadb_mutex);
+
+       struct secashead *sah = NULL;
+       if ((sah = key_getsah(&saidx, SECURITY_ASSOCIATION_ANY)) != NULL) {
+               lck_mtx_unlock(sadb_mutex);
+               ipseclog((LOG_ERR, "register custom esp: SA exists\n"));
+               return EEXIST;
+       }
+
+       if ((sah = key_newsah(&saidx, NULL, 0, IPSEC_DIR_ANY, SECURITY_ASSOCIATION_CUSTOM_IPSEC)) == NULL) {
+               lck_mtx_unlock(sadb_mutex);
+               ipseclog((LOG_DEBUG, "register custom esp: No more memory.\n"));
+               return ENOBUFS;
+       }
+
+       *ipsec_token = (void *)sah;
+
+       lck_mtx_unlock(sadb_mutex);
+       return 0;
+}
+
+__private_extern__ void
+key_release_custom_ipsec(void **ipsec_token)
+{
+       struct secashead *sah = *ipsec_token;
+       VERIFY(sah != NULL);
+
+       lck_mtx_lock(sadb_mutex);
+
+       VERIFY((sah->flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC);
+
+       bool sa_present = true;
+       if (LIST_FIRST(&sah->savtree[SADB_SASTATE_LARVAL]) == NULL &&
+           LIST_FIRST(&sah->savtree[SADB_SASTATE_MATURE]) == NULL &&
+           LIST_FIRST(&sah->savtree[SADB_SASTATE_DYING]) == NULL &&
+           LIST_FIRST(&sah->savtree[SADB_SASTATE_DEAD]) == NULL) {
+               sa_present = false;
+       }
+       VERIFY(sa_present == false);
+
+       key_delsah(sah);
+
+       lck_mtx_unlock(sadb_mutex);
+
+       *ipsec_token = NULL;
+       return;
+}
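
The new key.c block above gives kernel subsystems a way to reserve a src/dst/proto tuple for custom (non-PF_KEY) IPsec handling: key_reserve_custom_ipsec() creates a flagged SAH in the separate custom_sahtree and hands back an opaque token, and the PF_KEY paths (key_getspi, key_add, key_migrate) now refuse with EEXIST when they collide with such a reservation. A hypothetical consumer, shown only to illustrate the reserve/validate/release lifecycle:

	static void *esp_token = NULL;  /* opaque token; in practice a struct secashead * */

	static int
	claim_custom_esp(union sockaddr_in_4_6 *src, union sockaddr_in_4_6 *dst)
	{
		int error = key_reserve_custom_ipsec(&esp_token, src, dst, IPPROTO_ESP);
		if (error != 0) {
			return error;           /* EEXIST if PF_KEY already owns the tuple */
		}
		VERIFY(key_custom_ipsec_token_is_valid(esp_token));
		return 0;
	}

	static void
	release_custom_esp(void)
	{
		if (esp_token != NULL) {
			key_release_custom_ipsec(&esp_token);   /* clears esp_token */
		}
	}
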
diff --git a/bsd/netkey/key.h b/bsd/netkey/key.h
index 418f9792daf2cdd024d57aed5530ee5415137b01..d07289b62072198936a37555c1071b2cce99156a 100644 (file)
@@ -119,7 +119,9 @@ struct ifnet_keepalive_offload_frame;
 extern u_int32_t key_fill_offload_frames_for_savs(struct ifnet *,
     struct ifnet_keepalive_offload_frame *frames_array, u_int32_t, size_t);
 
-
+extern bool key_custom_ipsec_token_is_valid(void *);
+extern int key_reserve_custom_ipsec(void **, union sockaddr_in_4_6 *, union sockaddr_in_4_6 *, u_int8_t proto);
+extern void key_release_custom_ipsec(void **);
 
 #endif /* BSD_KERNEL_PRIVATE */
 #endif /* _NETKEY_KEY_H_ */
diff --git a/bsd/netkey/keydb.h b/bsd/netkey/keydb.h
index db7a04ef30eb3cc0ed24c795181662ad406f4ca6..92e7e9655e1fa9d58b19f8ec057827b67fa7ab3a 100644 (file)
@@ -49,6 +49,10 @@ struct secasindex {
        u_int ipsec_ifindex;
 };
 
+#define SECURITY_ASSOCIATION_ANY          0x0000
+#define SECURITY_ASSOCIATION_PFKEY        0x0001
+#define SECURITY_ASSOCIATION_CUSTOM_IPSEC 0x0010
+
 /* Security Association Data Base */
 struct secashead {
        LIST_ENTRY(secashead) chain;
@@ -68,6 +72,8 @@ struct secashead {
        /* The first of this list is newer SA */
 
        struct route_in6 sa_route;              /* route cache */
+
+       uint16_t flags;
 };
 
 #define MAX_REPLAY_WINDOWS 4
diff --git a/bsd/nfs/krpc_subr.c b/bsd/nfs/krpc_subr.c
index 50e547fb16648366a745ad37f4e90c34d4c10293..345762a0ba1e446d9c9355f8382342aa583e8019 100644 (file)
@@ -65,6 +65,9 @@
  *
  */
 
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/ioctl.h>
@@ -604,3 +607,5 @@ out1:
        }
        return error;
 }
+
+#endif /* CONFIG_NFS_CLIENT */
diff --git a/bsd/nfs/nfs.h b/bsd/nfs/nfs.h
index dcb9647e8e8d1fa9a24dd54fbdea38b35c6b4075..60828d4c02d9e1199b2ca3c68c522757b33f6583 100644 (file)
@@ -844,6 +844,7 @@ struct nfs_location_index;
 struct nfs_socket;
 struct nfs_socket_search;
 struct nfsrv_uc_arg;
+struct direntry;
 
 /*
 * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
@@ -1288,6 +1289,7 @@ void    nfs_vattr_set_bitmap(struct nfsmount *, uint32_t *, struct vnode_attr *)
 void    nfs3_pathconf_cache(struct nfsmount *, struct nfs_fsattr *);
 int     nfs3_mount_rpc(struct nfsmount *, struct sockaddr *, int, int, char *, vfs_context_t, int, fhandle_t *, struct nfs_sec *);
 void    nfs3_umount_rpc(struct nfsmount *, vfs_context_t, int);
+void    nfs_rdirplus_update_node_attrs(nfsnode_t, struct direntry *, fhandle_t *, struct nfs_vattr *, uint64_t *);
 int     nfs_node_access_slot(nfsnode_t, uid_t, int);
 void    nfs_vnode_notify(nfsnode_t, uint32_t);
 
diff --git a/bsd/nfs/nfs4_subs.c b/bsd/nfs/nfs4_subs.c
index ffb82cb064d6b987052ee5d2069a825fee8499cb..4d15ab0e4ab341f416082a0cdfc337df1c020ca0 100644 (file)
@@ -26,6 +26,9 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
 /*
  * miscellaneous support functions for NFSv4
  */
@@ -3032,3 +3035,5 @@ recheckdeleg:
                    vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid, error);
        }
 }
+
+#endif /* CONFIG_NFS_CLIENT */
diff --git a/bsd/nfs/nfs4_vnops.c b/bsd/nfs/nfs4_vnops.c
index 261da73e24f107fcd097a6154d0cde00362b2d31..f6619bfe270185369b9de66af2bf7fcc69d167af 100644 (file)
@@ -26,6 +26,9 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
 /*
  * vnode op calls for NFS version 4
  */
@@ -961,6 +964,10 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
 
                if (rdirplus) {
                        microuptime(&now);
+                       if (lastcookie == 0) {
+                               dnp->n_rdirplusstamp_sof = now.tv_sec;
+                               dnp->n_rdirplusstamp_eof = 0;
+                       }
                }
 
                /* loop through the entries packing them into the buffer */
@@ -1096,6 +1103,7 @@ nextbuffer:
                                }
                                *(time_t*)(&dp->d_name[dp->d_namlen + 1 + fhlen]) = now.tv_sec;
                                dp->d_reclen = reclen;
+                               nfs_rdirplus_update_node_attrs(dnp, dp, &fh, nvattrp, &savedxid);
                        }
                        padstart = dp->d_name + dp->d_namlen + 1 + xlen;
                        ndbhp->ndbh_count++;
@@ -1117,6 +1125,9 @@ nextbuffer:
                        ndbhp->ndbh_flags |= (NDB_FULL | NDB_EOF);
                        nfs_node_lock_force(dnp);
                        dnp->n_eofcookie = lastcookie;
+                       if (rdirplus) {
+                               dnp->n_rdirplusstamp_eof = now.tv_sec;
+                       }
                        nfs_node_unlock(dnp);
                } else {
                        more_entries = 1;
@@ -8952,3 +8963,5 @@ nfs4_vnop_removenamedstream(
 
 #endif
 #endif /* CONFIG_NFS4 */
+
+#endif /* CONFIG_NFS_CLIENT */
diff --git a/bsd/nfs/nfs_bio.c b/bsd/nfs/nfs_bio.c
index 2e2dec099c1fcfddec673452083a96e9e4572682..e7ddfaaa5402a81e948fbb0ef48a134ab411b70b 100644 (file)
  *     @(#)nfs_bio.c   8.9 (Berkeley) 3/30/95
  * FreeBSD-Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $
  */
+
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
@@ -4152,3 +4156,5 @@ nfs_buf_readdir(struct nfsbuf *bp, vfs_context_t ctx)
        }
        return error;
 }
+
+#endif /* CONFIG_NFS_CLIENT */
diff --git a/bsd/nfs/nfs_boot.c b/bsd/nfs/nfs_boot.c
index 9f5ec1030e6a0c51c1fc85d819ef060c6a659f92..65728d3b1ec0e2a77540b97f015ef293414077a4 100644 (file)
@@ -92,6 +92,9 @@
  *     - replaced big automatic arrays with MALLOC'd data
  */
 
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -826,3 +829,5 @@ out:
 }
 
 #endif /* NETHER */
+
+#endif /* CONFIG_NFS_CLIENT */
diff --git a/bsd/nfs/nfs_conf.h b/bsd/nfs/nfs_conf.h
new file mode 100644 (file)
index 0000000..7b6662e
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _BSD_NFS_NFS_CONF_H_
+#define _BSD_NFS_NFS_CONF_H_
+
+#if NFSCLIENT
+#define CONFIG_NFS_CLIENT     1
+#endif /* NFSCLIENT */
+
+#if NFSSERVER
+#define CONFIG_NFS_SERVER     1
+#endif /* NFSSERVER */
+
+
+#if CONFIG_NFS_CLIENT || CONFIG_NFS_SERVER
+#define CONFIG_NFS            1
+#endif /* CONFIG_NFS_CLIENT || CONFIG_NFS_SERVER */
+
+#endif /* _BSD_NFS_NFS_CONF_H_ */
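
Every NFS source file below follows the same pattern this new header enables: include it first, then gate the whole translation unit on the derived CONFIG_NFS_CLIENT / CONFIG_NFS_SERVER / CONFIG_NFS macros instead of the raw NFSCLIENT/NFSSERVER build options. The shape, in isolation:

	#include <nfs/nfs_conf.h>
	#if CONFIG_NFS_CLIENT           /* or CONFIG_NFS_SERVER / CONFIG_NFS */

	/* ... entire client-only translation unit ... */

	#endif /* CONFIG_NFS_CLIENT */
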
diff --git a/bsd/nfs/nfs_gss.c b/bsd/nfs/nfs_gss.c
index 95d21f6c6defdbc7edf7402dda8f93f50b25c6f9..71188d12a84ba7755d60df1a4b84121d5c0ec666 100644 (file)
@@ -26,6 +26,9 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS
+
 /*************
  * These functions implement RPCSEC_GSS security for the NFS client and server.
  * The code is specific to the use of Kerberos v5 and the use of DES MAC MD5
 #define NFS_GSS_ISDBG  (NFS_DEBUG_FACILITY &  NFS_FAC_GSS)
 
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 u_long nfs_gss_svc_ctx_hash;
 struct nfs_gss_svc_ctx_hashhead *nfs_gss_svc_ctx_hashtbl;
 lck_mtx_t *nfs_gss_svc_ctx_mutex;
 lck_grp_t *nfs_gss_svc_grp;
 uint32_t nfsrv_gss_context_ttl = GSS_CTX_EXPIRE;
 #define GSS_SVC_CTX_TTL ((uint64_t)max(2*GSS_CTX_PEND, nfsrv_gss_context_ttl) * NSEC_PER_SEC)
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
 lck_grp_t *nfs_gss_clnt_grp;
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
 
 #define KRB5_MAX_MIC_SIZE 128
 uint8_t krb5_mech_oid[11] = { 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x01, 0x02, 0x02 };
 static uint8_t xdrpad[] = { 0x00, 0x00, 0x00, 0x00};
 
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
 static int      nfs_gss_clnt_ctx_find(struct nfsreq *);
 static int      nfs_gss_clnt_ctx_init(struct nfsreq *, struct nfs_gss_clnt_ctx *);
 static int      nfs_gss_clnt_ctx_init_retry(struct nfsreq *, struct nfs_gss_clnt_ctx *);
@@ -149,9 +152,9 @@ static void     nfs_gss_clnt_ctx_clean(struct nfs_gss_clnt_ctx *);
 static int      nfs_gss_clnt_ctx_copy(struct nfs_gss_clnt_ctx *, struct nfs_gss_clnt_ctx **);
 static void     nfs_gss_clnt_ctx_destroy(struct nfs_gss_clnt_ctx *);
 static void     nfs_gss_clnt_log_error(struct nfsreq *, struct nfs_gss_clnt_ctx *, uint32_t, uint32_t);
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 static struct nfs_gss_svc_ctx *nfs_gss_svc_ctx_find(uint32_t);
 static void     nfs_gss_svc_ctx_insert(struct nfs_gss_svc_ctx *);
 static void     nfs_gss_svc_ctx_timer(void *, void *);
@@ -160,7 +163,7 @@ static int      nfs_gss_svc_seqnum_valid(struct nfs_gss_svc_ctx *, uint32_t);
 
 /* This is only used by server code */
 static void     nfs_gss_nfsm_chain(struct nfsm_chain *, mbuf_t);
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 
 static void     host_release_special_port(mach_port_t);
 static mach_port_t host_copy_special_port(mach_port_t);
@@ -170,12 +173,12 @@ static int      nfs_gss_mach_vmcopyout(vm_map_copy_t, uint32_t, u_char *);
 static int      nfs_gss_mchain_length(mbuf_t);
 static int      nfs_gss_append_chain(struct nfsm_chain *, mbuf_t);
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 thread_call_t nfs_gss_svc_ctx_timer_call;
 int nfs_gss_timer_on = 0;
 uint32_t nfs_gss_ctx_count = 0;
 const uint32_t nfs_gss_ctx_max = GSS_SVC_MAXCONTEXTS;
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 
 /*
  * Initialization when NFS starts
@@ -183,18 +186,18 @@ const uint32_t nfs_gss_ctx_max = GSS_SVC_MAXCONTEXTS;
 void
 nfs_gss_init(void)
 {
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
        nfs_gss_clnt_grp = lck_grp_alloc_init("rpcsec_gss_clnt", LCK_GRP_ATTR_NULL);
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
        nfs_gss_svc_grp  = lck_grp_alloc_init("rpcsec_gss_svc", LCK_GRP_ATTR_NULL);
 
        nfs_gss_svc_ctx_hashtbl = hashinit(SVC_CTX_HASHSZ, M_TEMP, &nfs_gss_svc_ctx_hash);
        nfs_gss_svc_ctx_mutex = lck_mtx_alloc_init(nfs_gss_svc_grp, LCK_ATTR_NULL);
 
        nfs_gss_svc_ctx_timer_call = thread_call_allocate(nfs_gss_svc_ctx_timer, NULL);
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 }
 
 /*
@@ -389,7 +392,7 @@ rpc_gss_priv_data_create(gss_ctx_id_t ctx, mbuf_t *mb_head, uint32_t seqnum, uin
        return error;
 }
 
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
 
 /*
  * Restore the argument or result from an rpc_gss_integ_data mbuf chain
@@ -2818,14 +2821,14 @@ out:
        nfs_gss_clnt_ctx_unref(&req);
        return error;
 }
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
 
 /*************
  *
  * Server functions
  */
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 
 /*
  * Find a server context based on a handle value received
@@ -3842,7 +3845,7 @@ nfs_gss_svc_cleanup(void)
        lck_mtx_unlock(nfs_gss_svc_ctx_mutex);
 }
 
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 
 
 /*************
@@ -4013,7 +4016,7 @@ nfs_gss_append_chain(struct nfsm_chain *nmc, mbuf_t mc)
        return 0;
 }
 
-#if NFSSERVER /* Only used by NFSSERVER */
+#if CONFIG_NFS_SERVER /* Only used by CONFIG_NFS_SERVER */
 /*
  * Convert an mbuf chain to an NFS mbuf chain
  */
@@ -4034,7 +4037,7 @@ nfs_gss_nfsm_chain(struct nfsm_chain *nmc, mbuf_t mc)
        nmc->nmc_left = mbuf_trailingspace(tail);
        nmc->nmc_flags = 0;
 }
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 
 
 #if 0
@@ -4061,3 +4064,5 @@ hexdump(const char *msg, void *data, size_t len)
        }
 }
 #endif
+
+#endif /* CONFIG_NFS */
diff --git a/bsd/nfs/nfs_lock.c b/bsd/nfs/nfs_lock.c
index ab5f4f4c9b9da9b189149e31d0162aeb36cf076d..352e1b61e5796ece006c38a23653e6e66839e640 100644 (file)
@@ -55,6 +55,9 @@
  *      from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
  */
 
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
 #include <sys/cdefs.h>
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -1050,3 +1053,5 @@ nfslockdnotify(proc_t p, user_addr_t argp)
 
        return error;
 }
+
+#endif /* CONFIG_NFS_CLIENT */
diff --git a/bsd/nfs/nfs_node.c b/bsd/nfs/nfs_node.c
index 90400cfa5a47f3ccbac7db46edfb33881fd9b4cf..60bd5609f93d059a4f07f4ba62b89eac99fbaec1 100644 (file)
@@ -65,6 +65,8 @@
  * FreeBSD-Id: nfs_node.c,v 1.22 1997/10/28 14:06:20 bde Exp $
  */
 
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -1451,3 +1453,5 @@ out:
 
        return i <= nfsnodehash;
 }
+
+#endif /* CONFIG_NFS_CLIENT */
diff --git a/bsd/nfs/nfs_serv.c b/bsd/nfs/nfs_serv.c
index 2ebb8994bdc082f7ee17d8378458212d7935dc31..189978adf2cbd6a85840bfc9aa588f12411004d4 100644 (file)
@@ -65,6 +65,9 @@
  * FreeBSD-Id: nfs_serv.c,v 1.52 1997/10/28 15:59:05 bde Exp $
  */
 
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_SERVER
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <security/mac_framework.h>
 #endif
 
-#if NFSSERVER
-
 /*
  * NFS server globals
  */
@@ -4898,6 +4899,7 @@ nfsrv_statfs(
 
        VFSATTR_INIT(&va);
        VFSATTR_WANTED(&va, f_blocks);
+       VFSATTR_WANTED(&va, f_bfree);
        VFSATTR_WANTED(&va, f_bavail);
        VFSATTR_WANTED(&va, f_files);
        VFSATTR_WANTED(&va, f_ffree);
@@ -5284,4 +5286,4 @@ nfsrv_authorize(
        return error;
 }
 
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c
index d0a6fd32765c48916a55b56ea11dcb4f86a49b0c..e5c2a590ef55fac4286f6eb3ce28bb0f76d5c827 100644 (file)
@@ -65,6 +65,9 @@
  * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
  */
 
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS
+
 /*
  * Socket operations for use by nfs
  */
@@ -116,13 +119,13 @@ boolean_t       current_thread_aborted(void);
 kern_return_t   thread_terminate(thread_t);
 
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */
 
 int nfsrv_getstream(struct nfsrv_sock *, int);
 int nfsrv_getreq(struct nfsrv_descript *);
 extern int nfsv3_procid[NFS_NPROCS];
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 
 /*
  * compare two sockaddr structures
@@ -153,7 +156,7 @@ nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2)
        return -1;
 }
 
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
 
 int     nfs_connect_search_new_socket(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
 int     nfs_connect_search_socket_connect(struct nfsmount *, struct nfs_socket *, int);
@@ -6324,9 +6327,9 @@ nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
 }
 
 
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 
 /*
  * Generate the rpc reply header
@@ -7046,4 +7049,6 @@ nfsrv_wakenfsd(struct nfsrv_sock *slp)
        wakeup(nd);
 }
 
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
+
+#endif /* CONFIG_NFS */
diff --git a/bsd/nfs/nfs_srvcache.c b/bsd/nfs/nfs_srvcache.c
index 639cca0752150e293e1e3a97c7302db67ea716a3..5addbf6fbac7175270d355dcf1d7d9f82ca26045 100644 (file)
@@ -65,7 +65,9 @@
  * FreeBSD-Id: nfs_srvcache.c,v 1.15 1997/10/12 20:25:46 phk Exp $
  */
 
-#if NFSSERVER
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_SERVER
+
 /*
  * Reference: Chet Juszczak, "Improving the Performance and Correctness
  *             of an NFS Server", in Proc. Winter 1989 USENIX Conference,
@@ -455,4 +457,4 @@ nfsrv_cleancache(void)
        lck_mtx_unlock(nfsrv_reqcache_mutex);
 }
 
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c
index 9c061a4329a8920e67b90b1de87ea7a788510471..b16d3184682b8b7d76f1518a6dd4274787e6c1d4 100644 (file)
@@ -65,6 +65,9 @@
  * FreeBSD-Id: nfs_subs.c,v 1.47 1997/11/07 08:53:24 phk Exp $
  */
 
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS
+
 /*
  * These functions support the macros and help fiddle mbuf chains for
  * the nfs op functions. They do things like create the rpc header and
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
 #include <nfs/nfsnode.h>
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
 #define _NFS_XDR_SUBS_FUNCS_ /* define this to get xdrbuf function definitions */
 #endif
 #include <nfs/xdr_subs.h>
@@ -217,7 +220,7 @@ vtonfsv2_mode(enum vtype vtype, mode_t m)
        }
 }
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 
 /*
  * Mapping of old NFS Version 2 RPC numbers to generic numbers.
@@ -248,7 +251,7 @@ int nfsv3_procid[NFS_NPROCS] = {
        NFSPROC_NOOP
 };
 
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 
 /*
  * and the reverse mapping from generic to Version 2 procedure numbers
@@ -293,7 +296,7 @@ nfs_mbuf_init(void)
        nfs_mbuf_minclsize = ms.minclsize;
 }
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 
 /*
  * allocate a list of mbufs to hold the given amount of data
@@ -338,7 +341,7 @@ nfsm_mbuf_get_list(size_t size, mbuf_t *mp, int *mbcnt)
        return error;
 }
 
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 
 /*
  * nfsm_chain_new_mbuf()
@@ -830,7 +833,7 @@ nfsm_chain_get_uio(struct nfsm_chain *nmc, uint32_t len, uio_t uio)
        return error;
 }
 
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
 
 int
 nfsm_chain_add_string_nfc(struct nfsm_chain *nmc, const uint8_t *s, uint32_t slen)
@@ -2232,7 +2235,7 @@ nfs_mountopts(struct nfsmount *nmp, char *buf, int buflen)
        return c > buflen ? ENOMEM : 0;
 }
 
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
 
 /*
  * Schedule a callout thread to run an NFS timer function
@@ -2248,7 +2251,7 @@ nfs_interval_timer_start(thread_call_t call, int interval)
 }
 
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 
 int nfsrv_cmp_secflavs(struct nfs_sec *, struct nfs_sec *);
 int nfsrv_hang_addrlist(struct nfs_export *, struct user_nfs_export_args *);
@@ -3027,6 +3030,8 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
        vnode_t mvp = NULL, xvp = NULL;
        mount_t mp = NULL;
        char path[MAXPATHLEN];
+       char fl_pathbuff[MAXPATHLEN];
+       int fl_pathbuff_len = MAXPATHLEN;
        int expisroot;
 
        if (unxa->nxa_flags == NXA_CHECK) {
@@ -3134,12 +3139,6 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
                        goto unlock_out;
                }
                if ((unxa->nxa_flags & (NXA_ADD | NXA_OFFLINE)) == NXA_ADD) {
-                       /* if adding, verify that the mount is still what we expect */
-                       mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path);
-                       if (mp) {
-                               mount_ref(mp, 0);
-                               mount_iterdrop(mp);
-                       }
                        /* find exported FS root vnode */
                        NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
                            UIO_SYSSPACE, CAST_USER_ADDR_T(nxfs->nxfs_path), ctx);
@@ -3153,6 +3152,20 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
                                error = EINVAL;
                                goto out;
                        }
+                       /* if adding, verify that the mount is still what we expect */
+                       mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path);
+                       if (!mp) {
+                               /* check for firmlink-free path */
+                               if (vn_getpath_no_firmlink(mvp, fl_pathbuff, &fl_pathbuff_len) == 0 &&
+                                   fl_pathbuff_len > 0 &&
+                                   !strncmp(nxfs->nxfs_path, fl_pathbuff, MAXPATHLEN)) {
+                                       mp = vfs_getvfs_by_mntonname(vnode_mount(mvp)->mnt_vfsstat.f_mntonname);
+                               }
+                       }
+                       if (mp) {
+                               mount_ref(mp, 0);
+                               mount_iterdrop(mp);
+                       }
                        /* sanity check: this should be same mount */
                        if (mp != vnode_mount(mvp)) {
                                error = EINVAL;
@@ -4507,4 +4520,6 @@ nfsrv_errmap(struct nfsrv_descript *nd, int err)
        return (int)*defaulterrp;
 }
 
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
+
+#endif /* CONFIG_NFS */
diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c
index fe4bb37cf7de586449dbef76ac7339cac72765d1..adb45d85dde626e77212e9e5e60d9200cc61160d 100644 (file)
@@ -64,6 +64,9 @@
  *     @(#)nfs_syscalls.c      8.5 (Berkeley) 3/30/95
  * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
  */
+
+#include <nfs/nfs_conf.h>
+
 /*
  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  * support for mandatory and extensible security protections.  This notice
 
 kern_return_t   thread_terminate(thread_t); /* XXX */
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 
 extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];
 
@@ -141,15 +144,17 @@ void    nfsrv_zapsock(struct nfsrv_sock *);
 void    nfsrv_slpderef(struct nfsrv_sock *);
 void    nfsrv_slpfree(struct nfsrv_sock *);
 
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 
+#if CONFIG_NFS
 /*
  * sysctl stuff
  */
 SYSCTL_DECL(_vfs_generic);
 SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");
+#endif /* CONFIG_NFS */
 
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs client hinge");
 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
@@ -176,9 +181,9 @@ SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_R
 #if CONFIG_NFS4
 SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain, CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_default_domain, sizeof(nfs4_default_domain), "");
 #endif
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
@@ -202,12 +207,9 @@ SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | C
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
 #endif
-#endif /* NFSSERVER */
-
+#endif /* CONFIG_NFS_SERVER */
 
-#if NFSCLIENT
-
-#if CONFIG_NFS4
+#if CONFIG_NFS_CLIENT && CONFIG_NFS4
 static int
 mapname2id(struct nfs_testmapid *map)
 {
@@ -287,11 +289,21 @@ nfsclnt_testidmap(proc_t p, user_addr_t argp)
 
        return error ? error : coerror;
 }
+#endif /* CONFIG_NFS_CLIENT && CONFIG_NFS4 */
+
+#if !CONFIG_NFS_CLIENT
+#define __no_nfs_client_unused      __unused
+#else
+#define __no_nfs_client_unused      /* nothing */
 #endif
 
 int
-nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
+nfsclnt(
+       proc_t p __no_nfs_client_unused,
+       struct nfsclnt_args *uap __no_nfs_client_unused,
+       __unused int *retval)
 {
+#if CONFIG_NFS_CLIENT
        struct lockd_ans la;
        int error;
 
@@ -314,8 +326,12 @@ nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
                error = EINVAL;
        }
        return error;
+#else
+       return ENOSYS;
+#endif /* CONFIG_NFS_CLIENT */
 }
 
+#if CONFIG_NFS_CLIENT
 
 /*
  * Asynchronous I/O threads for client NFS.
@@ -512,16 +528,20 @@ worktodo:
        return 0;
 }
 
-#endif /* NFSCLIENT */
-
+#endif /* CONFIG_NFS_CLIENT */
 
-#if NFSSERVER
+#if !CONFIG_NFS_SERVER
+#define __no_nfs_server_unused      __unused
+#else
+#define __no_nfs_server_unused      /* nothing */
+#endif
 
 /*
  * NFS server system calls
  * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
  */
 
+#if CONFIG_NFS_SERVER
 static struct nfs_exportfs *
 nfsrv_find_exportfs(const char *ptr)
 {
@@ -543,7 +563,10 @@ nfsrv_find_exportfs(const char *ptr)
  * Get file handle system call
  */
 int
-getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
+getfh(
+       proc_t p __no_nfs_server_unused,
+       struct getfh_args *uap __no_nfs_server_unused,
+       __unused int *retval)
 {
        vnode_t vp;
        struct nfs_filehandle nfh;
@@ -665,7 +688,9 @@ out:
        error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
        return error;
 }
+#endif /* CONFIG_NFS_SERVER */
 
+#if CONFIG_NFS_SERVER
 extern const struct fileops vnops;
 
 /*
@@ -676,9 +701,9 @@ extern const struct fileops vnops;
  * security hole.
  */
 int
-fhopen( proc_t p,
-    struct fhopen_args *uap,
-    int32_t *retval)
+fhopen(proc_t p __no_nfs_server_unused,
+    struct fhopen_args *uap __no_nfs_server_unused,
+    int32_t *retval __no_nfs_server_unused)
 {
        vnode_t vp;
        struct nfs_filehandle nfh;
@@ -835,12 +860,16 @@ bad:
        vnode_put(vp);
        return error;
 }
+#endif /* CONFIG_NFS_SERVER */
 
+#if CONFIG_NFS_SERVER
 /*
  * NFS server pseudo system call
  */
 int
-nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
+nfssvc(proc_t p __no_nfs_server_unused,
+    struct nfssvc_args *uap __no_nfs_server_unused,
+    __unused int *retval)
 {
        mbuf_t nam;
        struct user_nfsd_args user_nfsdarg;
@@ -916,6 +945,9 @@ nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
        }
        return error;
 }
+#endif /* CONFIG_NFS_SERVER */
+
+#if CONFIG_NFS_SERVER
 
 /*
  * Adds a socket to the list for servicing by nfsds.
@@ -1831,4 +1863,4 @@ nfsrv_cleanup(void)
        nfsrv_udp6sock = NULL;
 }
 
-#endif /* NFS_NOSERVER */
+#endif /* CONFIG_NFS_SERVER */
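
The macro shims above deserve a note: nfsclnt(), getfh(), fhopen() and nfssvc() keep their syscall slots even when NFS is compiled out, so their parameters must be marked __unused only in that configuration. The pattern in isolation (nfssvc_impl() is a hypothetical stand-in for the real body):

	#if !CONFIG_NFS_SERVER
	#define __no_nfs_server_unused      __unused        /* params unread without the server */
	#else
	#define __no_nfs_server_unused      /* nothing */
	#endif

	int
	nfssvc(proc_t p __no_nfs_server_unused,
	    struct nfssvc_args *uap __no_nfs_server_unused,
	    __unused int *retval)
	{
	#if CONFIG_NFS_SERVER
		return nfssvc_impl(p, uap);     /* hypothetical */
	#else
		return ENOSYS;                  /* slot stays wired, call is refused */
	#endif
	}
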
diff --git a/bsd/nfs/nfs_upcall.c b/bsd/nfs/nfs_upcall.c
index 9b83d3fc6cd34398dc1766aa121e35a530855033..b719f88a0b581eab4b7bd908a77c6ad01f06880f 100644 (file)
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_SERVER
+
 #include <stdint.h>
 #include <sys/param.h>
 #include <sys/mount_internal.h>
@@ -398,3 +402,5 @@ direct:
 
        return;
 }
+
+#endif /* CONFIG_NFS_SERVER */
diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c
index 902680c6855018c31b848339073c67b1beb54b28..ef5457410d0f5bae92effc1fd8e6e9d016be3226 100644 (file)
  *     @(#)nfs_vfsops.c        8.12 (Berkeley) 5/20/95
  * FreeBSD-Id: nfs_vfsops.c,v 1.52 1997/11/12 05:42:21 julian Exp $
  */
+
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
 /*
  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  * support for mandatory and extensible security protections.  This notice
@@ -3041,6 +3045,7 @@ mountnfs(
                nmp->nm_iodlink.tqe_next = NFSNOLIST;
                nmp->nm_deadtimeout = 0;
                nmp->nm_curdeadtimeout = 0;
+               NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_RDIRPLUS); /* enable RDIRPLUS by default; it is reverted later if NFSv2 is used */
                NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_NOACL);
                nmp->nm_realm = NULL;
                nmp->nm_principal = NULL;
@@ -6182,7 +6187,7 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
        struct netfs_status *nsp = NULL;
        int timeoutmask;
        uint totlen, count, numThreads;
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
        uint pos;
        struct nfs_exportfs *nxfs;
        struct nfs_export *nx;
@@ -6195,7 +6200,7 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
        struct nfs_user_stat_path_rec upath_rec;
        uint bytes_avail, bytes_total, recs_copied;
        uint numExports, numRecs;
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 
        /*
         * All names at this level are terminal.
@@ -6303,7 +6308,7 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
                *oldlenp = xb.xb_u.xb_buffer.xbb_len;
                xb_cleanup(&xb);
                break;
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
        case NFS_EXPORTSTATS:
                /* setup export stat descriptor */
                stat_desc.rec_vers = NFS_EXPORT_STAT_REC_VERSION;
@@ -6549,7 +6554,7 @@ ustat_skip:
 
                error = copyout(&nfsrv_user_stat_node_count, oldp, sizeof(nfsrv_user_stat_node_count));
                break;
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
        case VFS_CTL_NOLOCKS:
                if (req->oldptr != USER_ADDR_NULL) {
                        lck_mtx_lock(&nmp->nm_lock);
@@ -6727,3 +6732,5 @@ ustat_skip:
        }
        return error;
 }
+
+#endif /* CONFIG_NFS_CLIENT */
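
One functional nugget in the nfs_vfsops.c hunk above: READDIRPLUS is now enabled by default at mount setup and, per the comment, reverted later if the mount turns out to be NFSv2, which lacks that RPC. Sketched with the bitmap helpers the diff uses — the opt-out shown is an assumption based on that comment, not code visible in this hunk:

	NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_RDIRPLUS);      /* on by default */
	/* ... later, once the protocol version has been negotiated ... */
	if (nmp->nm_vers < NFS_VER3) {
		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_RDIRPLUS);  /* no READDIRPLUS in v2 */
	}
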
diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c
index 1df01abc0c60171168911d27c2585b4f698f8da7..4f9208b251b27fcc07b96fe53f32e067def28887 100644 (file)
@@ -65,6 +65,8 @@
  * FreeBSD-Id: nfs_vnops.c,v 1.72 1997/11/07 09:20:48 phk Exp $
  */
 
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
 
 /*
  * vnode op calls for Sun NFS version 2 and 3
@@ -462,6 +464,35 @@ int     nfs_getattr_internal(nfsnode_t, struct nfs_vattr *, vfs_context_t, int);
 int     nfs_refresh_fh(nfsnode_t, vfs_context_t);
 
 
+/*
+ * Update nfsnode attributes to avoid extra getattr calls for each direntry.
+ * This function should be called only if RDIRPLUS flag is enabled.
+ */
+void
+nfs_rdirplus_update_node_attrs(nfsnode_t dnp, struct direntry *dp, fhandle_t *fhp, struct nfs_vattr *nvattrp, uint64_t *savedxidp)
+{
+       nfsnode_t np;
+       struct componentname cn;
+       int isdot = (dp->d_namlen == 1) && (dp->d_name[0] == '.');
+       int isdotdot = (dp->d_namlen == 2) && (dp->d_name[0] == '.') && (dp->d_name[1] == '.');
+
+       if (isdot || isdotdot) {
+               return;
+       }
+
+       np = NULL;
+       bzero(&cn, sizeof(cn));
+       cn.cn_nameptr = dp->d_name;
+       cn.cn_namelen = dp->d_namlen;
+       cn.cn_nameiop = LOOKUP;
+
+       nfs_nget(NFSTOMP(dnp), dnp, &cn, fhp->fh_data, fhp->fh_len, nvattrp, savedxidp, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
+       if (np) {
+               nfs_node_unlock(np);
+               vnode_put(NFSTOV(np));
+       }
+}
+
 /*
  * Find the slot in the access cache for this UID.
  * If adding and no existing slot is found, reuse slots in FIFO order.
@@ -1864,6 +1895,8 @@ nfs3_vnop_getattr(
                                   *  } */*ap)
 {
        int error;
+       nfsnode_t np;
+       uint64_t supported_attrs;
        struct nfs_vattr nva;
        struct vnode_attr *vap = ap->a_vap;
        struct nfsmount *nmp;
@@ -1878,7 +1911,9 @@ nfs3_vnop_getattr(
        /* Return the io size no matter what, since we don't go over the wire for this */
        VATTR_RETURN(vap, va_iosize, nfs_iosize);
 
-       if ((vap->va_active & NFS3_SUPPORTED_VATTRS) == 0) {
+       supported_attrs = NFS3_SUPPORTED_VATTRS;
+
+       if ((vap->va_active & supported_attrs) == 0) {
                return 0;
        }
 
@@ -1887,6 +1922,18 @@ nfs3_vnop_getattr(
                    (uint64_t)VM_KERNEL_ADDRPERM(ap->a_vp),
                    ap->a_vp->v_name ? ap->a_vp->v_name : "empty");
        }
+
+       /*
+        * We should not go over the wire if only the fileid was requested and it has previously been populated.
+        */
+       if ((vap->va_active & supported_attrs) == VNODE_ATTR_va_fileid) {
+               np = VTONFS(ap->a_vp);
+               if (np->n_attrstamp) {
+                       VATTR_RETURN(vap, va_fileid, np->n_vattr.nva_fileid);
+                       return 0;
+               }
+       }
+
        error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED);
        if (error) {
                return error;
@@ -3617,6 +3664,9 @@ skipread:
 out:
        nfs_node_lock_force(np);
        np->n_wrbusy--;
+       if ((ioflag & IO_SYNC) && !np->n_wrbusy && !np->n_numoutput) {
+               np->n_flag &= ~NMODIFIED;
+       }
        nfs_node_unlock(np);
        nfs_data_unlock(np);
        FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
@@ -5441,7 +5491,7 @@ nfs_vnop_readdir(
        nfsnode_t dnp = VTONFS(dvp);
        struct nfsmount *nmp;
        uio_t uio = ap->a_uio;
-       int error, nfsvers, extended, numdirent, bigcookies, ptc, done;
+       int error, nfsvers, extended, numdirent, bigcookies, ptc, done, attrcachetimeout;
        uint16_t i, iptc, rlen, nlen;
        uint64_t cookie, nextcookie, lbn = 0;
        struct nfsbuf *bp = NULL;
@@ -5449,6 +5499,7 @@ nfs_vnop_readdir(
        struct direntry *dp, *dpptc;
        struct dirent dent;
        char *cp = NULL;
+       struct timeval now;
        thread_t thd;
 
        nmp = VTONMP(dvp);
@@ -5498,6 +5549,23 @@ nfs_vnop_readdir(
                }
        }
 
+       if (dnp->n_rdirplusstamp_eof && dnp->n_rdirplusstamp_sof) {
+               attrcachetimeout = nfs_attrcachetimeout(dnp);
+               microuptime(&now);
+               if (attrcachetimeout && (now.tv_sec - dnp->n_rdirplusstamp_sof > attrcachetimeout - 1)) {
+                       dnp->n_rdirplusstamp_eof = dnp->n_rdirplusstamp_sof = 0;
+                       nfs_invaldir(dnp);
+                       nfs_node_unlock(dnp);
+                       error = nfs_vinvalbuf(dvp, 0, ctx, 1);
+                       if (!error) {
+                               error = nfs_node_lock(dnp);
+                       }
+                       if (error) {
+                               goto out;
+                       }
+               }
+       }
+
        /*
         * check for need to invalidate when (re)starting at beginning
         */
@@ -6021,6 +6089,8 @@ nfs_dir_buf_cache_lookup(nfsnode_t dnp, nfsnode_t *npp, struct componentname *cn
        struct nfsbuflists blist;
        daddr64_t lbn, nextlbn;
        int dotunder = (cnp->cn_namelen > 2) && (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == '_');
+       int isdot = (cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.');
+       int isdotdot = (cnp->cn_namelen == 2) && (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == '.');
 
        nmp = NFSTONMP(dnp);
        if (nfs_mount_gone(nmp)) {
@@ -6030,6 +6100,10 @@ nfs_dir_buf_cache_lookup(nfsnode_t dnp, nfsnode_t *npp, struct componentname *cn
                *npp = NULL;
        }
 
+       if (isdot || isdotdot) {
+               return 0;
+       }
+
        /* first check most recent buffer (and next one too) */
        lbn = dnp->n_lastdbl;
        for (i = 0; i < 2; i++) {
@@ -6266,6 +6340,10 @@ noplus:
 
                if (rdirplus) {
                        microuptime(&now);
+                       if (lastcookie == 0) {
+                               dnp->n_rdirplusstamp_sof = now.tv_sec;
+                               dnp->n_rdirplusstamp_eof = 0;
+                       }
                }
 
                /* loop through the entries packing them into the buffer */
@@ -6391,6 +6469,7 @@ nextbuffer:
                                }
                                *(time_t*)(&dp->d_name[dp->d_namlen + 1 + fhlen]) = now.tv_sec;
                                dp->d_reclen = reclen;
+                               nfs_rdirplus_update_node_attrs(dnp, dp, &fh, nvattrp, &savedxid);
                        }
                        padstart = dp->d_name + dp->d_namlen + 1 + xlen;
                        ndbhp->ndbh_count++;
@@ -6414,6 +6493,9 @@ nextbuffer:
                        ndbhp->ndbh_flags |= (NDB_FULL | NDB_EOF);
                        nfs_node_lock_force(dnp);
                        dnp->n_eofcookie = lastcookie;
+                       if (rdirplus) {
+                               dnp->n_rdirplusstamp_eof = now.tv_sec;
+                       }
                        nfs_node_unlock(dnp);
                } else {
                        more_entries = 1;
@@ -8574,3 +8656,4 @@ nfs_vnode_notify(nfsnode_t np, uint32_t events)
        vnode_notify(NFSTOV(np), events, vap);
 }
 
+#endif /* CONFIG_NFS_CLIENT */
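
Note on the readdirplus changes in this file: nfs_vnop_readdir() now stamps the directory node when a readdirplus cycle starts at cookie 0 (n_rdirplusstamp_sof) and again when it reaches EOF (n_rdirplusstamp_eof), while nfs_rdirplus_update_node_attrs() primes each entry's nfsnode attributes as the directory is read. On a later readdir, a completed cycle whose start stamp is older than the directory's attribute-cache timeout forces nfs_invaldir()/nfs_vinvalbuf() so the primed attributes are refetched. A minimal standalone sketch of the staleness predicate, with field names taken from the hunks above (an illustration inferred from the diff, not a verbatim kernel excerpt):

#include <stdbool.h>
#include <time.h>

/*
 * Sketch of the invalidation test added to nfs_vnop_readdir above.
 * sof/eof are the readdirplus start- and end-of-cycle stamps; timeout is
 * the directory's attribute cache timeout in seconds (0 disables aging).
 */
static bool
rdirplus_cache_is_stale(time_t sof, time_t eof, time_t timeout, time_t now)
{
	/* Only a completed cycle (both stamps set) is subject to aging. */
	if (sof == 0 || eof == 0 || timeout == 0) {
		return false;
	}
	/* Stale once the cycle began at least `timeout` seconds ago. */
	return now - sof > timeout - 1;
}

When the test fires, both stamps are zeroed and the cached directory buffers are dropped; the next cycle restarts the clock by setting n_rdirplusstamp_sof when lastcookie == 0.
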
index b16669fc5940f2c4c46b0e1db788ba81490789d8..4c6c8f56f253b013262d5aa493177bde9bc68be8 100644 (file)
@@ -73,6 +73,8 @@
 
 #ifdef __APPLE_API_PRIVATE
 
+#include <nfs/nfs_conf.h>
+
 int nfsm_rpchead(struct nfsreq *, mbuf_t, u_int64_t *, mbuf_t *);
 int nfsm_rpchead2(struct nfsmount *, int, int, int, int, int, kauth_cred_t, struct nfsreq *, mbuf_t, u_int64_t *, mbuf_t *);
 
@@ -96,7 +98,7 @@ int nfsm_chain_get_fh_attr(struct nfsmount *, struct nfsm_chain *, nfsnode_t,
 int nfsm_chain_get_wcc_data_f(struct nfsm_chain *, nfsnode_t, struct timespec *, int *, u_int64_t *);
 int nfsm_chain_get_secinfo(struct nfsm_chain *, uint32_t *, int *);
 
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
 void nfsm_adj(mbuf_t, int, int);
 int nfsm_mbuf_get_list(size_t, mbuf_t *, int *);
 
@@ -106,7 +108,7 @@ int nfsm_chain_add_wcc_data_f(struct nfsrv_descript *, struct nfsm_chain *, int,
 int nfsm_chain_get_path_namei(struct nfsm_chain *, uint32_t, struct nameidata *);
 int nfsm_chain_get_sattr(struct nfsrv_descript *, struct nfsm_chain *, struct vnode_attr *);
 int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
 
 /* check name length */
 #define nfsm_name_len_check(E, ND, LEN) \
index 9562d61448ea021874b5f71a7e13a0a3658972f8..83fa4450530e582baa58e1e7dd95bcfc2ef21580 100644 (file)
@@ -574,6 +574,8 @@ struct nfsnode {
        u_int8_t                n_access[NFS_ACCESS_CACHE_SIZE + 1];      /* ACCESS cache */
        uid_t                   n_accessuid[NFS_ACCESS_CACHE_SIZE];     /* credentials having access */
        time_t                  n_accessstamp[NFS_ACCESS_CACHE_SIZE];   /* access cache timestamp */
+       time_t                  n_rdirplusstamp_sof; /* Readdirplus sof timestamp */
+       time_t                  n_rdirplusstamp_eof; /* Readdirplus eof timestamp */
        union {
                struct {
                        struct timespec n3_mtime; /* Prev modify time. */
index 13137df5f6235e7ef6f1ce8cf372abe6fbc8fa68..d32ee515361af46bff1a715f80438eba74794f69 100644 (file)
 #ifndef _FD_SET
 #define _FD_SET
 
-#include <machine/types.h> /* __int32_t */
+#include <machine/types.h> /* __int32_t and uintptr_t */
+#if !KERNEL
+#include <Availability.h>
+#endif
 
 /*
  * Select uses bit masks of file descriptors in longs.  These macros
@@ -49,17 +52,77 @@ __BEGIN_DECLS
 typedef struct fd_set {
        __int32_t       fds_bits[__DARWIN_howmany(__DARWIN_FD_SETSIZE, __DARWIN_NFDBITS)];
 } fd_set;
+
+#if !KERNEL
+int __darwin_check_fd_set_overflow(int, const void *, int) __attribute__((__weak_import__));
+#endif
 __END_DECLS
 
+#if !KERNEL
+__header_always_inline int
+__darwin_check_fd_set(int _a, const void *_b)
+{
+       if ((uintptr_t)&__darwin_check_fd_set_overflow != (uintptr_t) 0) {
+#if defined(_DARWIN_UNLIMITED_SELECT) || defined(_DARWIN_C_SOURCE)
+               return __darwin_check_fd_set_overflow(_a, _b, 1);
+#else
+               return __darwin_check_fd_set_overflow(_a, _b, 0);
+#endif
+       } else {
+               return 1;
+       }
+}
+
 /* This inline avoids argument side-effect issues with FD_ISSET() */
-static __inline int
-__darwin_fd_isset(int _n, const struct fd_set *_p)
+__header_always_inline int
+__darwin_fd_isset(int _fd, const struct fd_set *_p)
+{
+       if (__darwin_check_fd_set(_fd, (const void *) _p)) {
+               return _p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] & ((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS)));
+       }
+
+       return 0;
+}
+
+__header_always_inline void
+__darwin_fd_set(int _fd, struct fd_set *const _p)
+{
+       if (__darwin_check_fd_set(_fd, (const void *) _p)) {
+               (_p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] |= ((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS))));
+       }
+}
+
+__header_always_inline void
+__darwin_fd_clr(int _fd, struct fd_set *const _p)
+{
+       if (__darwin_check_fd_set(_fd, (const void *) _p)) {
+               (_p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] &= ~((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS))));
+       }
+}
+
+#else /* KERNEL */
+
+__header_always_inline int
+__darwin_fd_isset(int _fd, const struct fd_set *_p)
+{
+       return _p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] & ((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS)));
+}
+
+__header_always_inline void
+__darwin_fd_set(int _fd, struct fd_set *const _p)
+{
+       (_p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] |= ((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS))));
+}
+
+__header_always_inline void
+__darwin_fd_clr(int _fd, struct fd_set *const _p)
 {
-       return _p->fds_bits[(unsigned long)_n / __DARWIN_NFDBITS] & ((__int32_t)(((unsigned long)1) << ((unsigned long)_n % __DARWIN_NFDBITS)));
+       (_p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] &= ~((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS))));
 }
+#endif /* KERNEL */
 
-#define __DARWIN_FD_SET(n, p)   do { int __fd = (n); ((p)->fds_bits[(unsigned long)__fd/__DARWIN_NFDBITS] |= ((__int32_t)(((unsigned long)1)<<((unsigned long)__fd % __DARWIN_NFDBITS)))); } while(0)
-#define __DARWIN_FD_CLR(n, p)   do { int __fd = (n); ((p)->fds_bits[(unsigned long)__fd/__DARWIN_NFDBITS] &= ~((__int32_t)(((unsigned long)1)<<((unsigned long)__fd % __DARWIN_NFDBITS)))); } while(0)
+#define __DARWIN_FD_SET(n, p)   __darwin_fd_set((n), (p))
+#define __DARWIN_FD_CLR(n, p)   __darwin_fd_clr((n), (p))
 #define __DARWIN_FD_ISSET(n, p) __darwin_fd_isset((n), (p))
 
 #if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3
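
The userspace effect of this change: FD_SET, FD_CLR, and FD_ISSET now funnel through __darwin_check_fd_set(), which invokes the weakly imported __darwin_check_fd_set_overflow() when libc provides it (the weak import keeps binaries built against this header running on older systems, where the symbol's address tests as 0 and the check is skipped). Ordinary select(2) code is unchanged; a descriptor at or above FD_SETSIZE, which previously scribbled past the fds_bits array, can now be reported by the hook instead. A minimal caller, assuming nothing beyond the standard API:

#include <stdio.h>
#include <sys/select.h>
#include <unistd.h>

int
main(void)
{
	fd_set readfds;
	struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };

	FD_ZERO(&readfds);
	FD_SET(STDIN_FILENO, &readfds);  /* expands to __darwin_fd_set(), now bounds-checked */

	if (select(STDIN_FILENO + 1, &readfds, NULL, NULL, &tv) > 0 &&
	    FD_ISSET(STDIN_FILENO, &readfds)) {
		printf("stdin is readable\n");
	}
	return 0;
}

What the hook does with an out-of-range report is up to libc; the third argument only records whether the caller was compiled with _DARWIN_UNLIMITED_SELECT or _DARWIN_C_SOURCE.
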
index 5b4f4c133d8f791b774393cd7b2283cd0e43a95f..d5eecd68295ea61f992b9743a62757dea1825c3a 100644 (file)
@@ -531,8 +531,10 @@ typedef struct vol_attributes_attr {
 #define ATTR_CMNEXT_NOFIRMLINKPATH     0x00000020
 #define ATTR_CMNEXT_REALDEVID   0x00000040
 #define ATTR_CMNEXT_REALFSID    0x00000080
+#define ATTR_CMNEXT_CLONEID     0x00000100
+#define ATTR_CMNEXT_EXT_FLAGS   0x00000200
 
-#define ATTR_CMNEXT_VALIDMASK   0x000000fc
+#define ATTR_CMNEXT_VALIDMASK   0x000003fc
 #define ATTR_CMNEXT_SETMASK             0x00000000
 
 /* Deprecated fork attributes */
index 98d3628dd305bf12c6a84960548776ef50ae99c2..a5e6aae66cd841bed5d4cf278c72b3e888c960fe 100644 (file)
@@ -1460,6 +1460,7 @@ typedef struct dtrace_module_symbols {
 } dtrace_module_symbols_t;
 
 #define DTRACE_MODULE_SYMBOLS_SIZE(count) (sizeof(dtrace_module_symbols_t) + ((count - 1) * sizeof(dtrace_symbol_t)))
+#define DTRACE_MODULE_SYMBOLS_COUNT(size) ((size - sizeof(dtrace_module_symbols_t)) / sizeof(dtrace_symbol_t) + 1)
 
 typedef struct dtrace_module_uuids_list {
        uint64_t        dtmul_count;
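
DTRACE_MODULE_SYMBOLS_COUNT() is the inverse of the existing DTRACE_MODULE_SYMBOLS_SIZE(): the struct carries one embedded dtrace_symbol_t plus count - 1 more in its tail, so COUNT(SIZE(n)) == n for any n >= 1. A quick self-contained check (the two typedefs are simplified stand-ins with the same layout relationship, not the real dtrace definitions):

#include <assert.h>
#include <stddef.h>

typedef struct { char pad[64]; } dtrace_symbol_t;   /* stand-in */
typedef struct {
	unsigned long count;
	dtrace_symbol_t symbols[1];   /* count - 1 more follow in memory */
} dtrace_module_symbols_t;                           /* stand-in */

#define DTRACE_MODULE_SYMBOLS_SIZE(count) \
	(sizeof(dtrace_module_symbols_t) + ((count - 1) * sizeof(dtrace_symbol_t)))
#define DTRACE_MODULE_SYMBOLS_COUNT(size) \
	((size - sizeof(dtrace_module_symbols_t)) / sizeof(dtrace_symbol_t) + 1)

int
main(void)
{
	/* COUNT(SIZE(n)) == n: the subtraction removes the header plus one
	 * symbol's worth of tail, and the trailing +1 restores it. */
	for (size_t n = 1; n <= 8; n++) {
		assert(DTRACE_MODULE_SYMBOLS_COUNT(DTRACE_MODULE_SYMBOLS_SIZE(n)) == n);
	}
	return 0;
}
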
index a0138830e034ed41216cd94fc1f6e1289e23e2b7..ca681fc2bbf87f1fd11ed85d180c771d4e164d9c 100644 (file)
@@ -124,6 +124,8 @@ struct image_params {
        uint64_t ip_dyld_fsid;
        uint64_t ip_dyld_fsobjid;
        unsigned int    ip_simulator_binary;    /* simulator binary flags */
+
+       ipc_port_t      ip_sc_port;             /* SUID port. */
 };
 
 /*
index c30f0ba3df41077624e30b39eee5ab79af5f2f3f..a6125289067db6d39a980950e487ebb42b767326 100644 (file)
@@ -646,6 +646,7 @@ __BEGIN_DECLS
 
 /* The Kernel Debug Sub Classes for DBG_MISC */
 #define DBG_MISC_COREBRIGHTNESS 0x01
+#define DBG_MISC_VIDEOENG       0x02
 #define DBG_EVENT               0x10
 #define DBG_MISC_INSTRUMENTS    0x11
 #define DBG_MISC_INSTRUMENTSBT  0x12
index 6c5a8b6b69572a26ae99bd165046131e26f04466..c3894b39494239c806ca953553140acbafe854a0 100644 (file)
@@ -44,14 +44,14 @@ typedef struct memorystatus_freeze_entry {
 #ifdef XNU_KERNEL_PRIVATE
 
 extern unsigned long freeze_threshold_percentage;
-extern unsigned int memorystatus_frozen_count;
+extern unsigned int memorystatus_frozen_count; /* # of processes that are currently frozen. */
 extern unsigned int memorystatus_frozen_processes_max;
 extern unsigned int memorystatus_frozen_shared_mb;
 extern unsigned int memorystatus_frozen_shared_mb_max;
 extern unsigned int memorystatus_freeze_shared_mb_per_process_max; /* Max. MB allowed per process to be freezer-eligible. */
 extern unsigned int memorystatus_freeze_private_shared_pages_ratio; /* Ratio of private:shared pages for a process to be freezer-eligible. */
 extern unsigned int memorystatus_suspended_count;
-extern unsigned int memorystatus_thaw_count;
+extern unsigned int memorystatus_thaw_count; /* # of processes that have been thawed in the current interval. */
 extern unsigned int memorystatus_refreeze_eligible_count; /* # of processes currently thawed i.e. have state on disk & in-memory */
 
 void memorystatus_freeze_init(void);
index 3e142976198a52f0173b6280bf162f6f31f00356..bfeafb1867533a4554ae8457a65c8acd374061bf 100644 (file)
 #define __KPI_MBUF__
 #include <sys/kernel_types.h>
 #include <mach/vm_types.h>
+
+#ifndef PRIVATE
+#include <Availability.h>
+#define __NKE_API_DEPRECATED __API_DEPRECATED("Network Kernel Extension KPI is deprecated", macos(10.4, 10.15.4))
+#else
+#define __NKE_API_DEPRECATED
+#endif /* PRIVATE */
+
 #ifdef KERNEL_PRIVATE
 #include <mach/kern_return.h>
 #endif /* KERNEL_PRIVATE */
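
With that macro in place, every public mbuf KPI declaration in the rest of this header (the hunks that follow) picks up __NKE_API_DEPRECATED. For kernel-internal (PRIVATE) builds it expands to nothing; for a third-party NKE built against the SDK it expands to __API_DEPRECATED(...), so the only effect is a compile-time diagnostic, not a behavior change. A hedged sketch of what such kext source now sees (the exact warning wording comes from the compiler, not this header):

#include <sys/kpi_mbuf.h>

/*
 * Hypothetical NKE helper: still compiles and behaves as before, but each
 * call site is expected to warn along the lines of "'mbuf_len' is
 * deprecated: ... Network Kernel Extension KPI is deprecated".
 */
static size_t
nke_chain_len(mbuf_t m)
{
	size_t total = 0;

	while (m != NULL) {
		total += mbuf_len(m);   /* deprecated accessor */
		m = mbuf_next(m);       /* deprecated accessor */
	}
	return total;
}
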
@@ -294,7 +302,8 @@ __BEGIN_DECLS
  *       @param mbuf The mbuf.
  *       @result A pointer to the data in the mbuf.
  */
-extern void *mbuf_data(mbuf_t mbuf);
+extern void *mbuf_data(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_datastart
@@ -307,7 +316,8 @@ extern void *mbuf_data(mbuf_t mbuf);
  *       @param mbuf The mbuf.
  *       @result A pointer to smallest possible value for data.
  */
-extern void *mbuf_datastart(mbuf_t mbuf);
+extern void *mbuf_datastart(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_setdata
@@ -323,7 +333,8 @@ extern void *mbuf_datastart(mbuf_t mbuf);
  *       @param len The new length of data in the mbuf.
  *       @result 0 on success, errno error on failure.
  */
-extern errno_t mbuf_setdata(mbuf_t mbuf, void *data, size_t len);
+extern errno_t mbuf_setdata(mbuf_t mbuf, void *data, size_t len)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_align_32
@@ -336,7 +347,8 @@ extern errno_t mbuf_setdata(mbuf_t mbuf, void *data, size_t len);
  *               data location.
  *       @result 0 on success, errno error on failure.
  */
-extern errno_t mbuf_align_32(mbuf_t mbuf, size_t len);
+extern errno_t mbuf_align_32(mbuf_t mbuf, size_t len)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_data_to_physical
@@ -355,7 +367,8 @@ extern errno_t mbuf_align_32(mbuf_t mbuf, size_t len);
  *       @result The 64 bit physical address of the mbuf data or NULL if ptr
  *               does not point to data stored in an mbuf.
  */
-extern addr64_t mbuf_data_to_physical(void *ptr);
+extern addr64_t mbuf_data_to_physical(void *ptr)
+__NKE_API_DEPRECATED;
 
 
 /* Allocation */
@@ -368,7 +381,8 @@ extern addr64_t mbuf_data_to_physical(void *ptr);
  *       @param mbuf The mbuf.
  *       @result 0 on success, errno error on failure.
  */
-extern errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf);
+extern errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_gethdr
@@ -380,7 +394,8 @@ extern errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf);
  *       @param mbuf The mbuf.
  *       @result 0 on success, errno error on failure.
  */
-extern errno_t mbuf_gethdr(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf);
+extern errno_t mbuf_gethdr(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_attachcluster
@@ -410,7 +425,8 @@ extern errno_t mbuf_gethdr(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf);
  */
 extern errno_t mbuf_attachcluster(mbuf_how_t how, mbuf_type_t type,
     mbuf_t *mbuf, caddr_t extbuf, void (*extfree)(caddr_t, u_int, caddr_t),
-    size_t extsize, caddr_t extarg);
+    size_t extsize, caddr_t extarg)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_alloccluster
@@ -435,7 +451,8 @@ extern errno_t mbuf_attachcluster(mbuf_how_t how, mbuf_type_t type,
  *               In this case, the caller is advised to use 4096 bytes or
 *               smaller during subsequent requests.
  */
-extern errno_t mbuf_alloccluster(mbuf_how_t how, size_t *size, caddr_t *addr);
+extern errno_t mbuf_alloccluster(mbuf_how_t how, size_t *size, caddr_t *addr)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_freecluster
@@ -446,7 +463,8 @@ extern errno_t mbuf_alloccluster(mbuf_how_t how, size_t *size, caddr_t *addr);
  *       @param addr The address of the cluster.
  *       @param size The actual size of the cluster.
  */
-extern void mbuf_freecluster(caddr_t addr, size_t size);
+extern void mbuf_freecluster(caddr_t addr, size_t size)
+__NKE_API_DEPRECATED;
 
 #ifdef BSD_KERNEL_PRIVATE
 /*
@@ -491,6 +509,7 @@ extern errno_t mbuf_cluster_get_prop(mbuf_t mbuf, u_int32_t *prop);
  */
-extern errno_t mbuf_getcluster(mbuf_how_t how, mbuf_type_t type, size_t size,
-    mbuf_t *mbuf);
+extern errno_t mbuf_getcluster(mbuf_how_t how, mbuf_type_t type, size_t size,
+    mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_mclget
@@ -507,7 +526,8 @@ extern errno_t mbuf_getcluster(mbuf_how_t how, mbuf_type_t type, size_t size,
  *               will be freed. If you specify an mbuf value in *mbuf,
  *               mbuf_mclget will not free it.
  */
-extern errno_t mbuf_mclget(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf);
+extern errno_t mbuf_mclget(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_allocpacket
@@ -545,7 +565,8 @@ extern errno_t mbuf_mclget(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf);
  *                   chunks requested
  */
 extern errno_t mbuf_allocpacket(mbuf_how_t how, size_t packetlen,
-    unsigned int * maxchunks, mbuf_t *mbuf);
+    unsigned int * maxchunks, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_allocpacket_list
@@ -584,8 +605,8 @@ extern errno_t mbuf_allocpacket(mbuf_how_t how, size_t packetlen,
  *                   chunks requested
  */
 extern errno_t mbuf_allocpacket_list(unsigned int numpkts, mbuf_how_t how,
-    size_t packetlen, unsigned int * maxchunks, mbuf_t *mbuf);
-
+    size_t packetlen, unsigned int * maxchunks, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_getpacket
@@ -595,7 +616,8 @@ extern errno_t mbuf_allocpacket_list(unsigned int numpkts, mbuf_how_t how,
  *       @param mbuf Upon success, *mbuf will be a reference to the new mbuf.
  *       @result 0 on success, errno error on failure.
  */
-extern errno_t mbuf_getpacket(mbuf_how_t how, mbuf_t *mbuf);
+extern errno_t mbuf_getpacket(mbuf_how_t how, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_free
@@ -604,14 +626,16 @@ extern errno_t mbuf_getpacket(mbuf_how_t how, mbuf_t *mbuf);
  *       @param mbuf The mbuf to free.
  *       @result The next mbuf in the chain.
  */
-extern mbuf_t mbuf_free(mbuf_t mbuf);
+extern mbuf_t mbuf_free(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_freem
 *       @discussion Frees a chain of mbufs linked through mnext.
  *       @param mbuf The first mbuf in the chain to free.
  */
-extern void mbuf_freem(mbuf_t mbuf);
+extern void mbuf_freem(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_freem_list
@@ -620,7 +644,8 @@ extern void mbuf_freem(mbuf_t mbuf);
  *       @param mbuf The first mbuf in the linked list to free.
  *       @result The number of mbufs freed.
  */
-extern int mbuf_freem_list(mbuf_t mbuf);
+extern int mbuf_freem_list(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_leadingspace
@@ -629,7 +654,8 @@ extern int mbuf_freem_list(mbuf_t mbuf);
  *       @param mbuf The mbuf.
  *       @result The number of unused bytes at the start of the mbuf.
  */
-extern size_t mbuf_leadingspace(const mbuf_t mbuf);
+extern size_t mbuf_leadingspace(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_trailingspace
@@ -638,7 +664,8 @@ extern size_t mbuf_leadingspace(const mbuf_t mbuf);
  *       @param mbuf The mbuf.
  *       @result The number of unused bytes following the current data.
  */
-extern size_t mbuf_trailingspace(const mbuf_t mbuf);
+extern size_t mbuf_trailingspace(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /* Manipulation */
 
@@ -657,7 +684,8 @@ extern size_t mbuf_trailingspace(const mbuf_t mbuf);
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_copym(const mbuf_t src, size_t offset, size_t len,
-    mbuf_how_t how, mbuf_t *new_mbuf);
+    mbuf_how_t how, mbuf_t *new_mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_dup
@@ -670,7 +698,8 @@ extern errno_t mbuf_copym(const mbuf_t src, size_t offset, size_t len,
  *       @param new_mbuf Upon success, the newly allocated mbuf.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_dup(const mbuf_t src, mbuf_how_t how, mbuf_t *new_mbuf);
+extern errno_t mbuf_dup(const mbuf_t src, mbuf_how_t how, mbuf_t *new_mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_prepend
@@ -685,7 +714,8 @@ extern errno_t mbuf_dup(const mbuf_t src, mbuf_how_t how, mbuf_t *new_mbuf);
  *       @param how Blocking or non-blocking.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_prepend(mbuf_t *mbuf, size_t len, mbuf_how_t how);
+extern errno_t mbuf_prepend(mbuf_t *mbuf, size_t len, mbuf_how_t how)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_split
@@ -701,7 +731,8 @@ extern errno_t mbuf_prepend(mbuf_t *mbuf, size_t len, mbuf_how_t how);
  *               preserved.
  */
 extern errno_t mbuf_split(mbuf_t src, size_t offset, mbuf_how_t how,
-    mbuf_t *new_mbuf);
+    mbuf_t *new_mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_pullup
@@ -714,7 +745,8 @@ extern errno_t mbuf_split(mbuf_t src, size_t offset, mbuf_how_t how,
  *       @result 0 upon success otherwise the errno error. In the case of an
  *               error, the mbuf chain has been freed.
  */
-extern errno_t mbuf_pullup(mbuf_t *mbuf, size_t len);
+extern errno_t mbuf_pullup(mbuf_t *mbuf, size_t len)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_pulldown
@@ -735,7 +767,8 @@ extern errno_t mbuf_pullup(mbuf_t *mbuf, size_t len);
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_pulldown(mbuf_t src, size_t *offset, size_t length,
-    mbuf_t *location);
+    mbuf_t *location)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_adj
@@ -746,7 +779,8 @@ extern errno_t mbuf_pulldown(mbuf_t src, size_t *offset, size_t length,
  *       @param mbuf The mbuf chain to trim.
  *       @param len The number of bytes to trim from the mbuf chain.
  */
-extern void mbuf_adj(mbuf_t mbuf, int len);
+extern void mbuf_adj(mbuf_t mbuf, int len)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_adjustlen
@@ -759,7 +793,8 @@ extern void mbuf_adj(mbuf_t mbuf, int len);
 *       @param amount The number of bytes to increment the length by.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_adjustlen(mbuf_t mbuf, int amount);
+extern errno_t mbuf_adjustlen(mbuf_t mbuf, int amount)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_concatenate
@@ -778,7 +813,8 @@ extern errno_t mbuf_adjustlen(mbuf_t mbuf, int amount);
  *               chain.  Otherwise it returns NULL if the original dst mbuf
  *               chain is NULL.
  */
-extern mbuf_t mbuf_concatenate(mbuf_t dst, mbuf_t src);
+extern mbuf_t mbuf_concatenate(mbuf_t dst, mbuf_t src)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_copydata
@@ -793,7 +829,8 @@ extern mbuf_t mbuf_concatenate(mbuf_t dst, mbuf_t src);
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_copydata(const mbuf_t mbuf, size_t offset, size_t length,
-    void *out_data);
+    void *out_data)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_copyback
@@ -818,7 +855,8 @@ extern errno_t mbuf_copydata(const mbuf_t mbuf, size_t offset, size_t length,
  *       @result 0 upon success, EINVAL or ENOBUFS upon failure.
  */
 extern errno_t mbuf_copyback(mbuf_t mbuf, size_t offset, size_t length,
-    const void *data, mbuf_how_t how);
+    const void *data, mbuf_how_t how)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_mclhasreference
@@ -828,7 +866,8 @@ extern errno_t mbuf_copyback(mbuf_t mbuf, size_t offset, size_t length,
  *       @param mbuf The mbuf with the cluster to test.
  *       @result 0 if there is no reference by another mbuf, 1 otherwise.
  */
-extern int mbuf_mclhasreference(mbuf_t mbuf);
+extern int mbuf_mclhasreference(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 
 /* mbuf header */
@@ -839,7 +878,8 @@ extern int mbuf_mclhasreference(mbuf_t mbuf);
  *       @param mbuf The mbuf.
  *       @result The next mbuf in the chain.
  */
-extern mbuf_t mbuf_next(const mbuf_t mbuf);
+extern mbuf_t mbuf_next(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_setnext
@@ -848,7 +888,8 @@ extern mbuf_t mbuf_next(const mbuf_t mbuf);
  *       @param next The new next mbuf.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_setnext(mbuf_t mbuf, mbuf_t next);
+extern errno_t mbuf_setnext(mbuf_t mbuf, mbuf_t next)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_nextpkt
@@ -856,7 +897,8 @@ extern errno_t mbuf_setnext(mbuf_t mbuf, mbuf_t next);
  *       @param mbuf The mbuf.
  *       @result The nextpkt.
  */
-extern mbuf_t mbuf_nextpkt(const mbuf_t mbuf);
+extern mbuf_t mbuf_nextpkt(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_setnextpkt
@@ -864,7 +906,8 @@ extern mbuf_t mbuf_nextpkt(const mbuf_t mbuf);
  *       @param mbuf The mbuf.
  *       @param nextpkt The new next packet.
  */
-extern void mbuf_setnextpkt(mbuf_t mbuf, mbuf_t nextpkt);
+extern void mbuf_setnextpkt(mbuf_t mbuf, mbuf_t nextpkt)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_len
@@ -872,7 +915,8 @@ extern void mbuf_setnextpkt(mbuf_t mbuf, mbuf_t nextpkt);
  *       @param mbuf The mbuf.
  *       @result The length.
  */
-extern size_t mbuf_len(const mbuf_t mbuf);
+extern size_t mbuf_len(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_setlen
@@ -881,7 +925,8 @@ extern size_t mbuf_len(const mbuf_t mbuf);
  *       @param mbuf The mbuf.
  *       @param len The new length.
  */
-extern void mbuf_setlen(mbuf_t mbuf, size_t len);
+extern void mbuf_setlen(mbuf_t mbuf, size_t len)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_maxlen
@@ -892,7 +937,8 @@ extern void mbuf_setlen(mbuf_t mbuf, size_t len);
  *       @param mbuf The mbuf.
 *       @result The maximum length of data for this mbuf.
  */
-extern size_t mbuf_maxlen(const mbuf_t mbuf);
+extern size_t mbuf_maxlen(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_type
@@ -900,7 +946,8 @@ extern size_t mbuf_maxlen(const mbuf_t mbuf);
  *       @param mbuf The mbuf.
  *       @result The type.
  */
-extern mbuf_type_t mbuf_type(const mbuf_t mbuf);
+extern mbuf_type_t mbuf_type(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_settype
@@ -909,7 +956,8 @@ extern mbuf_type_t mbuf_type(const mbuf_t mbuf);
  *       @param new_type The new type.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_settype(mbuf_t mbuf, mbuf_type_t new_type);
+extern errno_t mbuf_settype(mbuf_t mbuf, mbuf_type_t new_type)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_flags
@@ -917,7 +965,8 @@ extern errno_t mbuf_settype(mbuf_t mbuf, mbuf_type_t new_type);
  *       @param mbuf The mbuf.
  *       @result The flags.
  */
-extern mbuf_flags_t mbuf_flags(const mbuf_t mbuf);
+extern mbuf_flags_t mbuf_flags(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_setflags
@@ -927,7 +976,8 @@ extern mbuf_flags_t mbuf_flags(const mbuf_t mbuf);
  *               cleared.  Certain flags such as MBUF_EXT cannot be altered.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags);
+extern errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_setflags_mask
@@ -940,7 +990,8 @@ extern errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags);
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_setflags_mask(mbuf_t mbuf, mbuf_flags_t flags,
-    mbuf_flags_t mask);
+    mbuf_flags_t mask)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_copy_pkthdr
@@ -949,7 +1000,8 @@ extern errno_t mbuf_setflags_mask(mbuf_t mbuf, mbuf_flags_t flags,
  *       @param dest The mbuf to which the packet header will be copied.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_copy_pkthdr(mbuf_t dest, const mbuf_t src);
+extern errno_t mbuf_copy_pkthdr(mbuf_t dest, const mbuf_t src)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_pkthdr_len
@@ -957,7 +1009,8 @@ extern errno_t mbuf_copy_pkthdr(mbuf_t dest, const mbuf_t src);
  *       @param mbuf The mbuf containing the packet header
  *       @result The length, in bytes, of the packet.
  */
-extern size_t mbuf_pkthdr_len(const mbuf_t mbuf);
+extern size_t mbuf_pkthdr_len(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_pkthdr_setlen
@@ -965,7 +1018,8 @@ extern size_t mbuf_pkthdr_len(const mbuf_t mbuf);
  *       @param mbuf The mbuf containing the packet header.
  *       @param len The new length of the packet.
  */
-extern void mbuf_pkthdr_setlen(mbuf_t mbuf, size_t len);
+extern void mbuf_pkthdr_setlen(mbuf_t mbuf, size_t len)
+__NKE_API_DEPRECATED;
 
 #ifdef XNU_KERNEL_PRIVATE
 /*!
@@ -987,7 +1041,8 @@ extern size_t mbuf_pkthdr_maxlen(const mbuf_t mbuf);
  *       @param amount The number of bytes to adjust the packet header length
  *               field by.
  */
-extern void mbuf_pkthdr_adjustlen(mbuf_t mbuf, int amount);
+extern void mbuf_pkthdr_adjustlen(mbuf_t mbuf, int amount)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_pkthdr_rcvif
@@ -1001,7 +1056,8 @@ extern void mbuf_pkthdr_adjustlen(mbuf_t mbuf, int amount);
  *       @param mbuf The mbuf containing the packet header.
  *       @result A reference to the interface.
  */
-extern ifnet_t mbuf_pkthdr_rcvif(const mbuf_t mbuf);
+extern ifnet_t mbuf_pkthdr_rcvif(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_pkthdr_setrcvif
@@ -1010,7 +1066,8 @@ extern ifnet_t mbuf_pkthdr_rcvif(const mbuf_t mbuf);
  *       @param ifp A reference to an interface.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_pkthdr_setrcvif(mbuf_t mbuf, ifnet_t ifp);
+extern errno_t mbuf_pkthdr_setrcvif(mbuf_t mbuf, ifnet_t ifp)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_pkthdr_header
@@ -1018,7 +1075,8 @@ extern errno_t mbuf_pkthdr_setrcvif(mbuf_t mbuf, ifnet_t ifp);
  *       @param mbuf The mbuf containing the packet header.
  *       @result A pointer to the packet header.
  */
-extern void *mbuf_pkthdr_header(const mbuf_t mbuf);
+extern void *mbuf_pkthdr_header(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_pkthdr_setheader
@@ -1026,7 +1084,8 @@ extern void *mbuf_pkthdr_header(const mbuf_t mbuf);
  *       @param mbuf The mbuf containing the packet header.
  *       @param header A pointer to the header.
  */
-extern void mbuf_pkthdr_setheader(mbuf_t mbuf, void *header);
+extern void mbuf_pkthdr_setheader(mbuf_t mbuf, void *header)
+__NKE_API_DEPRECATED;
 
 /* Checksums */
 
@@ -1043,7 +1102,8 @@ extern void mbuf_pkthdr_setheader(mbuf_t mbuf, void *header);
  *               original checksum was valid.
  *       @param mbuf The mbuf that has been modified.
  */
-extern void mbuf_inbound_modified(mbuf_t mbuf);
+extern void mbuf_inbound_modified(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_outbound_finalize
@@ -1081,7 +1141,8 @@ extern void mbuf_inbound_modified(mbuf_t mbuf);
  *               would be the length of an ethernet header.
  */
 extern void mbuf_outbound_finalize(mbuf_t mbuf, u_int32_t protocol_family,
-    size_t protocol_offset);
+    size_t protocol_offset)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_set_vlan_tag
@@ -1092,7 +1153,8 @@ extern void mbuf_outbound_finalize(mbuf_t mbuf, u_int32_t protocol_family,
  *       @param vlan The protocol family of the aux data to add.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_set_vlan_tag(mbuf_t mbuf, u_int16_t vlan);
+extern errno_t mbuf_set_vlan_tag(mbuf_t mbuf, u_int16_t vlan)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_get_vlan_tag
@@ -1106,7 +1168,8 @@ extern errno_t mbuf_set_vlan_tag(mbuf_t mbuf, u_int16_t vlan);
  *       @result 0 upon success otherwise the errno error. ENXIO indicates
  *               that the vlan tag is not set.
  */
-extern errno_t mbuf_get_vlan_tag(mbuf_t mbuf, u_int16_t *vlan);
+extern errno_t mbuf_get_vlan_tag(mbuf_t mbuf, u_int16_t *vlan)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_clear_vlan_tag
@@ -1115,7 +1178,8 @@ extern errno_t mbuf_get_vlan_tag(mbuf_t mbuf, u_int16_t *vlan);
  *       @param mbuf The mbuf containing the packet.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_clear_vlan_tag(mbuf_t mbuf);
+extern errno_t mbuf_clear_vlan_tag(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 /*!
@@ -1147,7 +1211,8 @@ extern errno_t mbuf_set_csum_requested(mbuf_t mbuf,
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_get_csum_requested(mbuf_t mbuf,
-    mbuf_csum_request_flags_t *request, u_int32_t *value);
+    mbuf_csum_request_flags_t *request, u_int32_t *value)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_get_tso_requested
@@ -1160,7 +1225,8 @@ extern errno_t mbuf_get_csum_requested(mbuf_t mbuf,
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_get_tso_requested(mbuf_t mbuf,
-    mbuf_tso_request_flags_t *request, u_int32_t *value);
+    mbuf_tso_request_flags_t *request, u_int32_t *value)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_clear_csum_requested
@@ -1168,7 +1234,8 @@ extern errno_t mbuf_get_tso_requested(mbuf_t mbuf,
  *       @param mbuf The mbuf containing the packet.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_clear_csum_requested(mbuf_t mbuf);
+extern errno_t mbuf_clear_csum_requested(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_set_csum_performed
@@ -1183,7 +1250,8 @@ extern errno_t mbuf_clear_csum_requested(mbuf_t mbuf);
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_set_csum_performed(mbuf_t mbuf,
-    mbuf_csum_performed_flags_t flags, u_int32_t value);
+    mbuf_csum_performed_flags_t flags, u_int32_t value)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 /*
@@ -1210,7 +1278,8 @@ extern errno_t mbuf_get_csum_performed(mbuf_t mbuf,
  *               legacy MLEN macro.
  *       @result       The number of bytes of available data.
  */
-extern u_int32_t mbuf_get_mlen(void);
+extern u_int32_t mbuf_get_mlen(void)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_get_mhlen
@@ -1218,7 +1287,8 @@ extern u_int32_t mbuf_get_mlen(void);
  *               header mbuf.  This is equivalent to the legacy MHLEN macro.
  *       @result       The number of bytes of available data.
  */
-extern u_int32_t mbuf_get_mhlen(void);
+extern u_int32_t mbuf_get_mhlen(void)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_get_minclsize
@@ -1227,7 +1297,8 @@ extern u_int32_t mbuf_get_mhlen(void);
  *               legacy MINCLSIZE macro.
  *       @result       The minimum number of bytes before a cluster will be used.
  */
-extern u_int32_t mbuf_get_minclsize(void);
+extern u_int32_t mbuf_get_minclsize(void)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_clear_csum_performed
@@ -1235,7 +1306,8 @@ extern u_int32_t mbuf_get_minclsize(void);
  *       @param mbuf The mbuf containing the packet.
  *       @result 0 upon success otherwise the errno error.
  */
-extern errno_t mbuf_clear_csum_performed(mbuf_t mbuf);
+extern errno_t mbuf_clear_csum_performed(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_inet_cksum
@@ -1264,7 +1336,8 @@ extern errno_t mbuf_clear_csum_performed(mbuf_t mbuf);
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_inet_cksum(mbuf_t mbuf, int protocol, u_int32_t offset,
-    u_int32_t length, u_int16_t *csum);
+    u_int32_t length, u_int16_t *csum)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_inet6_cksum
@@ -1293,7 +1366,8 @@ extern errno_t mbuf_inet_cksum(mbuf_t mbuf, int protocol, u_int32_t offset,
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_inet6_cksum(mbuf_t mbuf, int protocol, u_int32_t offset,
-    u_int32_t length, u_int16_t *csum);
+    u_int32_t length, u_int16_t *csum)
+__NKE_API_DEPRECATED;
 
 /* mbuf tags */
 
@@ -1316,7 +1390,8 @@ extern errno_t mbuf_inet6_cksum(mbuf_t mbuf, int protocol, u_int32_t offset,
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_tag_id_find(const char *module_string,
-    mbuf_tag_id_t *module_id);
+    mbuf_tag_id_t *module_id)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_tag_allocate
@@ -1340,7 +1415,8 @@ extern errno_t mbuf_tag_id_find(const char *module_string,
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_tag_allocate(mbuf_t mbuf, mbuf_tag_id_t module_id,
-    mbuf_tag_type_t type, size_t length, mbuf_how_t how, void **data_p);
+    mbuf_tag_type_t type, size_t length, mbuf_how_t how, void **data_p)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_tag_find
@@ -1355,7 +1431,8 @@ extern errno_t mbuf_tag_allocate(mbuf_t mbuf, mbuf_tag_id_t module_id,
  *       @result 0 upon success otherwise the errno error.
  */
 extern errno_t mbuf_tag_find(mbuf_t mbuf, mbuf_tag_id_t module_id,
-    mbuf_tag_type_t type, size_t *length, void **data_p);
+    mbuf_tag_type_t type, size_t *length, void **data_p)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_tag_free
@@ -1365,7 +1442,8 @@ extern errno_t mbuf_tag_find(mbuf_t mbuf, mbuf_tag_id_t module_id,
  *       @param type The type of the tag to free.
  */
 extern void mbuf_tag_free(mbuf_t mbuf, mbuf_tag_id_t module_id,
-    mbuf_tag_type_t type);
+    mbuf_tag_type_t type)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 /*!
@@ -1431,7 +1509,8 @@ extern void mbuf_del_drvaux(mbuf_t mbuf);
  *       @discussion Get the mbuf statistics.
  *       @param stats Storage to copy the stats in to.
  */
-extern void mbuf_stats(struct mbuf_stat *stats);
+extern void mbuf_stats(struct mbuf_stat *stats)
+__NKE_API_DEPRECATED;
 
 
 /*!
@@ -1464,7 +1543,8 @@ typedef enum {
  *       @param mbuf The mbuf to get the traffic class of.
  *       @result The traffic class
  */
-extern mbuf_traffic_class_t mbuf_get_traffic_class(mbuf_t mbuf);
+extern mbuf_traffic_class_t mbuf_get_traffic_class(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_set_traffic_class
@@ -1473,7 +1553,8 @@ extern mbuf_traffic_class_t mbuf_get_traffic_class(mbuf_t mbuf);
  *       @param tc The traffic class
  *       @result 0 on success, EINVAL if bad parameter is passed
  */
-extern errno_t mbuf_set_traffic_class(mbuf_t mbuf, mbuf_traffic_class_t tc);
+extern errno_t mbuf_set_traffic_class(mbuf_t mbuf, mbuf_traffic_class_t tc)
+__NKE_API_DEPRECATED;
 
 /*!
  *       @function mbuf_is_traffic_class_privileged
@@ -1482,7 +1563,8 @@ extern errno_t mbuf_set_traffic_class(mbuf_t mbuf, mbuf_traffic_class_t tc);
  *       @param mbuf The mbuf to retrieve the status from.
  *       @result Non-zero if privileged, 0 otherwise.
  */
-extern int mbuf_is_traffic_class_privileged(mbuf_t mbuf);
+extern int mbuf_is_traffic_class_privileged(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
 
 #ifdef KERNEL_PRIVATE
 
index 05e522c99b21b23db953cbf59024d7d137ce53c2..a8010e620af55c072e8db8fa1cfc0a2c9595dcf6 100644 (file)
@@ -247,9 +247,7 @@ extern struct mount * dead_mountp;
 #define MNTK_SWAP_MOUNT         0x00000100      /* we are swapping to this mount */
 #define MNTK_DENY_READDIREXT 0x00000200 /* Deny Extended-style readdir's for this volume */
 #define MNTK_PERMIT_UNMOUNT     0x00000400      /* Allow (non-forced) unmounts by UIDs other than the one that mounted the volume */
-#ifdef NFSCLIENT
 #define MNTK_TYPENAME_OVERRIDE  0x00000800      /* override the fstypename for statfs() */
-#endif /* NFSCLIENT */
 #define MNTK_KERNEL_MOUNT       0x00001000      /* mount came from kernel side */
 #ifdef CONFIG_IMGSRC_ACCESS
 #define MNTK_HAS_MOVED          0x00002000
index 87a39398b29a8d76f340e78da9e0ef135337d77c..e427f2ebeb9124c7a12a3b600cc4d9d761498f09 100644 (file)
@@ -245,7 +245,9 @@ extern int proc_selfpid(void);
 /* this routine returns the pid of the parent of the current process */
 extern int proc_selfppid(void);
 /* this routine returns the csflags of the current process */
-extern int proc_selfcsflags(void);
+extern uint64_t proc_selfcsflags(void);
+/* this routine populates the given flags param with the csflags of the given process. Returns 0 on success, -1 on error. */
+extern int proc_csflags(proc_t p, uint64_t* flags);
 /* this routine sends a signal signum to the process identified by the pid */
 extern void proc_signal(int pid, int signum);
 /* this routine checks whether any signals identified by the mask are pending in the process identified by the pid. The check is on all threads of the process. */
@@ -304,6 +306,16 @@ extern int proc_issetugid(proc_t p);
 
 extern int proc_tbe(proc_t);
 
+/*!
+ *  @function proc_gettty
+ *  @abstract Copies the associated tty vnode for a given process if it exists. The caller needs to decrement the iocount of the vnode.
+ *  @return 0 on success. ENOENT if the process has no associated TTY. EINVAL if arguments are NULL or vnode_getwithvid fails.
+ */
+extern int proc_gettty(proc_t p, vnode_t *vp);
+
+/* this routine populates the given dev param with the associated tty device for a given process if it exists; returns 0 on success or else EINVAL */
+extern int proc_gettty_dev(proc_t p, dev_t *dev);
+
 /*!
  *  @function proc_selfpgrpid
  *  @abstract Get the process group id for the current process, as with proc_pgrpid().
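
Two code-signing accessors change shape in this header: proc_selfcsflags() widens its return to the full 64-bit csflags word, and the new proc_csflags() reports another process's flags through an out parameter so an error (-1) is distinguishable from a zero flag word. A hedged kernel-side sketch of the intended usage (CS_VALID is assumed to come from <sys/codesign.h>; this is an illustration, not code from the commit):

#include <stdbool.h>
#include <sys/codesign.h>
#include <sys/proc.h>
#include <sys/types.h>

/* Sketch: does the given process currently have a valid code signature? */
static bool
proc_has_valid_signature(proc_t p)
{
	uint64_t csflags = 0;

	if (proc_csflags(p, &csflags) != 0) {
		return false;   /* -1 on error, per the declaration above */
	}
	return (csflags & CS_VALID) != 0;
}
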
index 2e30057f1f3a54eb9227749f6ebad2479b80b905..3611e2c6a7236764a2f18c543f4fa102857d8964 100644 (file)
@@ -320,10 +320,16 @@ struct socket {
 
        pid_t           e_pid;          /* pid of the effective owner */
        u_int64_t       e_upid;         /* upid of the effective owner */
+#if defined(XNU_TARGET_OS_OSX)
+       pid_t           so_rpid;        /* pid of the responsible process */
+#endif
 
        uuid_t          last_uuid;      /* uuid of most recent accessor */
        uuid_t          e_uuid;         /* uuid of effective owner */
        uuid_t          so_vuuid;       /* UUID of the Voucher originator */
+#if defined(XNU_TARGET_OS_OSX)
+       uuid_t          so_ruuid;       /* UUID of the responsible process */
+#endif
 
        int32_t         so_policy_gencnt; /* UUID policy gencnt */
 
index d963cfdb9726f4fe44d720fbd5670255261c372d..f0b7866f17f008b21a2891dd4f91cec0be09fb90 100644 (file)
@@ -77,6 +77,7 @@ typedef enum {
        PSPA_AU_SESSION = 2,
        PSPA_IMP_WATCHPORTS = 3,
        PSPA_REGISTERED_PORTS = 4,
+       PSPA_SUID_CRED = 6,
 } pspa_t;
 
 /*
index 18c9ad95001b6a4364aaa941b91a312096f602e4..ca2a54e5bb59997112cd5f9066e0b945edf8707c 100644 (file)
@@ -540,6 +540,17 @@ extern void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp);
 #endif
 #endif
 
+/*
+ * Extended flags ("EF") returned by ATTR_CMNEXT_EXT_FLAGS from getattrlist/getattrlistbulk
+ */
+#define EF_MAY_SHARE_BLOCKS     0x00000001      /* file may share blocks with another file */
+#define EF_NO_XATTRS            0x00000002      /* file has no xattrs at all */
+#define EF_IS_SYNC_ROOT         0x00000004      /* file is a sync root for iCloud */
+#define EF_IS_PURGEABLE         0x00000008      /* file is purgeable */
+#define EF_IS_SPARSE            0x00000010      /* file has at least one sparse region */
+
+
+
 #ifndef KERNEL
 
 __BEGIN_DECLS
index e5263caf4dd0108e42b8b9c50151a78163c1bfd4..7dfb01ef2abcd2a739fa3587c19d22b1e0ecf9d9 100644 (file)
@@ -559,6 +559,8 @@ struct vnode_trigger_param {
 #define VNODE_ATTR_va_fsid64            (1LL<<41)       /* 20000000000 */
 #define VNODE_ATTR_va_write_gencount    (1LL<<42)       /* 40000000000 */
 #define VNODE_ATTR_va_private_size      (1LL<<43)       /* 80000000000 */
+#define VNODE_ATTR_va_clone_id          (1LL<<44)       /* 100000000000 */
+#define VNODE_ATTR_va_extflags          (1LL<<45)       /* 200000000000 */
 
 #define VNODE_ATTR_BIT(n)       (VNODE_ATTR_ ## n)
 
@@ -608,7 +610,9 @@ struct vnode_trigger_param {
                                VNODE_ATTR_BIT(va_rsrc_alloc) |         \
                                VNODE_ATTR_BIT(va_fsid64) |             \
                                VNODE_ATTR_BIT(va_write_gencount) |     \
-                               VNODE_ATTR_BIT(va_private_size))
+                               VNODE_ATTR_BIT(va_private_size) |       \
+                               VNODE_ATTR_BIT(va_clone_id) |           \
+                               VNODE_ATTR_BIT(va_extflags))
 
 /*
  * Read-only attributes.
@@ -637,8 +641,11 @@ struct vnode_trigger_param {
                                VNODE_ATTR_BIT(va_rsrc_length) |        \
                                VNODE_ATTR_BIT(va_rsrc_alloc) |         \
                                VNODE_ATTR_BIT(va_fsid64) |             \
-                               VNODE_ATTR_BIT(va_write_gencount) |             \
-                               VNODE_ATTR_BIT(va_private_size))
+                               VNODE_ATTR_BIT(va_write_gencount) |     \
+                               VNODE_ATTR_BIT(va_private_size) |       \
+                               VNODE_ATTR_BIT(va_clone_id) |           \
+                               VNODE_ATTR_BIT(va_extflags))
+
 /*
  * Attributes that can be applied to a new file object.
  */
@@ -742,6 +749,8 @@ struct vnode_attr {
        uint32_t va_write_gencount;     /* counter that increments each time the file changes */
 
        uint64_t va_private_size; /* If the file were deleted, how many bytes would be freed immediately */
+       uint64_t va_clone_id;     /* If a file is cloned, this is a unique id shared by all "perfect" clones */
+       uint64_t va_extflags;     /* extended file/directory flags */
 
        /* add new fields here only */
 };
@@ -1689,6 +1698,19 @@ int     vnode_isdyldsharedcache(vnode_t vp);
  */
 int     vn_authorize_unlink(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, void *reserved);
 
+
+/*!
+ *  @function vn_authorize_rmdir
+ *  @abstract Authorize an rmdir operation given the vfs_context_t
+ *  @discussion Check if the context associated with vfs_context_t is allowed to rmdir the vnode vp in directory dvp.
+ *  @param dvp Parent vnode of the directory to be rmdir'ed
+ *  @param vp The vnode to be rmdir'ed
+ *  @param cnp A componentname containing the name of the directory to be rmdir'ed.  May be NULL.
+ *  @param reserved Pass NULL
+ *  @return Zero if the operation is allowed; non-zero indicates the rmdir is not authorized.
+ */
+int     vn_authorize_rmdir(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, void *reserved);
+
 /*!
  *  @function vn_getpath_fsenter
  *  @abstract Attempt to get a vnode's path, willing to enter the filesystem.
@@ -1751,6 +1773,7 @@ int     vn_getpath_ext(struct vnode *vp, struct vnode *dvp, char *pathbuf, int *
 #define VN_GETPATH_FSENTER              0x0001 /* Can re-enter filesystem */
 #define VN_GETPATH_NO_FIRMLINK          0x0002
 #define VN_GETPATH_VOLUME_RELATIVE      0x0004 /* also implies VN_GETPATH_NO_FIRMLINK */
+#define VN_GETPATH_NO_PROCROOT          0x0008 /* Give the non-chrooted path for a process */
 
 #endif /* KERNEL_PRIVATE */
 
@@ -2379,6 +2402,7 @@ void vnode_clearnoflush(vnode_t);
 #define BUILDPATH_CHECK_MOVED     0x4 /* Return EAGAIN if the parent hierarchy is modified */
 #define BUILDPATH_VOLUME_RELATIVE 0x8 /* Return path relative to the nearest mount point */
 #define BUILDPATH_NO_FIRMLINK     0x10 /* Return non-firmlinked path */
+#define BUILDPATH_NO_PROCROOT     0x20 /* Return path relative to system root, not the process root */
 
 int     build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx);
 
index 4e271502f2ce83f77287962fcaebbff0c58e02fa..29d6d9f72ca4ffe48a5f6f2c9a67cfb62568770b 100644 (file)
@@ -449,7 +449,6 @@ int vn_authorize_renamex(struct vnode *fdvp, struct vnode *fvp, struct component
 int vn_authorize_renamex_with_paths(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, const char *from_path,
     struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp, const char *to_path,
     vfs_context_t ctx, vfs_rename_flags_t flags, void *reserved);
-int     vn_authorize_rmdir(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, void *reserved);
 
 typedef int (*vn_create_authorizer_t)(vnode_t, struct componentname *, struct vnode_attr *, vfs_context_t, void*);
 int vn_authorize_mkdir(vnode_t, struct componentname *, struct vnode_attr *, vfs_context_t, void*);
index c344bef005a290ef5134ea8536319252845cfcc6..d7ba266002f07959ef72af8b330ef14336e55a96 100644 (file)
@@ -545,6 +545,8 @@ static struct getattrlist_attrtab getattrlist_common_extended_tab[] = {
        {.attr = ATTR_CMNEXT_NOFIRMLINKPATH, .bits = 0, .size = sizeof(struct attrreference), .action = KAUTH_VNODE_READ_ATTRIBUTES},
        {.attr = ATTR_CMNEXT_REALDEVID, .bits = VATTR_BIT(va_devid), .size = sizeof(uint32_t), .action = KAUTH_VNODE_READ_ATTRIBUTES},
        {.attr = ATTR_CMNEXT_REALFSID, .bits = VATTR_BIT(va_fsid64), .size = sizeof(fsid_t), .action = KAUTH_VNODE_READ_ATTRIBUTES},
+       {.attr = ATTR_CMNEXT_CLONEID, .bits = VATTR_BIT(va_clone_id), .size = sizeof(uint64_t), .action = KAUTH_VNODE_READ_ATTRIBUTES},
+       {.attr = ATTR_CMNEXT_EXT_FLAGS, .bits = VATTR_BIT(va_extflags), .size = sizeof(uint64_t), .action = KAUTH_VNODE_READ_ATTRIBUTES},
        {.attr = 0, .bits = 0, .size = 0, .action = 0}
 };
 
@@ -607,7 +609,8 @@ static struct getattrlist_attrtab getattrlistbulk_common_extended_tab[] = {
 
 #define VFS_DFLT_ATTR_CMN_EXT   (ATTR_CMNEXT_PRIVATESIZE | ATTR_CMNEXT_LINKID |  \
                                 ATTR_CMNEXT_NOFIRMLINKPATH | ATTR_CMNEXT_REALDEVID |  \
-                                ATTR_CMNEXT_REALFSID)
+                                ATTR_CMNEXT_REALFSID | ATTR_CMNEXT_CLONEID | \
+                                ATTR_CMNEXT_EXT_FLAGS)
 
 #define VFS_DFLT_ATTR_DIR       (ATTR_DIR_LINKCOUNT | ATTR_DIR_MOUNTSTATUS)
 
@@ -984,6 +987,7 @@ getvolattrlist(vfs_context_t ctx, vnode_t vp, struct attrlist *alp,
                                VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not allocate f_vol_name buffer");
                                goto out;
                        }
+                       vs.f_vol_name[0] = '\0';
                }
 
                VFS_DEBUG(ctx, vp, "ATTRLIST -       calling to get %016llx with supported %016llx", vs.f_active, vs.f_supported);
@@ -2357,6 +2361,26 @@ attr_pack_common_extended(mount_t mp, struct vnode *vp, struct attrlist *alp,
                }
        }
 
+       if (alp->forkattr & ATTR_CMNEXT_CLONEID) {
+               if (VATTR_IS_SUPPORTED(vap, va_clone_id)) {
+                       ATTR_PACK8((*abp), vap->va_clone_id);
+                       abp->actual.forkattr |= ATTR_CMNEXT_CLONEID;
+               } else if (!return_valid || pack_invalid) {
+                       uint64_t zero_val = 0;
+                       ATTR_PACK8((*abp), zero_val);
+               }
+       }
+
+       if (alp->forkattr & ATTR_CMNEXT_EXT_FLAGS) {
+               if (VATTR_IS_SUPPORTED(vap, va_extflags)) {
+                       ATTR_PACK8((*abp), vap->va_extflags);
+                       abp->actual.forkattr |= ATTR_CMNEXT_EXT_FLAGS;
+               } else if (!return_valid || pack_invalid) {
+                       uint64_t zero_val = 0;
+                       ATTR_PACK8((*abp), zero_val);
+               }
+       }
+
        return 0;
 }
 
index 18a0906b8b82e9119e23a1b04b64d16c1b33f188..b027e9535ec7d725527ca713a10c65ab8ae4281b 100644 (file)
@@ -450,7 +450,7 @@ build_path_with_parent(vnode_t first_vp, vnode_t parent_vp, char *buff, int bufl
        /*
         * Grab the process fd so we can evaluate fd_rdir.
         */
-       if (vfs_context_proc(ctx)->p_fd) {
+       if (vfs_context_proc(ctx)->p_fd && !(flags & BUILDPATH_NO_PROCROOT)) {
                proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
        } else {
                proc_root_dir_vp = NULL;
index 1d61ed28446c538b29cb2d4d9d88b7c9bbaded40..fb97b1864935e18559239e52a968881fee276f1c 100644 (file)
@@ -72,6 +72,8 @@
 #include <sys/mount_internal.h>
 #include <sys/vnode_internal.h>
 
+#include <nfs/nfs_conf.h>
+
 /*
  * These define the root filesystem, device, and root filesystem type.
  */
@@ -122,7 +124,7 @@ enum fs_type_num {
  */
 static struct vfstable vfstbllist[] = {
        /* Sun-compatible Network Filesystem */
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
        {
                .vfc_vfsops = &nfs_vfsops,
                .vfc_name = "nfs",
@@ -138,7 +140,7 @@ static struct vfstable vfstbllist[] = {
                .vfc_descsize = 0,
                .vfc_sysctl = NULL
        },
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
 
        /* Device Filesystem */
 #if DEVFS
@@ -321,7 +323,7 @@ const struct vnodeopv_desc *vfs_opv_descs[] = {
 #if MFS
        &mfs_vnodeop_opv_desc,
 #endif
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
        &nfsv2_vnodeop_opv_desc,
        &spec_nfsv2nodeop_opv_desc,
 #if CONFIG_NFS4
@@ -334,7 +336,7 @@ const struct vnodeopv_desc *vfs_opv_descs[] = {
        &fifo_nfsv4nodeop_opv_desc,
 #endif /* CONFIG_NFS4 */
 #endif /* FIFO */
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
 #if DEVFS
        &devfs_vnodeop_opv_desc,
        &devfs_spec_vnodeop_opv_desc,
index f7916db48c0379a19ebf10116e0e73b5a29f2f8c..5b5455b4b509d21d0218a905e1dbc38bb22e3440 100644 (file)
@@ -263,7 +263,7 @@ unlock_fs_event_list(void)
 // forward prototype
 static void release_event_ref(kfs_event *kfse);
 
-static int
+static boolean_t
 watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev)
 {
        unsigned int i;
@@ -271,20 +271,20 @@ watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev)
        // if devices_not_to_watch is NULL then we care about all
        // events from all devices
        if (watcher->devices_not_to_watch == NULL) {
-               return 1;
+               return true;
        }
 
        for (i = 0; i < watcher->num_devices; i++) {
                if (dev == watcher->devices_not_to_watch[i]) {
                        // found a match! that means we do not
                        // want events from this device.
-                       return 0;
+                       return false;
                }
        }
 
        // if we're here it's not in the devices_not_to_watch[]
        // list so that means we do care about it
-       return 1;
+       return true;
 }
 
 
@@ -1564,35 +1564,47 @@ restart_watch:
                        break;
                }
 
-               if (watcher->event_list[kfse->type] == FSE_REPORT && watcher_cares_about_dev(watcher, kfse->dev)) {
-                       if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) && kfse->type != FSE_DOCID_CREATED && kfse->type != FSE_DOCID_CHANGED && is_ignored_directory(kfse->str)) {
-                               // If this is not an Apple System Service, skip specified directories
-                               // radar://12034844
-                               error = 0;
-                               skipped = 1;
+               if (watcher->event_list[kfse->type] == FSE_REPORT) {
+                       boolean_t watcher_cares;
+
+                       if (watcher->devices_not_to_watch == NULL) {
+                               watcher_cares = true;
                        } else {
-                               skipped = 0;
-                               if (last_event_ptr == kfse) {
-                                       last_event_ptr = NULL;
-                                       last_event_type = -1;
-                                       last_coalesced_time = 0;
-                               }
-                               error = copy_out_kfse(watcher, kfse, uio);
-                               if (error != 0) {
-                                       // if an event won't fit or encountered an error while
-                                       // we were copying it out, then backup to the last full
-                                       // event and just bail out.  if the error was ENOENT
-                                       // then we can continue regular processing, otherwise
-                                       // we should unlock things and return.
-                                       uio_setresid(uio, last_full_event_resid);
-                                       if (error != ENOENT) {
-                                               lck_rw_unlock_shared(&event_handling_lock);
-                                               error = 0;
-                                               goto get_out;
+                               lock_watch_table();
+                               watcher_cares = watcher_cares_about_dev(watcher, kfse->dev);
+                               unlock_watch_table();
+                       }
+
+                       if (watcher_cares) {
+                               if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) && kfse->type != FSE_DOCID_CREATED && kfse->type != FSE_DOCID_CHANGED && is_ignored_directory(kfse->str)) {
+                                       // If this is not an Apple System Service, skip specified directories
+                                       // radar://12034844
+                                       error = 0;
+                                       skipped = 1;
+                               } else {
+                                       skipped = 0;
+                                       if (last_event_ptr == kfse) {
+                                               last_event_ptr = NULL;
+                                               last_event_type = -1;
+                                               last_coalesced_time = 0;
+                                       }
+                                       error = copy_out_kfse(watcher, kfse, uio);
+                                       if (error != 0) {
+                                               // if an event won't fit or encountered an error while
+                                               // we were copying it out, then backup to the last full
+                                               // event and just bail out.  if the error was ENOENT
+                                               // then we can continue regular processing, otherwise
+                                               // we should unlock things and return.
+                                               uio_setresid(uio, last_full_event_resid);
+                                               if (error != ENOENT) {
+                                                       lck_rw_unlock_shared(&event_handling_lock);
+                                                       error = 0;
+                                                       goto get_out;
+                                               }
                                        }
-                               }
 
-                               last_full_event_resid = uio_resid(uio);
+                                       last_full_event_resid = uio_resid(uio);
+                               }
                        }
                }
 
index 85d47741ee3c9fea99a9709b9d45e49bd584b61d..77c525baac8014c906286e7d6489e74aa51f3943 100644 (file)
@@ -1261,13 +1261,15 @@ dirloop:
                        tdp = dp;
                        dp = tdp->v_mount->mnt_vnodecovered;
 
-                       vnode_put(tdp);
-
                        if ((vnode_getwithref(dp))) {
+                               vnode_put(tdp);
                                dp = NULLVP;
                                error = ENOENT;
                                goto bad;
                        }
+
+                       vnode_put(tdp);
+
                        ndp->ni_dvp = dp;
                        dp_authorized = 0;
                }
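
The reordering above closes a reference-lifetime window when lookup climbs out of a mounted filesystem: the old code dropped the iocount on tdp before vnode_getwithref() had pinned the covered vnode, so an unmount racing in between could recycle dp. The fix takes the new reference first and releases the old one afterwards. Schematically (a sketch of the ordering only, not the full lookup path):

	vnode_t covered = tdp->v_mount->mnt_vnodecovered;
	if (vnode_getwithref(covered)) {        /* pin the covered vnode first */
		vnode_put(tdp);                 /* old ref dropped only once the outcome is known */
		return ENOENT;
	}
	vnode_put(tdp);         /* safe: covered now holds its own iocount */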
index f8304f9ad51c5a0d22eca0ebda38f575e1426823..866780991fa543f6afd6e316620223b0b7f34bbe 100644 (file)
 #include <sys/lockf.h>
 #include <miscfs/fifofs/fifo.h>
 
+#include <nfs/nfs_conf.h>
+
 #include <string.h>
 #include <machine/machine_routines.h>
 
@@ -2408,11 +2410,11 @@ vclean(vnode_t vp, int flags)
         * Clean out any buffers associated with the vnode.
         */
        if (flags & DOCLOSE) {
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
                if (vp->v_tag == VT_NFS) {
                        nfs_vinvalbuf(vp, V_SAVE, ctx, 0);
                } else
-#endif
+#endif /* CONFIG_NFS_CLIENT */
                {
                        VNOP_FSYNC(vp, MNT_WAIT, ctx);
 
@@ -2903,6 +2905,9 @@ vn_getpath_ext(struct vnode *vp, struct vnode *dvp, char *pathbuf, int *len, int
                if (flags & VN_GETPATH_VOLUME_RELATIVE) {
                        bpflags |= (BUILDPATH_VOLUME_RELATIVE | BUILDPATH_NO_FIRMLINK);
                }
+               if (flags & VN_GETPATH_NO_PROCROOT) {
+                       bpflags |= BUILDPATH_NO_PROCROOT;
+               }
        }
 
        return build_path_with_parent(vp, dvp, pathbuf, *len, len, bpflags, vfs_context_current());
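
VN_GETPATH_NO_PROCROOT maps onto the BUILDPATH_NO_PROCROOT flag added in the vfs_cache.c hunk earlier, letting a caller build an absolute path that ignores the process's chroot (fd_rdir). A hypothetical caller:

	/* Hypothetical use: resolve vp against the true filesystem root rather
	 * than the calling process's fd_rdir (chroot) directory. */
	char path[MAXPATHLEN];
	int  len = sizeof(path);
	int  err = vn_getpath_ext(vp, NULLVP, path, &len, VN_GETPATH_NO_PROCROOT);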
@@ -3925,11 +3930,11 @@ sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
                        sfs.f_ffree = (user64_long_t)sp->f_ffree;
                        sfs.f_fsid = sp->f_fsid;
                        sfs.f_owner = sp->f_owner;
-#ifdef NFSCLIENT
+#ifdef CONFIG_NFS_CLIENT
                        if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
                                strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
                        } else
-#endif
+#endif /* CONFIG_NFS_CLIENT */
                        {
                                strlcpy(sfs.f_fstypename, sp->f_fstypename, MFSNAMELEN);
                        }
@@ -3987,11 +3992,11 @@ sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
                        sfs.f_fsid = sp->f_fsid;
                        sfs.f_owner = sp->f_owner;
 
-#ifdef NFSCLIENT
+#ifdef CONFIG_NFS_CLIENT
                        if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
                                strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
                        } else
-#endif
+#endif /* CONFIG_NFS_CLIENT */
                        {
                                strlcpy(sfs.f_fstypename, sp->f_fstypename, MFSNAMELEN);
                        }
@@ -6227,26 +6232,21 @@ vn_create(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, struct vnode_attr *v
        vp = *vpp;
        old_error = error;
 
-#if CONFIG_MACF
-       if (!(flags & VN_CREATE_NOLABEL)) {
-               error = vnode_label(vnode_mount(vp), dvp, vp, cnp, VNODE_LABEL_CREATE, ctx);
-               if (error) {
-                       goto error;
-               }
-       }
-#endif
-
        /*
         * If some of the requested attributes weren't handled by the VNOP,
         * use our fallback code.
         */
-       if (!VATTR_ALL_SUPPORTED(vap) && *vpp) {
+       if ((error == 0) && !VATTR_ALL_SUPPORTED(vap) && *vpp) {
                KAUTH_DEBUG("     CREATE - doing fallback with ACL %p", vap->va_acl);
                error = vnode_setattr_fallback(*vpp, vap, ctx);
        }
+
 #if CONFIG_MACF
-error:
+       if ((error == 0) && !(flags & VN_CREATE_NOLABEL)) {
+               error = vnode_label(vnode_mount(vp), dvp, vp, cnp, VNODE_LABEL_CREATE, ctx);
+       }
 #endif
+
        if ((error != 0) && (vp != (vnode_t)0)) {
                /* If we've done a compound open, close */
                if (batched && (old_error == 0) && (vap->va_type == VREG)) {
index 84627d65ecb57986a1158b06487ec35fc5f493ac..a0a04deb8b43910efb399a0595a1ed2051891b57 100644 (file)
 #include <mach/vfs_nspace.h>
 #include <os/log.h>
 
+#include <nfs/nfs_conf.h>
+
 #if ROUTEFS
 #include <miscfs/routefs/routefs.h>
 #endif /* ROUTEFS */
@@ -266,7 +268,7 @@ extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
  * Virtual File System System Calls
  */
 
-#if NFSCLIENT || DEVFS || ROUTEFS
+#if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
 /*
  * Private in-kernel mounting spi (NFS only, not exported)
  */
@@ -322,7 +324,7 @@ kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
 
        return error;
 }
-#endif /* NFSCLIENT || DEVFS */
+#endif /* CONFIG_NFS_CLIENT || DEVFS */
 
 /*
  * Mount a file system.
@@ -829,14 +831,14 @@ mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
        /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
        vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
 
-#if NFSCLIENT || DEVFS || ROUTEFS
+#if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
        if (kernelmount) {
                mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
        }
        if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0) {
                mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
        }
-#endif /* NFSCLIENT || DEVFS */
+#endif /* CONFIG_NFS_CLIENT || DEVFS */
 
 update:
 
@@ -4781,6 +4783,9 @@ linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
 
        error = nameiat(&nd, fd1);
        if (error) {
+               if (error == EPERM) {
+                       printf("XXX 54841485: nameiat() src EPERM\n");
+               }
                return error;
        }
        vp = nd.ni_vp;
@@ -4794,6 +4799,7 @@ linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
        if (vp->v_type == VDIR) {
                if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
                        error = EPERM;   /* POSIX */
+                       printf("XXX 54841485: VDIR EPERM\n");
                        goto out;
                }
 
@@ -4821,6 +4827,9 @@ linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
        nd.ni_dirp = link;
        error = nameiat(&nd, fd2);
        if (error != 0) {
+               if (error == EPERM) {
+                       printf("XXX 54841485: nameiat() dst EPERM\n");
+               }
                goto out;
        }
        dvp = nd.ni_dvp;
@@ -4828,12 +4837,18 @@ linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
 
 #if CONFIG_MACF
        if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0) {
+               if (error == EPERM) {
+                       printf("XXX 54841485: mac_vnode_check_link() EPERM\n");
+               }
                goto out2;
        }
 #endif
 
        /* or to anything that kauth doesn't want us to (eg. immutable items) */
        if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0) {
+               if (error == EPERM) {
+                       printf("XXX 54841485: vnode_authorize() LINKTARGET EPERM\n");
+               }
                goto out2;
        }
 
@@ -4850,12 +4865,18 @@ linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
 
        /* authorize creation of the target node */
        if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
+               if (error == EPERM) {
+                       printf("XXX 54841485: vnode_authorize() ADD_FILE EPERM\n");
+               }
                goto out2;
        }
 
        /* and finally make the link */
        error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
        if (error) {
+               if (error == EPERM) {
+                       printf("XXX 54841485: VNOP_LINK() EPERM\n");
+               }
                goto out2;
        }
 
@@ -5056,16 +5077,16 @@ symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
                error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
        }
 
-#if CONFIG_MACF
+       /* do fallback attribute handling */
        if (error == 0 && vp) {
-               error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
+               error = vnode_setattr_fallback(vp, &va, ctx);
        }
-#endif
 
-       /* do fallback attribute handling */
+#if CONFIG_MACF
        if (error == 0 && vp) {
-               error = vnode_setattr_fallback(vp, &va, ctx);
+               error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
        }
+#endif
 
        if (error == 0) {
                int     update_flags = 0;
@@ -7784,10 +7805,6 @@ clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
                int fsevent;
 #endif /* CONFIG_FSE */
 
-#if CONFIG_MACF
-               (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
-                   VNODE_LABEL_CREATE, ctx);
-#endif
                /*
                 * If some of the requested attributes weren't handled by the
                 * VNOP, use our fallback code.
@@ -7796,6 +7813,11 @@ clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
                        (void)vnode_setattr_fallback(tvp, &nva, ctx);
                }
 
+#if CONFIG_MACF
+               (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
+                   VNODE_LABEL_CREATE, ctx);
+#endif
+
                // Make sure the name & parent pointers are hooked up
                if (tvp->v_name == NULL) {
                        update_flags |= VNODE_UPDATE_NAME;
@@ -12720,7 +12742,9 @@ static int
 snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
     __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
 {
+       mount_t mp;
        vnode_t rvp, snapdvp, snapvp, vp, pvp;
+       struct fs_snapshot_mount_args smnt_data;
        int error;
        struct nameidata *snapndp, *dirndp;
        /* carving out a chunk for structs that are too big to be on the stack. */
@@ -12756,20 +12780,28 @@ snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
 
        vp = dirndp->ni_vp;
        pvp = dirndp->ni_dvp;
+       mp = vnode_mount(rvp);
 
        if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
                error = EINVAL;
-       } else {
-               mount_t mp = vnode_mount(rvp);
-               struct fs_snapshot_mount_args smnt_data;
+               goto out2;
+       }
 
-               smnt_data.sm_mp  = mp;
-               smnt_data.sm_cnp = &snapndp->ni_cnd;
-               error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
-                   &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
-                   KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
+#if CONFIG_MACF
+       error = mac_mount_check_snapshot_mount(ctx, rvp, vp, &dirndp->ni_cnd, snapndp->ni_cnd.cn_nameptr,
+           mp->mnt_vfsstat.f_fstypename);
+       if (error) {
+               goto out2;
        }
+#endif
 
+       smnt_data.sm_mp  = mp;
+       smnt_data.sm_cnp = &snapndp->ni_cnd;
+       error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
+           &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
+           KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
+
+out2:
        vnode_put(vp);
        vnode_put(pvp);
        nameidone(dirndp);
index 2fae8525c33804ae590bf07b70ccef894196ecbb..0a9075c05ff87faba7bab90b59f3d222d32dda54 100644 (file)
@@ -73,6 +73,7 @@
 
 #include <kern/assert.h>
 #include <sys/kdebug.h>
+#include <nfs/nfs_conf.h>
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
@@ -430,12 +431,12 @@ vnode_pageout(struct vnode *vp,
                 * of its pages
                 */
                for (offset = upl_offset; isize; isize -= PAGE_SIZE, offset += PAGE_SIZE) {
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
                        if (vp->v_tag == VT_NFS) {
                                /* check with nfs if page is OK to drop */
                                error = nfs_buf_page_inval(vp, (off_t)f_offset);
                        } else
-#endif
+#endif /* CONFIG_NFS_CLIENT */
                        {
                                blkno = ubc_offtoblk(vp, (off_t)f_offset);
                                error = buf_invalblkno(vp, blkno, 0);
@@ -487,12 +488,12 @@ vnode_pageout(struct vnode *vp,
                         * Note we must not sleep here if the buffer is busy - that is
                         * a lock inversion which causes deadlock.
                         */
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
                        if (vp->v_tag == VT_NFS) {
                                /* check with nfs if page is OK to drop */
                                error = nfs_buf_page_inval(vp, (off_t)f_offset);
                        } else
-#endif
+#endif /* CONFIG_NFS_CLIENT */
                        {
                                blkno = ubc_offtoblk(vp, (off_t)f_offset);
                                error = buf_invalblkno(vp, blkno, 0);
index 4e8d858f5a89adaf4a513cd453923d4920c73973..f2ce580fc82b11bc07fc9a11d44276b8297bcc3b 100644 (file)
@@ -696,6 +696,7 @@ _vfs_sysctl
 _vfs_typenum
 _vfs_unbusy
 _vfs_unmountbyfsid
+_vn_authorize_rmdir
 _vn_authorize_unlink
 _vn_bwrite
 _vn_default_error
index 0010db9f1c07d1852ac882bc743711e4fe2f1eba..d29f1f01c3a71339d73b759d1566762309cbb887 100644 (file)
@@ -1453,6 +1453,7 @@ _gIOResourceMatchKey
 _gIOResourcesKey
 _gIOServiceKey
 _gIOServicePlane
+_gIOSupportedPropertiesKey
 _gIOTerminatedNotification
 _gIOUserClientClassKey
 _gIOWillTerminateNotification
@@ -1678,7 +1679,7 @@ __ZTVN25IODataQueueDispatchSource9MetaClassE
 __ZN25IODataQueueDispatchSource19DequeueWithCoalesceEPbU13block_pointerFvPKvmE
 __ZN25IODataQueueDispatchSource19EnqueueWithCoalesceEjPbU13block_pointerFvPvmE
 
-__ZN11IOMemoryMap17_CopyState_InvokeE5IORPCP15OSMetaClassBasePFiS2_P23IOMemoryMapPrivateStateE
+__ZN11IOMemoryMap17_CopyState_InvokeE5IORPCP15OSMetaClassBasePFiS2_P24_IOMemoryMapPrivateStateE
 __ZN12IOUserClient22AsyncCompletion_InvokeE5IORPCP15OSMetaClassBasePFvS2_P8OSActioniPKyjE
 __ZN12IOUserClient22_ExternalMethod_InvokeE5IORPCP15OSMetaClassBasePFiS2_yPKyjP6OSDataP18IOMemoryDescriptorPyPjyPS6_S8_P8OSActionE
 __ZN12IOUserClient30CopyClientMemoryForType_InvokeE5IORPCP15OSMetaClassBasePFiS2_yPyPP18IOMemoryDescriptorE
@@ -1691,7 +1692,7 @@ __ZN16IODispatchSource13Cancel_InvokeE5IORPCP15OSMetaClassBasePFiS2_U13block_poi
 __ZN16IODispatchSource16SetEnable_InvokeE5IORPCP15OSMetaClassBasePFiS2_bE
 __ZN16IODispatchSource19CheckForWork_InvokeE5IORPCP15OSMetaClassBasePFiS2_S0_bE
 __ZN16IODispatchSource30SetEnableWithCompletion_InvokeE5IORPCP15OSMetaClassBasePFiS2_bU13block_pointerFvvEE
-__ZN18IOMemoryDescriptor17_CopyState_InvokeE5IORPCP15OSMetaClassBasePFiS2_P16IOMDPrivateStateE
+__ZN18IOMemoryDescriptor17_CopyState_InvokeE5IORPCP15OSMetaClassBasePFiS2_P17_IOMDPrivateStateE
 __ZN18IOMemoryDescriptor20PrepareForDMA_InvokeE5IORPCP15OSMetaClassBasePFiS2_yP9IOServiceyyPyS5_PjP16IOAddressSegmentE
 __ZN24IOBufferMemoryDescriptor13Create_InvokeE5IORPCPFiyyyPPS_E
 __ZN24IOBufferMemoryDescriptor16SetLength_InvokeE5IORPCP15OSMetaClassBasePFiS2_yE
index e274ed3e14d7ced158ad61f345caf27c87c8428c..afe5c8e1d5d6a7d0db031b92390d927e784f6cab 100644 (file)
@@ -12,6 +12,7 @@ _mac_label_set
 _mac_audit_text
 
 _mac_iokit_check_hid_control
+_mac_mount_check_snapshot_mount
 _mac_vnode_check_trigger_resolve
 
 _sbuf_cat
index 3a06932c552541acb1dd0635572042d98668f4ec..441ebcc083e9b3832ebb7b4ae7d60122a0ad8f15 100644 (file)
@@ -1,4 +1,4 @@
-19.3.0
+19.4.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
index 5447e64e68faeb919845248bab18fea1d8729d9d..0436ff54fae04742ddf06f77139790de2213a17a 100644 (file)
@@ -348,6 +348,7 @@ _pffindproto:_pffindproto_old
 _port_name_to_task
 _port_name_to_thread
 _post_sys_powersource
+_proc_csflags
 _proc_get_syscall_filter_mask_size
 _proc_getexecutableoffset
 _proc_getexecutablevnode
@@ -357,9 +358,12 @@ _proc_pidbackgrounded
 _proc_pidversion
 _proc_set_responsible_pid
 _proc_set_syscall_filter_mask
+_proc_selfcsflags
 _proc_task
 _proc_uniqueid
 _proc_puniqueid
+_proc_gettty
+_proc_gettty_dev
 _proc_exitstatus
 _priv_check_cred
 _pru_abort_notsupp
index 449d66ea549d9b7f872194a5dde77324ae7c0023..9975a16e5b1d6974e1477939c88b87b4be231b15 100644 (file)
@@ -45,8 +45,7 @@
  * IOBufferMemoryDescriptor describes a memory buffer allocated in the callers address space.
  *
  * @discussion
- * To allocate memory for I/O or sharing, use IOBufferMemoryDescriptor::Create()
- * Methods in this class are used for memory that was supplied as a parameter.
+ * To allocate memory for I/O or sharing, use IOBufferMemoryDescriptor::Create().
  * IOBufferMemoryDescriptor can be handed to any API that expects an IOMemoryDescriptor.
  */
 
diff --git a/iokit/DriverKit/IOKitKeys.h b/iokit/DriverKit/IOKitKeys.h
new file mode 100644 (file)
index 0000000..758ac24
--- /dev/null
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * Copyright (c) 1999 Apple Computer, Inc.  All rights reserved.
+ *
+ * Common symbol definitions for IOKit.
+ *
+ * HISTORY
+ *
+ */
+
+
+#ifndef _IOKIT_IOKITKEYS_H
+#define _IOKIT_IOKITKEYS_H
+
+// properties found in the registry root
+#define kIOKitBuildVersionKey           "IOKitBuildVersion"
+#define kIOKitDiagnosticsKey            "IOKitDiagnostics"
+// a dictionary keyed by plane name
+#define kIORegistryPlanesKey            "IORegistryPlanes"
+#define kIOCatalogueKey                 "IOCatalogue"
+
+// registry plane names
+#define kIOServicePlane                 "IOService"
+#define kIOPowerPlane                   "IOPower"
+#define kIODeviceTreePlane              "IODeviceTree"
+#define kIOAudioPlane                   "IOAudio"
+#define kIOFireWirePlane                "IOFireWire"
+#define kIOUSBPlane                     "IOUSB"
+
+// registry ID number
+#define kIORegistryEntryIDKey           "IORegistryEntryID"
+// property name to get array of property names
+#define kIORegistryEntryPropertyKeysKey "IORegistryEntryPropertyKeys"
+
+// IOService class name
+#define kIOServiceClass                 "IOService"
+
+// IOResources class name
+#define kIOResourcesClass               "IOResources"
+
+// IOService driver probing property names
+#define kIOClassKey                     "IOClass"
+#define kIOProbeScoreKey                "IOProbeScore"
+#define kIOKitDebugKey                  "IOKitDebug"
+
+// Properties to be supported as API
+#define kIOSupportedPropertiesKey       "IOSupportedProperties"
+// Properties writable by dexts
+#define kIOUserServicePropertiesKey     "IOUserServiceProperties"
+
+
+// IOService matching property names
+#define kIOProviderClassKey             "IOProviderClass"
+#define kIONameMatchKey                 "IONameMatch"
+#define kIOPropertyMatchKey             "IOPropertyMatch"
+#define kIOPropertyExistsMatchKey       "IOPropertyExistsMatch"
+#define kIOPathMatchKey                 "IOPathMatch"
+#define kIOLocationMatchKey             "IOLocationMatch"
+#define kIOParentMatchKey               "IOParentMatch"
+#define kIOResourceMatchKey             "IOResourceMatch"
+#define kIOResourceMatchedKey           "IOResourceMatched"
+#define kIOMatchedServiceCountKey       "IOMatchedServiceCountMatch"
+
+#define kIONameMatchedKey               "IONameMatched"
+
+#define kIOMatchCategoryKey             "IOMatchCategory"
+#define kIODefaultMatchCategoryKey      "IODefaultMatchCategory"
+
+#define kIOMatchedPersonalityKey        "IOMatchedPersonality"
+#define kIORematchPersonalityKey        "IORematchPersonality"
+#define kIORematchCountKey              "IORematchCount"
+#define kIODEXTMatchCountKey            "IODEXTMatchCount"
+
+// Entitlements to check against dext process
+// Property is an array, one or more of which may match, of:
+//   an array of entitlement strings, all must be present
+// Any array can be a single string.
+#define kIOServiceDEXTEntitlementsKey   "IOServiceDEXTEntitlements"
+
+// Entitlement required to open dext connection
+#define kIODriverKitEntitlementKey      "com.apple.developer.driverkit"
+
+// Entitlements required to open dext IOUserClient
+// Property is an array of strings containing CFBundleIdentifiers of service being opened
+#define kIODriverKitUserClientEntitlementsKey "com.apple.developer.driverkit.userclient-access"
+
+// Other DriverKit entitlements
+#define kIODriverKitUSBTransportEntitlementKey "com.apple.developer.driverkit.transport.usb"
+#define kIODriverKitHIDTransportEntitlementKey "com.apple.developer.driverkit.transport.hid"
+#define kIODriverKitHIDFamilyDeviceEntitlementKey "com.apple.developer.driverkit.family.hid.device"
+#define kIODriverKitHIDFamilyEventServiceEntitlementKey "com.apple.developer.driverkit.family.hid.eventservice"
+#define kIODriverKitTransportBuiltinEntitlementKey "com.apple.developer.driverkit.builtin"
+
+// Entitlement required to read nvram root-only properties as non-root user
+#define kIONVRAMReadAccessKey           "com.apple.private.iokit.nvram-read-access"
+// Entitlement required to write nvram properties as non-root user
+#define kIONVRAMWriteAccessKey           "com.apple.private.iokit.nvram-write-access"
+
+// When possible, defer matching of this driver until kextd has started.
+#define kIOMatchDeferKey                                "IOMatchDefer"
+
+// Published after processor_start() has been called on all CPUs at boot time.
+#define kIOAllCPUInitializedKey                         "IOAllCPUInitialized"
+
+// IOService default user client class, for loadable user clients
+#define kIOUserClientClassKey           "IOUserClientClass"
+
+// key to find IOMappers
+#define kIOMapperIDKey                          "IOMapperID"
+
+#define kIOUserClientCrossEndianKey             "IOUserClientCrossEndian"
+#define kIOUserClientCrossEndianCompatibleKey   "IOUserClientCrossEndianCompatible"
+#define kIOUserClientSharedInstanceKey          "IOUserClientSharedInstance"
+#if KERNEL_PRIVATE
+#define kIOUserClientMessageAppSuspendedKey     "IOUserClientMessageAppSuspended"
+#endif
+// diagnostic string describing the creating task
+#define kIOUserClientCreatorKey         "IOUserClientCreator"
+// the expected cdhash value of the userspace driver executable
+#define kIOUserServerCDHashKey          "IOUserServerCDHash"
+
+#define kIOUserUserClientKey                    "IOUserUserClient"
+
+
+// IOService notification types
+#define kIOPublishNotification          "IOServicePublish"
+#define kIOFirstPublishNotification     "IOServiceFirstPublish"
+#define kIOMatchedNotification          "IOServiceMatched"
+#define kIOFirstMatchNotification       "IOServiceFirstMatch"
+#define kIOTerminatedNotification       "IOServiceTerminate"
+#define kIOWillTerminateNotification    "IOServiceWillTerminate"
+
+// IOService interest notification types
+#define kIOGeneralInterest              "IOGeneralInterest"
+#define kIOBusyInterest                 "IOBusyInterest"
+#define kIOAppPowerStateInterest        "IOAppPowerStateInterest"
+#define kIOPriorityPowerStateInterest   "IOPriorityPowerStateInterest"
+
+#define kIOPlatformDeviceMessageKey     "IOPlatformDeviceMessage"
+
+// IOService interest notification types
+#define kIOCFPlugInTypesKey             "IOCFPlugInTypes"
+
+#define kIOCompatibilityMatchKey            "IOCompatibilityMatch"
+#define kIOCompatibilityPropertiesKey   "IOCompatibilityProperties"
+#define kIOPathKey                                      "IOPath"
+
+
+// properties found in services that implement command pooling
+#define kIOCommandPoolSizeKey           "IOCommandPoolSize"             // (OSNumber)
+
+// properties found in services that implement priority
+#define kIOMaximumPriorityCountKey      "IOMaximumPriorityCount"        // (OSNumber)
+
+// properties found in services that have transfer constraints
+#define kIOMaximumBlockCountReadKey             "IOMaximumBlockCountRead"             // (OSNumber)
+#define kIOMaximumBlockCountWriteKey            "IOMaximumBlockCountWrite"            // (OSNumber)
+#define kIOMaximumByteCountReadKey              "IOMaximumByteCountRead"              // (OSNumber)
+#define kIOMaximumByteCountWriteKey             "IOMaximumByteCountWrite"             // (OSNumber)
+#define kIOMaximumSegmentCountReadKey           "IOMaximumSegmentCountRead"           // (OSNumber)
+#define kIOMaximumSegmentCountWriteKey          "IOMaximumSegmentCountWrite"          // (OSNumber)
+#define kIOMaximumSegmentByteCountReadKey       "IOMaximumSegmentByteCountRead"       // (OSNumber)
+#define kIOMaximumSegmentByteCountWriteKey      "IOMaximumSegmentByteCountWrite"      // (OSNumber)
+#define kIOMinimumSegmentAlignmentByteCountKey  "IOMinimumSegmentAlignmentByteCount"  // (OSNumber)
+#define kIOMaximumSegmentAddressableBitCountKey "IOMaximumSegmentAddressableBitCount" // (OSNumber)
+#define kIOMinimumSaturationByteCountKey        "IOMinimumSaturationByteCount"        // (OSNumber)
+#define kIOMaximumSwapWriteKey                  "IOMaximumSwapWrite"                  // (OSNumber)
+
+// properties found in services that wish to describe an icon
+//
+// IOIcon =
+// {
+//     CFBundleIdentifier   = "com.example.driver.example";
+//     IOBundleResourceFile = "example.icns";
+// };
+//
+// where IOBundleResourceFile is the filename of the resource
+
+#define kIOIconKey               "IOIcon"               // (OSDictionary)
+#define kIOBundleResourceFileKey "IOBundleResourceFile" // (OSString)
+
+#define kIOBusBadgeKey           "IOBusBadge"           // (OSDictionary)
+#define kIODeviceIconKey         "IODeviceIcon"         // (OSDictionary)
+
+// property of root that describes the machine's serial number as a string
+#define kIOPlatformSerialNumberKey      "IOPlatformSerialNumber"        // (OSString)
+
+// property of root that describes the machine's UUID as a string
+#define kIOPlatformUUIDKey      "IOPlatformUUID"        // (OSString)
+
+// IODTNVRAM property keys
+#define kIONVRAMBootArgsKey             "boot-args"
+#define kIONVRAMDeletePropertyKey       "IONVRAM-DELETE-PROPERTY"
+#define kIONVRAMSyncNowPropertyKey      "IONVRAM-SYNCNOW-PROPERTY"
+#define kIONVRAMActivateCSRConfigPropertyKey    "IONVRAM-ARMCSR-PROPERTY"
+#define kIODTNVRAMPanicInfoKey          "aapl,panic-info"
+
+// keys for complex boot information
+#define kIOBootDeviceKey          "IOBootDevice"                // dict | array of dicts
+#define kIOBootDevicePathKey      "IOBootDevicePath"    // arch-neutral OSString
+#define kIOBootDeviceSizeKey      "IOBootDeviceSize"    // OSNumber of bytes
+
+// keys for OS Version information
+#define kOSBuildVersionKey              "OS Build Version"
+
+#endif /* ! _IOKIT_IOKITKEYS_H */
index 760d48cb1eb7f7be8c7703d87b8b815cdbd634cf..c2c12063b5178270454b7a4e541f3e5b5b367325 100644 (file)
@@ -64,7 +64,7 @@ struct IOAddressSegment {
        uint64_t length;
 };
 
-struct IOMDPrivateState {
+struct _IOMDPrivateState {
        uint64_t length;
        uint64_t options;
 };
@@ -159,7 +159,7 @@ class EXTENDS (IOMemoryDescriptor) IOMemoryDescriptorPrivate
 {
        virtual kern_return_t
        _CopyState(
-               IOMDPrivateState * state);
+               _IOMDPrivateState * state);
 };
 
 
index 716c87f09ec254bbd214838d8fc639572258fb97..4e49f8de115c04135ac759dbed69fbf355506939 100644 (file)
@@ -37,7 +37,7 @@
 
 #include <DriverKit/OSObject.iig>
 
-struct IOMemoryMapPrivateState {
+struct _IOMemoryMapPrivateState {
        uint64_t length;
        uint64_t offset;
        uint64_t options;
@@ -91,7 +91,7 @@ class EXTENDS (IOMemoryMap) IOMemoryMapPrivate
 {
        virtual kern_return_t
        _CopyState(
-               IOMemoryMapPrivateState * state);
+               _IOMemoryMapPrivateState * state);
 };
 
 #endif /* ! _IOKIT_UIOMEMORYMAP_H */
index 5175ee7d0aff65aa66f09cbacd75c7514c3c21bd..13eb9b25d06ea38a827c34c4f21c10e9acbc19a5 100644 (file)
@@ -46,8 +46,11 @@ extern "C" {
 
 #else  /* PLATFORM_DriverKit */
 
-#ifndef _MACH_ERROR_H_
-#define _MACH_ERROR_H_
+#ifdef DRIVERKIT_PRIVATE
+
+#include <mach/error.h>
+
+#else  /* DRIVERKIT_PRIVATE */
 
 typedef int             kern_return_t;
 
@@ -76,7 +79,7 @@ typedef int             kern_return_t;
 #define sub_emask               (err_sub(0xfff))
 #define code_emask              (0x3fff)
 
-#endif  /* _MACH_ERROR_H_ */
+#endif  /* DRIVERKIT_PRIVATE */
 
 #endif /* PLATFORM_DriverKit */
 
@@ -112,6 +115,9 @@ typedef kern_return_t           IOReturn;
 #define sub_iokit_smc                     err_sub(32)
 #endif
 #define sub_iokit_apfs                    err_sub(33)
+#define sub_iokit_acpiec                  err_sub(34)
+#define sub_iokit_timesync_avb            err_sub(35)
+
 #define sub_iokit_platform                err_sub(0x2A)
 #define sub_iokit_audio_video             err_sub(0x45)
 #define sub_iokit_cec                     err_sub(0x46)
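
sub_iokit_acpiec and sub_iokit_timesync_avb carve out two more IOReturn subsystem codes. A family mints its own return values from them with the iokit_family_err() helper defined elsewhere in this header; for example (hypothetical error name):

	#define kMyACPIECCommandTimeout  iokit_family_err(sub_iokit_acpiec, 0x1)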
index 5885850dc21f1a40a965367cb8e22b33a988b4cd..274950e8a825c152b04cb215c93a9f0f3cf359cd 100644 (file)
@@ -40,6 +40,7 @@
 class IOMemoryDescriptor;
 class IOBufferMemoryDescriptor;
 class IOUserClient;
+class OSAction;
 
 typedef char IOServiceName[128];
 typedef char IOPropertyName[128];
@@ -245,7 +246,83 @@ public:
        Create(
                IOService          * provider,
                const IOPropertyName propertiesKey,
-               IOService         ** result);
+               IOService         ** result) LOCAL;
+
+    /*!
+     * @brief       Start an IOService termination.
+     * @discussion  An IOService object created with Create() may be removed by calling Terminate().
+     *              The termination is asynchronous and will later call Stop() on the service.
+     * @param       options No options are currently defined, pass zero.
+     * @return      kIOReturnSuccess on success. See IOReturn.h for error codes.
+     */
+       virtual kern_return_t
+       Terminate(
+               uint64_t                         options);
+
+   /*!
+    * @brief       Obtain supportable properties describing the provider chain.
+    * @discussion  Obtain supportable properties describing the provider chain. This will be a subset of registry
+    *              properties the OS considers supportable.
+    *              The array is ordered with a dictionary of properties for each entry in the provider chain from this
+    *              service towards the root.
+    * @param       propertyKeys If only certain property values are needed, they may be passed in this array.
+    * @param       properties Returned, retained array of dictionaries of properties or NULL. The caller should release
+    *              this array.
+    * @return      kIOReturnSuccess on success. See IOReturn.h for error codes.
+    */
+       virtual kern_return_t
+       CopyProviderProperties(
+               OSArray  * propertyKeys,
+               OSArray ** properties);
+
+
+       /*! @function IOCreatePropertyMatchingDictionary
+        *   @abstract Construct a matching dictionary for property matching.
+        */
+       static OSDictionary *
+       CreatePropertyMatchingDictionary(const char * key, OSObjectPtr value, OSDictionary * matching) LOCALONLY;
+
+       /*! @function IOCreatePropertyMatchingDictionary
+        *   @abstract Construct a matching dictionary for property matching.
+        */
+       static OSDictionary *
+       CreatePropertyMatchingDictionary(const char * key, const char * stringValue, OSDictionary * matching) LOCALONLY;
+
+       /*! @function IOCreateKernelClassMatchingDictionary
+        *   @abstract Construct a matching dictionary for kernel class matching.
+        */
+       static OSDictionary *
+       CreateKernelClassMatchingDictionary(OSString * className, OSDictionary * matching) LOCALONLY;
+
+       /*! @function IOCreateKernelClassMatchingDictionary
+        *   @abstract Construct a matching dictionary for kernel class matching.
+        */
+       static OSDictionary *
+       CreateKernelClassMatchingDictionary(const char * className, OSDictionary * matching) LOCALONLY;
+
+       /*! @function IOCreateUserClassMatchingDictionary
+        *   @abstract Construct a matching dictionary for user class matching.
+        */
+       static OSDictionary *
+       CreateUserClassMatchingDictionary(OSString * className, OSDictionary * matching) LOCALONLY;
+
+       /*! @function IOCreateUserClassMatchingDictionary
+        *   @abstract Construct a matching dictionary for user class matching.
+        */
+       static OSDictionary *
+       CreateUserClassMatchingDictionary(const char * className, OSDictionary * matching) LOCALONLY;
+
+       /*! @function IOCreateNameMatchingDictionary
+        *   @abstract Construct a matching dictionary for IOService name matching.
+        */
+       static OSDictionary *
+       CreateNameMatchingDictionary(OSString * serviceName, OSDictionary * matching) LOCALONLY;
+
+       /*! @function IOCreateNameMatchingDictionary
+        *   @abstract Construct a matching dictionary for IOService name matching.
+        */
+       static OSDictionary *
+       CreateNameMatchingDictionary(const char * serviceName, OSDictionary * matching) LOCALONLY;
 };
 
 #endif /* ! _IOKIT_UIOSERVICE_H */
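
The new LOCALONLY helpers above let a dext build matching dictionaries without assembling OSDictionary contents by hand, and CopyProviderProperties() exposes the supportable subset of the provider chain's registry properties. A hedged sketch: 'service' is assumed to be an IOService the caller already holds, and the NULL arguments (fresh dictionary, all supportable keys) are assumptions:

	OSDictionary * matching =
	    IOService::CreateNameMatchingDictionary("AppleExampleService", NULL);

	OSArray * properties = NULL;
	kern_return_t ret = service->CopyProviderProperties(
	    NULL,           // propertyKeys: NULL assumed to mean "all supportable"
	    &properties);   // returned retained; release when done
	if (ret == kIOReturnSuccess && properties != NULL) {
	    properties->release();
	}
	if (matching != NULL) {
	    matching->release();
	}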
diff --git a/iokit/DriverKit/IOServiceNotificationDispatchSource.iig b/iokit/DriverKit/IOServiceNotificationDispatchSource.iig
new file mode 100644 (file)
index 0000000..babc74c
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2019-2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _IOKIT_UIOSERVICEDISPATCHSOURCE_H
+#define _IOKIT_UIOSERVICEDISPATCHSOURCE_H
+
+#include <DriverKit/IODispatchQueue.iig>
+#include <DriverKit/OSAction.iig>
+#include <DriverKit/IOService.iig>
+
+
+typedef void (^IOServiceNotificationBlock)(uint64_t type, IOService * service, uint64_t options);
+
+enum {
+       kIOServiceNotificationTypeTerminated = 0x00000000,
+       kIOServiceNotificationTypeMatched    = 0x00000001,
+       kIOServiceNotificationTypeLast       = kIOServiceNotificationTypeMatched,
+       kIOServiceNotificationTypeNone       = 0xFFFFFFFF,
+};
+
+class NATIVE KERNEL IOServiceNotificationDispatchSource : public IODispatchSource
+{
+public:
+
+    /*!
+     * @brief       Create an IOServiceNotificationDispatchSource for IOService matching and termination events.
+     * @param       matching An IOService matching dictionary.
+     * @param       options None defined, pass zero.
+     * @param       queue IODispatchQueue the source is attached to. Note that the ServiceNotificationReady
+     *              handler is invoked on the queue set for the target method
+     *              of the OSAction, not this queue.
+     * @param       notification Created source with +1 retain count to be released by the caller.
+     * @return      kIOReturnSuccess on success. See IOReturn.h for error codes.
+     */
+       static kern_return_t
+       Create(
+               OSDictionary                         * matching,
+               uint64_t                               options,
+               IODispatchQueue                      * queue,
+               IOServiceNotificationDispatchSource ** notification) LOCAL;
+
+       virtual bool
+       init() override;
+
+       virtual void
+       free() override;
+
+    /*!
+     * @brief       Control the enable state of the notification.
+     * @param       enable Pass true to enable the source or false to disable.
+     * @param       handler Optional block to be executed after the interrupt has been disabled and any pending
+     *              interrupt handlers completed.
+     * @return      kIOReturnSuccess on success. See IOReturn.h for error codes.
+     */
+       virtual kern_return_t
+       SetEnableWithCompletion(
+               bool enable,
+               IODispatchSourceCancelHandler handler) override LOCAL;
+
+    /*!
+     * @brief       Cancel all callbacks from the event source.
+     * @discussion  After cancellation, the source can only be freed. It cannot be reactivated.
+     * @param       handler Handler block to be invoked after any callbacks have completed.
+     * @return      kIOReturnSuccess on success. See IOReturn.h for error codes.
+     */
+       virtual kern_return_t
+       Cancel(IODispatchSourceCancelHandler handler) override LOCAL;
+
+    /*!
+     * @brief       Set the handler block to run when the notification has become ready.
+     * @param       action OSAction instance specifying the callback method. The OSAction object will be retained
+     *              until SetHandler is called again or the event source is cancelled.
+     *              The ServiceNotificationReady handler is invoked on the queue set for the target method of the
+     *              OSAction.
+     * @return      kIOReturnSuccess on success. See IOReturn.h for error codes.
+     */
+       virtual kern_return_t
+       SetHandler(
+       OSAction * action TYPE(ServiceNotificationReady));
+
+    /*!
+     * @brief       Invoke a block for each notification available in response to ServiceNotificationReady.
+     * @discussion  The IOService object passed to the notification is only retained for the duration of the block.
+     *              It should be retained by the block code if used beyond the invocation.
+     * @param       block Block to be invoked with each notification.
+     * @return      kIOReturnSuccess on success. See IOReturn.h for error codes.
+     */
+       kern_return_t
+       DeliverNotifications(IOServiceNotificationBlock block) LOCALONLY;
+
+private:
+       virtual kern_return_t
+       CheckForWork(bool synchronous) override LOCAL;
+
+       virtual void
+       ServiceNotificationReady(
+               OSAction  * action TARGET) LOCAL = 0;
+
+       virtual kern_return_t
+       CopyNextNotification(
+               uint64_t   * type,
+               IOService ** service,
+               uint64_t   * options);
+};
+
+#endif /* ! _IOKIT_UIOSERVICEDISPATCHSOURCE_H */
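
Tying the pieces together: a dext would typically create the source in Start(), attach an OSAction created by the iig-generated CreateAction method for the TYPE(ServiceNotificationReady) callback, and drain events with DeliverNotifications() from the handler. A hedged sketch; the class, queue, and matched-class names are assumptions:

	// In MyDriver::Start(), with an IODispatchQueue 'queue' already created:
	OSDictionary * matching =
	    IOService::CreateUserClassMatchingDictionary("IOUserExampleService", NULL);

	IOServiceNotificationDispatchSource * source = NULL;
	kern_return_t ret =
	    IOServiceNotificationDispatchSource::Create(matching, 0, queue, &source);

	OSAction * action = NULL;
	if (ret == kIOReturnSuccess) {
	    // Generated by iig for the TYPE(ServiceNotificationReady) method above.
	    ret = CreateActionServiceNotificationReady(0 /* referenceSize */, &action);
	}
	if (ret == kIOReturnSuccess) {
	    ret = source->SetHandler(action);
	}

	// Later, in MyDriver::ServiceNotificationReady_Impl(OSAction * action):
	source->DeliverNotifications(^(uint64_t type, IOService * service, uint64_t options) {
	    if (type == kIOServiceNotificationTypeMatched) {
	        // 'service' is retained only for the duration of this block
	    }
	});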
index 62ab74bc258b344b693427b31f9cf569c5e1b162..e797df2d2c8891acaded3fae4cabeb6f52ec176c 100644 (file)
@@ -29,7 +29,7 @@ GENERATED_IMPL = $(patsubst %.iig,%.iig.cpp,$(ALL_DEFS))
 INSTALL_MI_LIST = $(ALL_DEFS)
 INSTALL_DRIVERKIT_MI_LIST = $(ALL_DEFS)
 
-OTHER_HEADERS = IOTypes.h IOReturn.h IORPC.h
+OTHER_HEADERS = IOTypes.h IOReturn.h IORPC.h IOKitKeys.h
 
 EXPORT_MI_GEN_LIST = $(GENERATED_HEADERS)  $(OTHER_HEADERS)
 INSTALL_MI_GEN_LIST = $(GENERATED_HEADERS)  $(OTHER_HEADERS)
index 999205c4ed34d2ad6df53f953037e7706def8875..ddb3b2e176b0caa75c8f23af647906048e0749e9 100644 (file)
@@ -33,6 +33,7 @@
 
 typedef void (^OSActionCancelHandler)(void);
 typedef void (^OSActionAbortedHandler)(void);
+struct OSActionWaitToken;
 
 /*!
  * @class OSAction
@@ -44,13 +45,22 @@ typedef void (^OSActionAbortedHandler)(void);
  * The callback is specified as a method and object pair.
  * State associated with the callback may be allocated and stored for the creator of the object.
  * Methods to allocate an OSAction instance are generated for each method defined in a class with
- * a TYPE attribute, so there should not be any need to directly call OSAction::Create().
+ * a TYPE attribute. The generated methods are named CreateAction{name of method with type attribute}
+ * and have the following declaration:
+ *
+ * kern_return_t CreateActionNameOfMethod(size_t referenceSize, OSAction **action);
+ *
+ * referenceSize refers to the size of the additional state structure available to the creator of the OSAction
+ * with GetReference. If successful, the generated method returns kIOReturnSuccess and a created OSAction
+ * through the 'action' parameter with a +1 retain count to be released by the caller. See IOReturn.h for
+ * error codes.
  */
 
 class NATIVE KERNEL OSAction : public OSObject
 {
 public:
 
+#if DRIVERKIT_PRIVATE
     /*!
      * @brief       Create an instance of OSAction.
         * @discussion  Methods to allocate an OSAction instance are generated for each method defined in a class with
@@ -72,6 +82,7 @@ public:
                uint64_t        msgid,
                size_t          referenceSize,
                OSAction     ** action) LOCAL;
+#endif
 
        virtual void
        free() override;
@@ -105,6 +116,46 @@ public:
        kern_return_t
        SetAbortedHandler(OSActionAbortedHandler handler) LOCALONLY;
 
+    /*!
+     * @brief       Mark this OSAction to be waited for later with Wait().
+     * @discussion  This call should be made before any possible invocation of the action.
+     *              An OSAction instance only supports one waiter and WillWait() will return an error if already called.
+     * @param       token Opaque value to be passed to a later call to Wait() and EndWait().
+     * @return      kIOReturnSuccess on success. See IOReturn.h for error codes.
+     */
+       kern_return_t
+       WillWait(OSActionWaitToken ** token) LOCALONLY;
+
+    /*!
+     * @brief       Discard the OSActionWaitToken for the action.
+     * @discussion  Free any wait resources allocated by WillWait().
+     *              There should be no outstanding invocations of the action when EndWait() is called;
+     *              if necessary, cancel the action before calling EndWait().
+     * @param       token Opaque value to be passed from an earlier call to WillWait().
+     * @return      kIOReturnSuccess on success. kIOReturnAborted if aborted or canceled.
+     *              kIOReturnTimeout if the deadline was passed. See IOReturn.h for error codes.
+     */
+       kern_return_t
+       EndWait(
+               OSActionWaitToken * token) LOCALONLY;
+
+    /*!
+     * @brief       Wait for the action to be invoked.
+     * @discussion  The current thread is blocked until the action invocation has completed, the action canceled
+     *              or aborted, or the deadline passed.
+     * @param       token Opaque value to be passed from an earlier call to WillWait().
+     * @param       options Pass one of the kIOTimerClock* options to specify the timebase for the
+     *              deadline.
+     * @param       deadline Pass the time the wait should timeout, or zero for no timeout.
+     * @return      kIOReturnSuccess on success. kIOReturnAborted if aborted or canceled.
+     *              kIOReturnTimeout if the deadline was passed. See IOReturn.h for error codes.
+     */
+       kern_return_t
+       Wait(
+               OSActionWaitToken * token,
+               uint64_t options,
+               uint64_t deadline) LOCALONLY;
+
        virtual void
        Aborted(void) LOCAL;
 };
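
WillWait()/Wait()/EndWait() let a dext thread block until an action's callback has fired: WillWait() must precede any possible invocation, and the token is discarded with EndWait() once no invocations remain outstanding. A hedged sketch; the timebase constant and trigger function are assumptions (the header only promises "one of the kIOTimerClock* options"):

	OSActionWaitToken * token = NULL;
	kern_return_t ret = action->WillWait(&token);   // before any possible invocation
	if (ret == kIOReturnSuccess) {
	    StartAsyncOperation(action);                // hypothetical async trigger
	    ret = action->Wait(token, kIOTimerClockMonotonicRaw, 0 /* no deadline */);
	    action->EndWait(token);                     // release wait resources
	}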
index 38b55fab373c36b81858f8da46171d39994fbc8e..f97de5aa9169cd62989b37843ff26001507c8082 100644 (file)
@@ -50,42 +50,75 @@ class OSObject;
 typedef OSObject * OSObjectPtr;
 #endif
 
-#if __IIG && !__IIG_ATTRIBUTES_DEFINED__
+#if !__IIG_ATTRIBUTES_DEFINED__
 
 #define __IIG_ATTRIBUTES_DEFINED__     1
 
-#define KERNEL       __attribute__((annotate("kernel")))
-#define NATIVE       __attribute__((annotate("native")))
-#define LOCAL        __attribute__((annotate("local")))
-#define LOCALONLY    __attribute__((annotate("localonly")))
-#define REMOTE       __attribute__((annotate("remote")))
-
-#define LOCALHOST    __attribute__((annotate("localhost")))
-
-#define INVOKEREPLY  __attribute__((annotate("invokereply")))
-#define REPLY        __attribute__((annotate("reply")))
-
-#define PORTMAKESEND __attribute__((annotate("MACH_MSG_TYPE_MAKE_SEND")))
-#define PORTCOPYSEND __attribute__((annotate("MACH_MSG_TYPE_COPY_SEND")))
-
-#define TARGET       __attribute__((annotate("target")))
-#define TYPE(p)      __attribute__((annotate("type=" # p)))
-
-//#define ARRAY(maxcount) __attribute__((annotate(# maxcount), annotate("array")))
-#define EXTENDS(cls) __attribute__((annotate("extends=" # cls)))
-
-//#define INTERFACE    __attribute__((annotate("interface")))
-//#define IMPLEMENTS(i)   void implements(i *);
-
-#define QUEUENAME(name) __attribute__((annotate("queuename=" # name)))
-
-#define IIG_SERIALIZABLE __attribute__((annotate("serializable")))
-
-#else
+#if __IIG || __DOCUMENTATION__
+
+#define IIG_KERNEL                             __attribute__((annotate("kernel")))
+#define IIG_NATIVE                     __attribute__((annotate("native")))
+#define IIG_LOCAL                      __attribute__((annotate("local")))
+#define IIG_LOCALONLY                  __attribute__((annotate("localonly")))
+#define IIG_REMOTE                     __attribute__((annotate("remote")))
+#define IIG_LOCALHOST                  __attribute__((annotate("localhost")))
+#define IIG_INVOKEREPLY                __attribute__((annotate("invokereply")))
+#define IIG_REPLY                      __attribute__((annotate("reply")))
+#define IIG_PORTMAKESEND               __attribute__((annotate("MACH_MSG_TYPE_MAKE_SEND")))
+#define IIG_PORTCOPYSEND               __attribute__((annotate("MACH_MSG_TYPE_COPY_SEND")))
+#define IIG_TARGET                     __attribute__((annotate("target")))
+#define IIG_TYPE(p)                    __attribute__((annotate("type=" # p)))
+//#define IIG_ARRAY(maxcount)  __attribute__((annotate(# maxcount), annotate("array")))
+#define IIG_EXTENDS(cls)               __attribute__((annotate("extends=" # cls)))
+//#define IIG_INTERFACE                __attribute__((annotate("interface")))
+//#define IIG_IMPLEMENTS(i)            void __implements(i *);
+#define IIG_QUEUENAME(name)            __attribute__((annotate("queuename=" # name)))
+#define IIG_SERIALIZABLE               __attribute__((annotate("serializable")))
 
+#if __IIG
+#define KERNEL                                 IIG_KERNEL
+#endif /* __IIG */
+#define NATIVE                                 IIG_NATIVE
+#define LOCAL                                  IIG_LOCAL
+#define LOCALONLY                              IIG_LOCALONLY
+#define REMOTE                                 IIG_REMOTE
+#define LOCALHOST                              IIG_LOCALHOST
+#define INVOKEREPLY                            IIG_INVOKEREPLY
+#define REPLY                                  IIG_REPLY
+#define PORTMAKESEND                   IIG_PORTMAKESEND
+#define PORTCOPYSEND                   IIG_PORTCOPYSEND
+#define TARGET                                 IIG_TARGET
+#define TYPE(p)                                        IIG_TYPE(p)
+//#define ARRAY(maxcount)              IIG_ARRAY(maxcount)
+#define EXTENDS(cls)                   IIG_EXTENDS(cls)
+//#define INTERFACE                            IIG_INTERFACE
+//#define IMPLEMENTS(i)                        IIG_IMPLEMENTS(i)
+#define QUEUENAME(name)                        IIG_QUEUENAME(name)
+
+#else /* __IIG || __DOCUMENTATION__ */
+
+#define IIG_KERNEL
+#define IIG_NATIVE
+#define IIG_LOCAL
+#define IIG_LOCALONLY
+#define IIG_REMOTE
+#define IIG_LOCALHOST
+#define IIG_INVOKEREPLY
+#define IIG_REPLY
+#define IIG_PORTMAKESEND
+#define IIG_PORTCOPYSEND
+#define IIG_TARGET
+#define IIG_TYPE(p)
+//#define IIG_ARRAY(maxcount)
+#define IIG_EXTENDS(cls)
+//#define IIG_INTERFACE
+//#define IIG_IMPLEMENTS(i)
+#define IIG_QUEUENAME(name)
 #define IIG_SERIALIZABLE
 
-#endif /* __IIG */
+#endif /* __IIG || __DOCUMENTATION__ */
+
+#endif /* __IIG_ATTRIBUTES_DEFINED__ */
 
 
 #if !__IIG
index 682625f4302bc35d48da5aaf215b1e4b3fcce366..01f8f295f55a3c413a2019782cd9b7b64c8fabd6 100644 (file)
@@ -174,6 +174,9 @@ public:
  *   @param unload Flag to cause the actual unloading of the module.
  */
        IOReturn terminateDriversForModule( const char * moduleName, bool unload = true);
+#if XNU_KERNEL_PRIVATE
+       IOReturn terminateDrivers(OSDictionary * matching, io_name_t className);
+#endif /* XNU_KERNEL_PRIVATE */
 
 /*!
  *   @function startMatching
@@ -227,7 +230,6 @@ private:
        IOReturn unloadModule( OSString * moduleName ) const;
 
        IOReturn _removeDrivers(OSDictionary * matching);
-       IOReturn _terminateDrivers(OSDictionary * matching);
 };
 
 extern const OSSymbol * gIOClassKey;
index e8db76ced32d47bea16e33487a0c395d9a263d3f..34da31735a939090ee57f207db5fc1ae143d0271 100644 (file)
 #define kIOProbeScoreKey                "IOProbeScore"
 #define kIOKitDebugKey                  "IOKitDebug"
 
+// Properties to be supported as API
+#define kIOSupportedPropertiesKey       "IOSupportedProperties"
+// Properties writable by dexts
+#define kIOUserServicePropertiesKey     "IOUserServiceProperties"
+
+
 // IOService matching property names
 #define kIOProviderClassKey             "IOProviderClass"
 #define kIONameMatchKey                 "IONameMatch"
index f7cc9eae6094bc416bc52155234b1c9b4373a239..55973bad621682535e844b0332eeb9370662f537 100644 (file)
@@ -155,6 +155,7 @@ extern mach_port_name_t iokit_make_send_right( task_t task,
     io_object_t obj, ipc_kobject_type_t type );
 
 extern mach_port_t ipc_port_make_send(mach_port_t);
+extern mach_port_t ipc_port_copy_send(mach_port_t);
 extern void ipc_port_release_send(ipc_port_t port);
 
 extern io_object_t iokit_lookup_io_object(ipc_port_t port, ipc_kobject_type_t type);
@@ -185,6 +186,8 @@ extern mach_msg_header_t * ipc_kmsg_msg_header(ipc_kmsg_t);
 extern kern_return_t
 uext_server(ipc_kmsg_t request, ipc_kmsg_t * preply);
 
+extern kern_return_t
+iokit_label_dext_task(task_t task);
 
 /*
  * Functions imported by iokit:IOMemoryDescriptor.cpp
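ipc_port_copy_send() joins the imports because IOUserServer.cpp (below) now copies the send right it already holds for a dispatch queue's upcall port instead of minting a new one from the receive side with ipc_port_make_send(). The balanced pattern, sketched in isolation (queuePort stands for the stored server port):

    mach_port_t sendPort = ipc_port_copy_send(queuePort); /* take our own send right */
    /* ... attach it to an outgoing message with MACH_MSG_TYPE_COPY_SEND ... */
    ipc_port_release_send(sendPort);                      /* then drop our reference */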
index c9e059654e354384b4e82092f5f8c8e533b63751..af3b946ea256d755e0ce7e40f13f0097ee38cd6b 100644 (file)
@@ -184,6 +184,11 @@ public:
            void *arg0 = NULL, void *arg1 = NULL,
            void *arg2 = NULL, void *arg3 = NULL);
 
+#ifdef __BLOCKS__
+       typedef IOReturn (^ActionBlock)(void);
+       IOReturn runPropertyActionBlock(ActionBlock block);
+#endif /* __BLOCKS__ */
+
 private:
 #if __LP64__
        OSMetaClassDeclareReservedUnused(IORegistryEntry, 0);
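runPropertyActionBlock() is the block form of runPropertyAction() above: the block runs with the registry entry's property lock held. A minimal usage sketch (key name illustrative):

    IOReturn ret = entry->runPropertyActionBlock(^IOReturn (void) {
        // Serialized against all other property access on this entry.
        entry->setProperty("ExampleState", kOSBooleanTrue);
        return kIOReturnSuccess;
    });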
index d932032226f820a972d0b931767509ba1e6460a4..13eb9b25d06ea38a827c34c4f21c10e9acbc19a5 100644 (file)
@@ -46,6 +46,12 @@ extern "C" {
 
 #else  /* PLATFORM_DriverKit */
 
+#ifdef DRIVERKIT_PRIVATE
+
+#include <mach/error.h>
+
+#else  /* DRIVERKIT_PRIVATE */
+
 typedef int             kern_return_t;
 
 #define KERN_SUCCESS                    0
@@ -73,6 +79,8 @@ typedef int             kern_return_t;
 #define sub_emask               (err_sub(0xfff))
 #define code_emask              (0x3fff)
 
+#endif  /* DRIVERKIT_PRIVATE */
+
 #endif /* PLATFORM_DriverKit */
 
 typedef kern_return_t           IOReturn;
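With DRIVERKIT_PRIVATE the private SDK now pulls in the real <mach/error.h> rather than this local fallback, keeping the system/sub/code masks in sync with the kernel. As a reminder of how an IOReturn decomposes, using the standard <mach/error.h> accessors (values shown for kIOReturnNoMemory, which is iokit_common_err(0x2bd)):

    #include <mach/error.h>

    err_get_system(kIOReturnNoMemory);  // 0x38  (sys_iokit)
    err_get_sub(kIOReturnNoMemory);     // 0     (sub_iokit_common)
    err_get_code(kIOReturnNoMemory);    // 0x2bd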
index fdfded6611fa98184166d550e31df3b1057b6e1b..28c99e74c6bdd8b459cf90986090281ef01ffc18 100644 (file)
@@ -162,6 +162,9 @@ extern const OSSymbol *     gIODeviceMemoryKey;
 extern const OSSymbol *     gIOInterruptControllersKey;
 extern const OSSymbol *     gIOInterruptSpecifiersKey;
 
+extern const OSSymbol *     gIOSupportedPropertiesKey;
+extern const OSSymbol *     gIOUserServicePropertiesKey;
+
 extern const OSSymbol *     gIOBSDKey;
 extern const OSSymbol *     gIOBSDNameKey;
 extern const OSSymbol *     gIOBSDMajorKey;
index 76d6a4ef35ae064d0b5e4a46d62709175c9a8247..6db286316bc4bc32fdbd0dfb980192ae235c0358 100644 (file)
@@ -103,10 +103,8 @@ enum {
        kIOPMDeviceUsable               = 0x00008000,
        kIOPMLowPower                   = 0x00010000,
 #if PRIVATE
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        kIOPMAOTPower                   = 0x00020000,
        kIOPMAOTCapability              = kIOPMAOTPower,
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 #endif /* PRIVATE */
        kIOPMPreventIdleSleep           = 0x00000040,
        kIOPMSleepCapability            = 0x00000004,
index ee04a50724eacc04bc8200fca20f97089dcde411..fd91d71e9ab448a3989b3faf68f4f2071cf13354 100644 (file)
@@ -693,7 +693,6 @@ enum {
 
 #define kIOPMWakeEventSource                0x00000001
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
 /*****************************************************************************
  *
  * AOT defs
@@ -769,8 +768,6 @@ struct IOPMAOTMetrics
 
 #define kIOPMAOTPowerKey    "aot-power"
 
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-
 /*****************************************************************************
  *
  * System Sleep Policy
index 61334b1cda8aab3db9611f2a1505fa6f831e6575..22e38474e26701ddf2283f73174ea083558dd8bc 100644 (file)
@@ -808,7 +808,6 @@ private:
        OSArray *               _systemWakeEventsArray;
        bool                    _acceptSystemWakeEvents;
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        // AOT --
        IOPMCalendarStruct   _aotWakeTimeCalendar;
        IOTimerEventSource * _aotTimerES;
@@ -838,7 +837,6 @@ public:
        bool        isAOTMode(void);
 private:
        // -- AOT
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
        void        updateTasksSuspend(void);
        int         findSuspendedPID(uint32_t pid, uint32_t *outRefCount);
index 7c0201e4df7c2381f379c5e62bde771231810414..6f8f07890de39d3a1daf4ca263a1c16f20168d0f 100644 (file)
@@ -557,17 +557,13 @@ IOCatalogue::unloadModule(OSString * moduleName) const
 }
 
 IOReturn
-IOCatalogue::_terminateDrivers(OSDictionary * matching)
+IOCatalogue::terminateDrivers(OSDictionary * matching, io_name_t className)
 {
        OSDictionary         * dict;
        OSIterator           * iter;
        IOService            * service;
        IOReturn               ret;
 
-       if (!matching) {
-               return kIOReturnBadArgument;
-       }
-
        ret = kIOReturnSuccess;
        dict = NULL;
        iter = IORegistryIterator::iterateOver(gIOServicePlane,
@@ -576,25 +572,70 @@ IOCatalogue::_terminateDrivers(OSDictionary * matching)
                return kIOReturnNoMemory;
        }
 
-       OSKext::uniquePersonalityProperties( matching );
+       if (matching) {
+               OSKext::uniquePersonalityProperties( matching );
+       }
 
        // terminate instances.
        do {
                iter->reset();
                while ((service = (IOService *)iter->getNextObject())) {
-                       dict = service->getPropertyTable();
-                       if (!dict) {
+                       if (className && !service->metaCast(className)) {
                                continue;
                        }
-
-                       /* Terminate only for personalities that match the matching dictionary.
-                        * This comparison must be done with only the keys in the
-                        * "matching" dict to enable general matching.
-                        */
-                       if (!dict->isEqualTo(matching, matching)) {
-                               continue;
+                       if (matching) {
+                               /* Terminate only for personalities that match the matching dictionary.
+                                * This comparison must be done with only the keys in the
+                                * "matching" dict to enable general matching.
+                                */
+                               dict = service->getPropertyTable();
+                               if (!dict) {
+                                       continue;
+                               }
+                               if (!dict->isEqualTo(matching, matching)) {
+                                       continue;
+                               }
                        }
 
+                       OSKext     * kext;
+                       const char * bundleIDStr;
+                       OSObject   * prop;
+                       bool         okToTerminate;
+                       for (okToTerminate = true;;) {
+                               kext = service->getMetaClass()->getKext();
+                               if (!kext) {
+                                       break;
+                               }
+                               bundleIDStr = kext->getIdentifierCString();
+                               if (!bundleIDStr) {
+                                       break;
+                               }
+                               prop = kext->getPropertyForHostArch(kOSBundleAllowUserTerminateKey);
+                               if (prop) {
+                                       okToTerminate = (kOSBooleanTrue == prop);
+                                       break;
+                               }
+                               if (!strcmp(kOSKextKernelIdentifier, bundleIDStr)) {
+                                       okToTerminate = false;
+                                       break;
+                               }
+                               if (!strncmp("com.apple.", bundleIDStr, strlen("com.apple."))) {
+                                       okToTerminate = false;
+                                       break;
+                               }
+                               break;
+                       }
+                       if (!okToTerminate) {
+#if DEVELOPMENT || DEBUG
+                               okToTerminate = true;
+#endif /* DEVELOPMENT || DEBUG */
+                               IOLog("%sallowing kextunload terminate for bundleID %s\n",
+                                   okToTerminate ? "" : "dis", bundleIDStr ? bundleIDStr : "?");
+                               if (!okToTerminate) {
+                                       ret = kIOReturnUnsupported;
+                                       break;
+                               }
+                       }
                        if (!service->terminate(kIOServiceRequired | kIOServiceSynchronous)) {
                                ret = kIOReturnUnsupported;
                                break;
@@ -649,7 +690,10 @@ IOCatalogue::terminateDrivers(OSDictionary * matching)
 {
        IOReturn ret;
 
-       ret = _terminateDrivers(matching);
+       if (!matching) {
+               return kIOReturnBadArgument;
+       }
+       ret = terminateDrivers(matching, NULL);
        IORWLockWrite(lock);
        if (kIOReturnSuccess == ret) {
                ret = _removeDrivers(matching);
@@ -695,7 +739,7 @@ IOCatalogue::terminateDriversForModule(
 
        dict->setObject(gIOModuleIdentifierKey, moduleName);
 
-       ret = _terminateDrivers(dict);
+       ret = terminateDrivers(dict, NULL);
 
        /* No goto between IOLock calls!
         */
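The refactor folds the old _terminateDrivers() and the open-coded class loop in is_io_catalog_terminate() (see IOUserClient.cpp below) into one kernel-private method that filters by matching dictionary, by class name, or both, and adds a terminate policy: kexts may opt in or out via kOSBundleAllowUserTerminateKey, the kernel's own identifier and com.apple.* bundles refuse by default, and DEVELOPMENT/DEBUG builds log the refusal but proceed anyway. The two call shapes now in use:

    /* terminate and remove personalities matching a dictionary (old behaviour) */
    ret = gIOCatalogue->terminateDrivers(matching, NULL);

    /* terminate every registry entry of a given class */
    ret = gIOCatalogue->terminateDrivers(NULL, className);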
index f219e2f1d96edad8f98ee035408e30777031684d..1077c792049fba8e79cbb52c09e600e09648a9df 100644 (file)
@@ -221,14 +221,12 @@ IODeviceTreeAlloc( void * dtTop )
                        if (!intMap && child->getProperty( gIODTInterruptParentKey)) {
                                intMap = true;
                        }
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
                        if (!strcmp("sep", child->getName())
                            || !strcmp("aop", child->getName())
                            || !strcmp("disp0", child->getName())) {
                                uint32_t aotFlags = 1;
                                child->setProperty("aot-power", &aotFlags, sizeof(aotFlags));
                        }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
                }
                regIter->release();
        }
index 9cf95e50124615c9641a5aa0163c22b5df6cec32..7c1a6a141463f762cf925eb48ebea4aa55af8ff8 100644 (file)
@@ -493,7 +493,7 @@ IOTrackingAddUser(IOTrackingQueue * queue, IOTrackingUser * mem, vm_size_t size)
        if ((kernel_task != current_task()) && (self = proc_self())) {
                bool user_64 = false;
                mem->btPID  = proc_pid(self);
-               (void)backtrace_user(&mem->btUser[0], kIOTrackingCallSiteBTs - 1, &num,
+               num = backtrace_user(&mem->btUser[0], kIOTrackingCallSiteBTs - 1, NULL,
                    &user_64, NULL);
                mem->user32 = !user_64;
                proc_rele(self);
index c1114f9ec1e7d94738614c86d34ed44f1fe44a8c..d72b900912da18fced59eed4b46b643593ca05d9 100644 (file)
@@ -324,15 +324,9 @@ static IOPMPowerState
            .outputPowerCharacter   = kIOPMSleep,
            .inputPowerRequirement  = SLEEP_POWER },
        {   .version                = 1,
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
            .capabilityFlags        = kIOPMAOTCapability,
            .outputPowerCharacter   = kIOPMAOTPower,
            .inputPowerRequirement  = ON_POWER },
-#else /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-           .capabilityFlags        = 0,
-           .outputPowerCharacter   = 0,
-           .inputPowerRequirement  = 0xFFFFFFFF },
-#endif /* (defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
        {   .version                = 1,
            .capabilityFlags        = kIOPMPowerOn,
            .outputPowerCharacter   = kIOPMPowerOn,
@@ -983,11 +977,7 @@ IOPMrootDomain::updateTasksSuspend(void)
 {
        bool newSuspend;
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        newSuspend = (tasksSuspended || _aotTasksSuspended);
-#else /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-       newSuspend = tasksSuspended;
-#endif /* (defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
        if (newSuspend == tasksSuspendState) {
                return;
        }
@@ -1234,7 +1224,6 @@ static SYSCTL_INT(_debug, OID_AUTO, swd_panic, CTLFLAG_RW, &gSwdPanic, 0, "");
 static SYSCTL_INT(_debug, OID_AUTO, swd_panic_phase, CTLFLAG_RW, &swd_panic_phase, 0, "");
 #endif
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
 //******************************************************************************
 // AOT
 
@@ -1329,8 +1318,6 @@ static SYSCTL_PROC(_kern, OID_AUTO, aotmode,
     NULL, 0, sysctl_aotmode, "I", "");
 
 //******************************************************************************
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-
 
 static const OSSymbol * gIOPMSettingAutoWakeCalendarKey;
 static const OSSymbol * gIOPMSettingAutoWakeSecondsKey;
@@ -1546,13 +1533,11 @@ IOPMrootDomain::start( IOService * nub )
                &IOPMrootDomain::dispatchPowerEvent));
        gIOPMWorkLoop->addEventSource(pmPowerStateQueue);
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        _aotMode = 0;
        _aotTimerES = IOTimerEventSource::timerEventSource(this,
            OSMemberFunctionCast(IOTimerEventSource::Action,
            this, &IOPMrootDomain::aotEvaluate));
        gIOPMWorkLoop->addEventSource(_aotTimerES);
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
        // create our power parent
        patriarch = new IORootParent;
@@ -1634,11 +1619,9 @@ IOPMrootDomain::start( IOService * nub )
        sysctl_register_oid(&sysctl__kern_consoleoptions);
        sysctl_register_oid(&sysctl__kern_progressoptions);
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        sysctl_register_oid(&sysctl__kern_aotmode);
        sysctl_register_oid(&sysctl__kern_aotmodebits);
        sysctl_register_oid(&sysctl__kern_aotmetrics);
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
 #if HIBERNATION
        IOHibernateSystemInit(this);
@@ -2647,10 +2630,8 @@ IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
        unsigned long newState;
        clock_sec_t        secs;
        clock_usec_t       microsecs;
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        clock_sec_t        adjWakeTime;
        IOPMCalendarStruct nowCalendar;
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
        ASSERT_GATED();
        newState = getPowerState();
@@ -2664,7 +2645,6 @@ IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
        notifierThread = current_thread();
        switch (getPowerState()) {
        case SLEEP_STATE: {
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
                if (kPMCalendarTypeInvalid != _aotWakeTimeCalendar.selector) {
                        secs = 0;
                        microsecs = 0;
@@ -2700,7 +2680,6 @@ IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
                        }
                }
                _aotPendingFlags &= ~kIOPMWakeEventAOTPerCycleFlags;
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
                acceptSystemWakeEvents(true);
 
                // re-enable this timer for next sleep
@@ -2710,13 +2689,9 @@ IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
                logtime(secs);
                gIOLastSleepTime.tv_sec  = secs;
                gIOLastSleepTime.tv_usec = microsecs;
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
                if (!_aotLastWakeTime) {
                        gIOLastUserSleepTime = gIOLastSleepTime;
                }
-#else
-               gIOLastUserSleepTime = gIOLastSleepTime;
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
                gIOLastWakeTime.tv_sec = 0;
                gIOLastWakeTime.tv_usec = 0;
@@ -2793,7 +2768,6 @@ IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
                gIOLastWakeTime.tv_sec  = secs;
                gIOLastWakeTime.tv_usec = microsecs;
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
                // aot
                if (_aotWakeTimeCalendar.selector != kPMCalendarTypeInvalid) {
                        _aotWakeTimeCalendar.selector = kPMCalendarTypeInvalid;
@@ -2818,7 +2792,6 @@ IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
                                setWakeTime(_aotTestTime);
                        }
                }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
 #if HIBERNATION
                LOG("System %sWake\n", gIOHibernateState ? "SafeSleep " : "");
@@ -3356,7 +3329,6 @@ IOPMrootDomain::askChangeDownDone(
                        *cancel = true;
                        DLOG("cancel dark->sleep\n");
                }
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
                if (_aotMode && (kPMCalendarTypeInvalid != _aotWakeTimeCalendar.selector)) {
                        uint64_t now = mach_continuous_time();
                        if (((now + _aotWakePreWindow) >= _aotWakeTimeContinuous)
@@ -3365,7 +3337,6 @@ IOPMrootDomain::askChangeDownDone(
                                IOLog("AOT wake window cancel: %qd, %qd\n", now, _aotWakeTimeContinuous);
                        }
                }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
        }
 }
 
@@ -3856,7 +3827,6 @@ IOPMrootDomain::willNotifyPowerChildren( IOPMPowerStateIndex newPowerState )
 #endif /* !CONFIG_EMBEDDED */
                }
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
                _aotReadyToFullWake = false;
 #if 0
                if (_aotLingerTime) {
@@ -3892,7 +3862,6 @@ IOPMrootDomain::willNotifyPowerChildren( IOPMPowerStateIndex newPowerState )
                        clock_interval_to_absolutetime_interval(2000, kMillisecondScale, &_aotWakePreWindow);
                        clock_interval_to_absolutetime_interval(1100, kMillisecondScale, &_aotWakePostWindow);
                }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
 #if HIBERNATION
                IOHibernateSystemSleep();
@@ -5629,13 +5598,11 @@ IOPMrootDomain::overrideOurPowerChange(
        uint32_t changeFlags = *inOutChangeFlags;
        uint32_t currentPowerState = (uint32_t) getPowerState();
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        if ((AOT_STATE == powerState) && (ON_STATE == currentPowerState)) {
                // Assertion may have been taken in AOT leading to changePowerStateTo(AOT)
                *inOutChangeFlags |= kIOPMNotDone;
                return;
        }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
        if (changeFlags & kIOPMParentInitiated) {
                // Root parent is permanently pegged at max power,
@@ -5901,7 +5868,6 @@ IOPMrootDomain::handleOurPowerChangeStart(
                    _desiredCapability, _currentCapability, _pendingCapability);
        }
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        if ((AOT_STATE == powerState) && (SLEEP_STATE != currentPowerState)) {
                panic("illegal AOT entry from %s", getPowerStateString(currentPowerState));
        }
@@ -5909,7 +5875,6 @@ IOPMrootDomain::handleOurPowerChangeStart(
                aotShouldExit(false, true);
                aotExit(false);
        }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 }
 
 void
@@ -7118,8 +7083,6 @@ IOPMConvertCalendarToSeconds(const IOPMCalendarStruct * dt)
        return secs;
 }
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
-
 unsigned long
 IOPMrootDomain::getRUN_STATE(void)
 {
@@ -7271,22 +7234,6 @@ IOPMrootDomain::aotEvaluate(IOTimerEventSource * timer)
        }
 }
 
-#else /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-
-unsigned long
-IOPMrootDomain::getRUN_STATE(void)
-{
-       return ON_STATE;
-}
-
-IOReturn
-IOPMrootDomain::setWakeTime(uint64_t wakeContinuousTime)
-{
-       return kIOReturnUnsupported;
-}
-
-#endif /* (defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-
 //******************************************************************************
 // adjustPowerState
 //
@@ -7305,7 +7252,6 @@ IOPMrootDomain::adjustPowerState( bool sleepASAP )
 
        ASSERT_GATED();
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        if (_aotNow) {
                bool exitNow;
 
@@ -7335,7 +7281,6 @@ IOPMrootDomain::adjustPowerState( bool sleepASAP )
                }
                return;
        }
-#endif /* (defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
        if ((!idleSleepEnabled) || !checkSystemSleepEnabled()) {
                changePowerStateToPriv(getRUN_STATE());
@@ -8253,7 +8198,6 @@ IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
 unsigned int
 IOPMrootDomain::idleSleepPreventersCount()
 {
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        if (_aotMode) {
                unsigned int count __block;
                count = 0;
@@ -8264,7 +8208,6 @@ IOPMrootDomain::idleSleepPreventersCount()
                });
                return count;
        }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
        return preventIdleSleepList->getCount();
 }
@@ -8438,14 +8381,10 @@ IOPMrootDomain::evaluateAssertions(IOPMDriverAssertionType newAssertions, IOPMDr
        }
 
        if (changedBits & kIOPMDriverAssertionCPUBit) {
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
                if (_aotNow) {
                        IOLog("CPU assertions %d\n", (0 != (kIOPMDriverAssertionCPUBit & newAssertions)));
                }
                evaluatePolicy(_aotNow ? kStimulusNoIdleSleepPreventers : kStimulusDarkWakeEvaluate);
-#else
-               evaluatePolicy(kStimulusDarkWakeEvaluate);
-#endif
                if (!assertOnWakeSecs && gIOLastWakeAbsTime) {
                        AbsoluteTime    now;
                        clock_usec_t    microsecs;
@@ -9882,10 +9821,7 @@ IOPMrootDomain::acceptSystemWakeEvents( bool accept )
                        _systemWakeEventsArray = OSArray::withCapacity(4);
                }
                _acceptSystemWakeEvents = (_systemWakeEventsArray != NULL);
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
-               if (!(_aotNow && (kIOPMWakeEventAOTExitFlags & _aotPendingFlags)))
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-               {
+               if (!(_aotNow && (kIOPMWakeEventAOTExitFlags & _aotPendingFlags))) {
                        gWakeReasonString[0] = '\0';
                        if (_systemWakeEventsArray) {
                                _systemWakeEventsArray->flushCollection();
@@ -9954,7 +9890,6 @@ IOPMrootDomain::claimSystemWakeEvent(
                return;
        }
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        IOOptionBits        aotFlags = 0;
        bool                needAOTEvaluate = FALSE;
 
@@ -9978,7 +9913,6 @@ IOPMrootDomain::claimSystemWakeEvent(
                flags |= kIOPMWakeEventAOTPossibleExit;
        }
 #endif /* DEVELOPMENT || DEBUG */
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
        deviceName   = device->copyName(gIOServicePlane);
        deviceRegId  = OSNumber::withNumber(device->getRegistryEntryID(), 64);
@@ -10001,7 +9935,6 @@ IOPMrootDomain::claimSystemWakeEvent(
 
        WAKEEVENT_LOCK();
        addWakeReason = _acceptSystemWakeEvents;
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        if (_aotMode) {
                IOLog("claimSystemWakeEvent(%s, %s, 0x%x) 0x%x %d\n", reason, deviceName->getCStringNoCopy(), (int)flags, _aotPendingFlags, _aotReadyToFullWake);
        }
@@ -10026,7 +9959,6 @@ IOPMrootDomain::claimSystemWakeEvent(
                addWakeReason     = _aotNow && _systemWakeEventsArray && ((kIOPMWakeEventAOTExitFlags & aotFlags));
                needAOTEvaluate   = _aotReadyToFullWake;
        }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
        if (!gWakeReasonSysctlRegistered) {
                // Lazy registration until the platform driver stops registering
@@ -10045,11 +9977,9 @@ IOPMrootDomain::claimSystemWakeEvent(
        }
 
        WAKEEVENT_UNLOCK();
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        if (needAOTEvaluate) {
                aotEvaluate(NULL);
        }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
 done:
        if (deviceName) {
index 45b3f42e6c25ba6892f7372f9a432577fb5c3d80..456f93729f4c5aea60f01b2a4d65afa47380d5dd 100644 (file)
@@ -664,6 +664,25 @@ runPropertyAction(Action inAction, OSObject *target,
        return res;
 }
 
+static IOReturn
+IORegistryEntryActionToBlock(OSObject *target,
+    void *arg0, void *arg1,
+    void *arg2, void *arg3)
+{
+       IORegistryEntry::ActionBlock block = (typeof(block))arg0;
+       return block();
+}
+
+IOReturn
+IORegistryEntry::runPropertyActionBlock(ActionBlock block)
+{
+       IOReturn res;
+
+       res = runPropertyAction(&IORegistryEntryActionToBlock, this, block);
+
+       return res;
+}
+
 OSObject *
 IORegistryEntry::getProperty( const OSString * aKey) const
 {
index bd20598ef709fc92d8eefbbdec4284ae9506ad88..a8387bf2abe092b0b3717c1d9908db9c50a7f7a8 100644 (file)
@@ -134,6 +134,8 @@ const OSSymbol *                gIOMatchedPersonalityKey;
 const OSSymbol *                gIORematchPersonalityKey;
 const OSSymbol *                gIORematchCountKey;
 const OSSymbol *                gIODEXTMatchCountKey;
+const OSSymbol *                gIOSupportedPropertiesKey;
+const OSSymbol *                gIOUserServicePropertiesKey;
 #if !CONFIG_EMBEDDED
 const OSSymbol *                gIOServiceLegacyMatchingRegistryIDKey;
 #endif
@@ -429,6 +431,9 @@ IOService::initialize( void )
        gIOInterruptSpecifiersKey
                = OSSymbol::withCStringNoCopy("IOInterruptSpecifiers");
 
+       gIOSupportedPropertiesKey = OSSymbol::withCStringNoCopy(kIOSupportedPropertiesKey);
+       gIOUserServicePropertiesKey = OSSymbol::withCStringNoCopy(kIOUserServicePropertiesKey);
+
        gIOMapperIDKey = OSSymbol::withCStringNoCopy(kIOMapperIDKey);
 
        gIOKitDebugKey      = OSSymbol::withCStringNoCopy( kIOKitDebugKey );
index 5cdc56a78209b57c92be5b383ea728a1dc7a3784..5ef79da20ab4e7f2d86db64f3a643a8b786ef654 100644 (file)
@@ -1222,7 +1222,6 @@ IOService::handleRegisterPowerDriver( IOPMRequest * request )
                lowestPowerState   = fPowerStates[0].stateOrderToIndex;
                fHighestPowerState = fPowerStates[numberOfStates - 1].stateOrderToIndex;
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
                {
                        uint32_t        aotFlags;
                        IOService *     service;
@@ -1254,7 +1253,6 @@ IOService::handleRegisterPowerDriver( IOPMRequest * request )
                                }
                        }
                }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
 		// OR'ing all the output power flags
                fMergedOutputPowerFlags = 0;
@@ -1975,11 +1973,9 @@ IOService::handlePowerDomainDidChangeTo( IOPMRequest * request )
                myChangeFlags = kIOPMParentInitiated | kIOPMDomainDidChange |
                    (parentChangeFlags & kIOPMRootBroadcastFlags);
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
                if (kIOPMAOTPower & fPowerStates[maxPowerState].inputPowerFlags) {
                        IOLog("aotPS %s0x%qx[%ld]\n", getName(), getRegistryEntryID(), maxPowerState);
                }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
                result = startPowerChange(
                        /* flags        */ myChangeFlags,
@@ -2747,14 +2743,12 @@ IOService::computeDesiredState( unsigned long localClamp, bool computeOnly )
                newPowerState = fHighestPowerState;
        }
 
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
        if (getPMRootDomain()->isAOTMode()) {
                if ((kIOPMPreventIdleSleep & fPowerStates[newPowerState].capabilityFlags)
                    && !(kIOPMPreventIdleSleep & fPowerStates[fDesiredPowerState].capabilityFlags)) {
                        getPMRootDomain()->claimSystemWakeEvent(this, kIOPMWakeEventAOTExit, getName(), NULL);
                }
        }
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
 
        fDesiredPowerState = newPowerState;
 
@@ -5998,11 +5992,9 @@ IOService::pmTellAppWithResponse( OSObject * object, void * arg )
                                proc_suspended = get_task_pidsuspended((task_t) proc->task);
                                if (proc_suspended) {
                                        logClientIDForNotification(object, context, "PMTellAppWithResponse - Suspended");
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
                                } else if (getPMRootDomain()->isAOTMode() && get_task_suspended((task_t) proc->task)) {
                                        proc_suspended = true;
                                        context->skippedInDark++;
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
                                }
                                proc_rele(proc);
                                if (proc_suspended) {
@@ -6213,11 +6205,7 @@ IOService::pmTellCapabilityAppWithResponse( OSObject * object, void * arg )
        }
 
        if (context->us == getPMRootDomain() &&
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
            getPMRootDomain()->isAOTMode()
-#else /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-           false
-#endif /* (defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
            ) {
                OSNumber                *clientID = NULL;
                boolean_t               proc_suspended = FALSE;
@@ -6605,11 +6593,9 @@ tellAppClientApplier( OSObject * object, void * arg )
                                proc_suspended = get_task_pidsuspended((task_t) proc->task);
                                if (proc_suspended) {
                                        logClientIDForNotification(object, context, "tellAppClientApplier - Suspended");
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
                                } else if (IOService::getPMRootDomain()->isAOTMode() && get_task_suspended((task_t) proc->task)) {
                                        proc_suspended = true;
                                        context->skippedInDark++;
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
                                }
                                proc_rele(proc);
                                if (proc_suspended) {
index 9b3cef8cec56465beeb3491eddb91c646153c9b6..e98f41f36317af252675dbea2e70f80833a9b467 100644 (file)
@@ -548,7 +548,31 @@ extern "C" {
 // functions called from osfmk/device/iokit_rpc.c
 
 void
-iokit_add_reference( io_object_t obj, ipc_kobject_type_t type )
+iokit_port_object_description(io_object_t obj, kobject_description_t desc)
+{
+       IORegistryEntry    * regEntry;
+       IOUserNotification * __unused noti;
+       _IOServiceNotifier * __unused serviceNoti;
+       OSSerialize        * __unused s;
+
+       if ((regEntry = OSDynamicCast(IORegistryEntry, obj))) {
+               snprintf(desc, KOBJECT_DESCRIPTION_LENGTH, "%s(0x%qx)", obj->getMetaClass()->getClassName(), regEntry->getRegistryEntryID());
+#if DEVELOPMENT || DEBUG
+       } else if ((noti = OSDynamicCast(IOUserNotification, obj))
+           && ((serviceNoti = OSDynamicCast(_IOServiceNotifier, noti->holdNotify)))) {
+               s = OSSerialize::withCapacity(page_size);
+               if (s && serviceNoti->matching->serialize(s)) {
+                       snprintf(desc, KOBJECT_DESCRIPTION_LENGTH, "%s(%s)", obj->getMetaClass()->getClassName(), s->text());
+               }
+               OSSafeReleaseNULL(s);
+#endif /* DEVELOPMENT || DEBUG */
+       } else {
+               snprintf(desc, KOBJECT_DESCRIPTION_LENGTH, "%s", obj->getMetaClass()->getClassName());
+       }
+}
+
+void
+iokit_add_reference( io_object_t obj, natural_t type )
 {
        IOUserClient * uc;
 
@@ -5645,28 +5669,7 @@ is_io_catalog_terminate(
        switch (flag) {
 #if !defined(SECURE_KERNEL)
        case kIOCatalogServiceTerminate:
-               OSIterator *        iter;
-               IOService *         service;
-
-               iter = IORegistryIterator::iterateOver(gIOServicePlane,
-                   kIORegistryIterateRecursively);
-               if (!iter) {
-                       return kIOReturnNoMemory;
-               }
-
-               do {
-                       iter->reset();
-                       while ((service = (IOService *)iter->getNextObject())) {
-                               if (service->metaCast(name)) {
-                                       if (!service->terminate( kIOServiceRequired
-                                           | kIOServiceSynchronous)) {
-                                               kr = kIOReturnUnsupported;
-                                               break;
-                                       }
-                               }
-                       }
-               } while (!service && !iter->isValid());
-               iter->release();
+               kr = gIOCatalogue->terminateDrivers(NULL, name);
                break;
 
        case kIOCatalogModuleUnload:
index a436a8cb2a5836aa6d72fb94e5b893bbe50e8b16..52508a7611a58abd2984a027b12360c8948c5427 100644 (file)
@@ -60,6 +60,7 @@
 #include <DriverKit/IOBufferMemoryDescriptor.h>
 #include <DriverKit/IOMemoryMap.h>
 #include <DriverKit/IODataQueueDispatchSource.h>
+#include <DriverKit/IOServiceNotificationDispatchSource.h>
 #include <DriverKit/IOUserServer.h>
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *  * * * * * * * * * * * * * * * * * * * */
@@ -104,8 +105,6 @@ class IOUserService : public IOService
 
        virtual bool
        start(IOService * provider) APPLE_KEXT_OVERRIDE;
-       virtual IOReturn
-       setProperties(OSObject * props) APPLE_KEXT_OVERRIDE;
 };
 
 OSDefineMetaClassAndStructors(IOUserService, IOService)
@@ -147,13 +146,6 @@ IOUserService::start(IOService * provider)
        return ok;
 }
 
-IOReturn
-IOUserService::setProperties(OSObject * properties)
-{
-       setProperty("USER", properties);
-       return kIOReturnSuccess;
-}
-
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *  * * * * * * * * * * * * * * * * * * * */
 
 #undef super
@@ -285,10 +277,41 @@ IMPL(IOService, SetDispatchQueue)
 kern_return_t
 IMPL(IOService, SetProperties)
 {
-       IOReturn ret = kIOReturnUnsupported;
+       IOUserServer   * us;
+       OSDictionary   * dict;
+       IOReturn         ret;
 
        ret = setProperties(properties);
 
+       if (kIOReturnUnsupported == ret) {
+               dict = OSDynamicCast(OSDictionary, properties);
+               us = (typeof(us))thread_iokit_tls_get(0);
+               if (dict && reserved->uvars && (reserved->uvars->userServer == us)) {
+                       ret = runPropertyActionBlock(^IOReturn (void) {
+                               OSDictionary   * userProps;
+                               IOReturn         ret;
+
+                               userProps = OSDynamicCast(OSDictionary, getProperty(gIOUserServicePropertiesKey));
+                               if (userProps) {
+                                       userProps = (typeof(userProps))userProps->copyCollection();
+                               } else {
+                                       userProps = OSDictionary::withCapacity(4);
+                               }
+                               if (!userProps) {
+                                       ret = kIOReturnNoMemory;
+                               } else {
+                                       bool ok = userProps->merge(dict);
+                                       if (ok) {
+                                               ok = setProperty(gIOUserServicePropertiesKey, userProps);
+                                       }
+                                       OSSafeReleaseNULL(userProps);
+                                       ret = ok ? kIOReturnSuccess : kIOReturnNotWritable;
+                               }
+                               return ret;
+                       });
+               }
+       }
+
        return ret;
 }
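When the C++ class leaves setProperties() unhandled (kIOReturnUnsupported) and the caller is the service's own user server, the incoming dictionary is merged into IOUserServiceProperties instead. The read-copy-update runs inside runPropertyActionBlock() so the merge is atomic with respect to other property access; the same idiom in isolation (key name and the updates dictionary are illustrative):

    entry->runPropertyActionBlock(^IOReturn (void) {
        OSDictionary * d = OSDynamicCast(OSDictionary, entry->getProperty("ExampleDict"));
        d = d ? (OSDictionary *) d->copyCollection() : OSDictionary::withCapacity(4);
        if (!d) {
            return kIOReturnNoMemory;
        }
        bool ok = d->merge(updates) && entry->setProperty("ExampleDict", d);
        OSSafeReleaseNULL(d);
        return ok ? kIOReturnSuccess : kIOReturnNotWritable;
    });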
 
@@ -585,6 +608,12 @@ IODispatchSource::free()
        super::free();
 }
 
+kern_return_t
+IMPL(IODispatchSource, SetEnable)
+{
+       return SetEnableWithCompletion(enable, NULL);
+}
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *  * * * * * * * * * * * * * * * * * * * */
 
 struct IOInterruptDispatchSource_IVars {
@@ -662,6 +691,10 @@ IOInterruptDispatchSource::free()
                assert(kIOReturnSuccess == ret);
        }
 
+       if (ivars && ivars->lock) {
+               IOSimpleLockFree(ivars->lock);
+       }
+
        IOSafeDeleteNULL(ivars, IOInterruptDispatchSource_IVars, 1);
 
        super::free();
@@ -711,9 +744,9 @@ IMPL(IOInterruptDispatchSource, SetEnableWithCompletion)
 }
 
 kern_return_t
-IMPL(IODispatchSource, SetEnable)
+IMPL(IOInterruptDispatchSource, Cancel)
 {
-       return SetEnableWithCompletion(enable, NULL);
+       return kIOReturnUnsupported;
 }
 
 kern_return_t
@@ -762,6 +795,266 @@ IMPL(IOInterruptDispatchSource, InterruptOccurred)
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *  * * * * * * * * * * * * * * * * * * * */
 
+enum {
+       kIOServiceNotificationTypeCount = kIOServiceNotificationTypeLast + 1,
+};
+
+struct IOServiceNotificationDispatchSource_IVars {
+       OSObject     * serverName;
+       OSAction     * action;
+       IOLock       * lock;
+       IONotifier   * notifier;
+       OSDictionary * interestNotifiers;
+       OSArray      * pending[kIOServiceNotificationTypeCount];
+       bool           enable;
+};
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, Create)
+{
+       IOUserServer * us;
+       IOReturn       ret;
+       IOServiceNotificationDispatchSource * inst;
+
+       inst = OSTypeAlloc(IOServiceNotificationDispatchSource);
+       if (!inst->init()) {
+               OSSafeReleaseNULL(inst);
+               return kIOReturnNoMemory;
+       }
+
+       us = (typeof(us))thread_iokit_tls_get(0);
+       assert(OSDynamicCast(IOUserServer, us));
+       if (!us) {
+               OSSafeReleaseNULL(inst);
+               return kIOReturnError;
+       }
+       inst->ivars->serverName = us->copyProperty(gIOUserServerNameKey);
+       if (!inst->ivars->serverName) {
+               OSSafeReleaseNULL(inst);
+               return kIOReturnNoMemory;
+       }
+
+       inst->ivars->lock    = IOLockAlloc();
+       if (!inst->ivars->lock) {
+               OSSafeReleaseNULL(inst);
+               return kIOReturnNoMemory;
+       }
+       for (uint32_t idx = 0; idx < kIOServiceNotificationTypeCount; idx++) {
+               inst->ivars->pending[idx] = OSArray::withCapacity(4);
+               if (!inst->ivars->pending[idx]) {
+                       OSSafeReleaseNULL(inst);
+                       return kIOReturnNoMemory;
+               }
+       }
+       inst->ivars->interestNotifiers = OSDictionary::withCapacity(4);
+       if (!inst->ivars->interestNotifiers) {
+               OSSafeReleaseNULL(inst);
+               return kIOReturnNoMemory;
+       }
+
+       inst->ivars->notifier = IOService::addMatchingNotification(gIOMatchedNotification, matching, 0 /*priority*/,
+           ^bool (IOService * newService, IONotifier * notifier) {
+               bool         notifyReady = false;
+               IONotifier * interest;
+               OSObject   * serverName;
+               bool         okToUse;
+
+               serverName = newService->copyProperty(gIOUserServerNameKey);
+               okToUse = (serverName && inst->ivars->serverName->isEqualTo(serverName));
+               OSSafeReleaseNULL(serverName);
+               if (!okToUse) {
+                       return false;
+               }
+
+               IOLockLock(inst->ivars->lock);
+               notifyReady = (0 == inst->ivars->pending[kIOServiceNotificationTypeMatched]->getCount());
+               inst->ivars->pending[kIOServiceNotificationTypeMatched]->setObject(newService);
+               IOLockUnlock(inst->ivars->lock);
+
+               interest = newService->registerInterest(gIOGeneralInterest,
+               ^IOReturn (uint32_t messageType, IOService * provider,
+               void * messageArgument, size_t argSize) {
+                       IONotifier * interest;
+                       bool         notifyReady = false;
+
+                       switch (messageType) {
+                       case kIOMessageServiceIsTerminated:
+                               IOLockLock(inst->ivars->lock);
+                               notifyReady = (0 == inst->ivars->pending[kIOServiceNotificationTypeTerminated]->getCount());
+                               inst->ivars->pending[kIOServiceNotificationTypeTerminated]->setObject(provider);
+                               interest = (typeof(interest))inst->ivars->interestNotifiers->getObject((const OSSymbol *) newService);
+                               assert(interest);
+                               interest->remove();
+                               inst->ivars->interestNotifiers->removeObject((const OSSymbol *) newService);
+                               IOLockUnlock(inst->ivars->lock);
+                               break;
+                       default:
+                               break;
+                       }
+                       if (notifyReady && inst->ivars->action) {
+                               inst->ServiceNotificationReady(inst->ivars->action);
+                       }
+                       return kIOReturnSuccess;
+               });
+               if (interest) {
+                       IOLockLock(inst->ivars->lock);
+                       inst->ivars->interestNotifiers->setObject((const OSSymbol *) newService, interest);
+                       IOLockUnlock(inst->ivars->lock);
+               }
+               if (notifyReady) {
+                       if (inst->ivars->action) {
+                               inst->ServiceNotificationReady(inst->ivars->action);
+                       }
+               }
+               return false;
+       });
+
+	if (!inst->ivars->notifier) {
+		OSSafeReleaseNULL(inst);
+		/* bail out here: falling through would report success with a NULL out-param */
+		*notification = NULL;
+		return kIOReturnError;
+	}
+
+	*notification = inst;
+	ret = kIOReturnSuccess;
+
+       return ret;
+}
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, CopyNextNotification)
+{
+       IOService * next;
+       uint32_t    idx;
+
+       IOLockLock(ivars->lock);
+       for (idx = 0; idx < kIOServiceNotificationTypeCount; idx++) {
+               next = (IOService *) ivars->pending[idx]->getObject(0);
+               if (next) {
+                       next->retain();
+                       ivars->pending[idx]->removeObject(0);
+                       break;
+               }
+       }
+       IOLockUnlock(ivars->lock);
+
+       if (idx == kIOServiceNotificationTypeCount) {
+               idx = kIOServiceNotificationTypeNone;
+       }
+       *type    = idx;
+       *service = next;
+       *options = 0;
+
+       return kIOReturnSuccess;
+}
+
+bool
+IOServiceNotificationDispatchSource::init()
+{
+       if (!super::init()) {
+               return false;
+       }
+       ivars = IONewZero(IOServiceNotificationDispatchSource_IVars, 1);
+       if (!ivars) {
+               return false;
+       }
+
+       return true;
+}
+
+void
+IOServiceNotificationDispatchSource::free()
+{
+       if (ivars) {
+               OSSafeReleaseNULL(ivars->serverName);
+               if (ivars->interestNotifiers) {
+                       ivars->interestNotifiers->iterateObjects(^bool (const OSSymbol * key, OSObject * object) {
+                               IONotifier * interest = (typeof(interest))object;
+                               interest->remove();
+                               return false;
+                       });
+                       OSSafeReleaseNULL(ivars->interestNotifiers);
+               }
+               for (uint32_t idx = 0; idx < kIOServiceNotificationTypeCount; idx++) {
+                       OSSafeReleaseNULL(ivars->pending[idx]);
+               }
+               if (ivars->lock) {
+                       IOLockFree(ivars->lock);
+                       ivars->lock = NULL;
+               }
+               if (ivars->notifier) {
+                       ivars->notifier->remove();
+                       ivars->notifier = NULL;
+               }
+               IOSafeDeleteNULL(ivars, IOServiceNotificationDispatchSource_IVars, 1);
+       }
+
+       super::free();
+}
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, SetHandler)
+{
+       IOReturn ret;
+       bool     notifyReady;
+
+       notifyReady = false;
+
+       IOLockLock(ivars->lock);
+       OSSafeReleaseNULL(ivars->action);
+       action->retain();
+       ivars->action = action;
+       if (action) {
+               for (uint32_t idx = 0; idx < kIOServiceNotificationTypeCount; idx++) {
+                       notifyReady = (ivars->pending[idx]->getCount());
+                       if (notifyReady) {
+                               break;
+                       }
+               }
+       }
+       IOLockUnlock(ivars->lock);
+
+       if (notifyReady) {
+               ServiceNotificationReady(action);
+       }
+       ret = kIOReturnSuccess;
+
+       return ret;
+}
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, SetEnableWithCompletion)
+{
+       if (enable == ivars->enable) {
+               return kIOReturnSuccess;
+       }
+
+       IOLockLock(ivars->lock);
+       ivars->enable = enable;
+       IOLockUnlock(ivars->lock);
+
+       return kIOReturnSuccess;
+}
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, Cancel)
+{
+       return kIOReturnUnsupported;
+}
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, CheckForWork)
+{
+       return kIOReturnNotReady;
+}
+
+kern_return_t
+IOServiceNotificationDispatchSource::DeliverNotifications(IOServiceNotificationBlock block)
+{
+       return kIOReturnUnsupported;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *  * * * * * * * * * * * * * * * * * * * */
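IOServiceNotificationDispatchSource is the kernel half of the new DriverKit service-notification source: Create() registers a gIOMatchedNotification handler filtered to services published by the same user server, queues Matched/Terminated events per type, and signals the dext through ServiceNotificationReady whenever an empty queue becomes non-empty. A hypothetical dext-side drain loop, assuming the generated DriverKit bindings:

    // Called from the ServiceNotificationReady OSAction handler.
    uint64_t    type;
    uint64_t    options;
    IOService * service;

    while ((kIOReturnSuccess == source->CopyNextNotification(&type, &service, &options))
        && (kIOServiceNotificationTypeNone != type)) {
        // type is kIOServiceNotificationTypeMatched or ...Terminated.
        service->release();
    }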
+
 kern_return_t
 IOUserServer::waitInterruptTrap(void * p1, void * p2, void * p3, void * p4, void * p5, void * p6)
 {
@@ -907,6 +1200,10 @@ IMPL(IODispatchQueue, Create)
 kern_return_t
 IMPL(IODispatchQueue, SetPort)
 {
+       if (MACH_PORT_NULL != ivars->serverPort) {
+               return kIOReturnNotReady;
+       }
+
        ivars->serverPort = port;
        return kIOReturnSuccess;
 }
@@ -926,6 +1223,10 @@ IODispatchQueue::init()
 void
 IODispatchQueue::free()
 {
+       if (ivars && ivars->serverPort) {
+               ipc_port_release_send(ivars->serverPort);
+               ivars->serverPort = MACH_PORT_NULL;
+       }
        IOSafeDeleteNULL(ivars, IODispatchQueue_IVars, 1);
        super::free();
 }
@@ -1007,7 +1308,11 @@ struct IOPStrings {
 kern_return_t
 OSUserMetaClass::Dispatch(IORPC rpc)
 {
-       return const_cast<OSMetaClass *>(meta)->Dispatch(rpc);
+       if (meta) {
+               return const_cast<OSMetaClass *>(meta)->Dispatch(rpc);
+       } else {
+               return kIOReturnUnsupported;
+       }
 }
 
 void
@@ -1290,7 +1595,7 @@ IOUserServer::copyInStringArray(const char * string, uint32_t userSize)
        array->count = 0;
        cstr = &array->strings[0];
        end =  &array->strings[array->dataSize];
-       while ((len = cstr[0])) {
+       while ((len = (unsigned char)cstr[0])) {
                cstr++;
                if ((cstr + len) >= end) {
                        break;
@@ -1318,7 +1623,7 @@ IOUserServer::stringArrayIndex(IOPStrings * array, const char * look)
        cstr = &array->strings[0];
        end  =  &array->strings[array->dataSize];
        llen = strlen(look);
-       while ((len = cstr[0])) {
+       while ((len = (unsigned char)cstr[0])) {
                cstr++;
                if ((cstr + len) >= end) {
                        break;
@@ -1433,12 +1738,15 @@ IOUserServer::objectInstantiate(OSObject * obj, IORPC rpc, IORPCMessage * messag
                        resultFlags |= kOSObjectRPCRemote;
                }
                if (service->reserved->uvars && service->reserved->uvars->userServer) {
+                       IOLockLock(service->reserved->uvars->userServer->fLock);
                        userMeta = (typeof(userMeta))service->reserved->uvars->userServer->fClasses->getObject(str);
+                       IOLockUnlock(service->reserved->uvars->userServer->fLock);
                }
        }
        if (!str && !userMeta) {
                const OSMetaClass * meta;
                meta = obj->getMetaClass();
+               IOLockLock(fLock);
                while (meta && !userMeta) {
                        str = (OSString *) meta->getClassNameSymbol();
                        userMeta = (typeof(userMeta))fClasses->getObject(str);
@@ -1446,10 +1754,13 @@ IOUserServer::objectInstantiate(OSObject * obj, IORPC rpc, IORPCMessage * messag
                                meta = meta->getSuperClass();
                        }
                }
+               IOLockUnlock(fLock);
        }
        if (str) {
                if (!userMeta) {
+                       IOLockLock(fLock);
                        userMeta = (typeof(userMeta))fClasses->getObject(str);
+                       IOLockUnlock(fLock);
                }
                if (kIODKLogSetup & gIODKDebug) {
                        DKLOG("userMeta %s %p\n", str->getCStringNoCopy(), userMeta);
@@ -1495,7 +1806,7 @@ IOUserServer::objectInstantiate(OSObject * obj, IORPC rpc, IORPCMessage * messag
                                idx = 0;
                                sendPort = NULL;
                                if (queue && (kIODispatchQueueStopped != queue)) {
-                                       sendPort = ipc_port_make_send(queue->ivars->serverPort);
+                                       sendPort = ipc_port_copy_send(queue->ivars->serverPort);
                                }
                                replySize = sizeof(OSObject_Instantiate_Rpl)
                                    + queueCount * sizeof(machReply->objects[0])
@@ -1530,7 +1841,7 @@ IOUserServer::objectInstantiate(OSObject * obj, IORPC rpc, IORPCMessage * messag
                                        queue = uvars->queueArray[idx];
                                        sendPort = NULL;
                                        if (queue) {
-                                               sendPort = ipc_port_make_send(queue->ivars->serverPort);
+                                               sendPort = ipc_port_copy_send(queue->ivars->serverPort);
                                        }
                                        machReply->objects[idx].type        = MACH_MSG_PORT_DESCRIPTOR;
                                        machReply->objects[idx].disposition = MACH_MSG_TYPE_MOVE_SEND;
@@ -1692,6 +2003,9 @@ IOUserServer::server(ipc_kmsg_t requestkmsg, ipc_kmsg_t * pReply)
        if (!message) {
                return kIOReturnIPCError;
        }
+       if (message->objectRefs == 0) {
+               return kIOReturnIPCError;
+       }
        ret = copyInObjects(msgin, message, msgin->msgh.msgh_size, true, false);
        if (kIOReturnSuccess != ret) {
                if (kIODKLogIPC & gIODKDebug) {
@@ -1737,7 +2051,7 @@ IOUserServer::server(ipc_kmsg_t requestkmsg, ipc_kmsg_t * pReply)
                bzero((void *)msgout, replyAlloc);
        }
 
-       IORPC rpc = { .message = msgin, .sendSize = msgin->msgh.msgh_size, .reply = msgout, .replySize = replyAlloc };
+       IORPC rpc = { .message = msgin, .reply = msgout, .sendSize = msgin->msgh.msgh_size, .replySize = replyAlloc };
 
        if (object) {
                thread_iokit_tls_set(0, this);
@@ -1978,7 +2292,7 @@ IOUserServer::rpc(IORPC rpc)
                port = queue->ivars->serverPort;
        }
        if (port) {
-               sendPort = ipc_port_make_send(port);
+               sendPort = ipc_port_copy_send(port);
        }
        IOLockUnlock(gIOUserServerLock);
        if (!sendPort) {
@@ -1990,46 +2304,63 @@ IOUserServer::rpc(IORPC rpc)
        ret = copyOutObjects(mach, message, sendSize, false);
 
        mach->msgh.msgh_bits = MACH_MSGH_BITS_COMPLEX |
-           MACH_MSGH_BITS(MACH_MSG_TYPE_MOVE_SEND, (oneway ? 0 : MACH_MSG_TYPE_MAKE_SEND_ONCE));
+           MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, (oneway ? 0 : MACH_MSG_TYPE_MAKE_SEND_ONCE));
        mach->msgh.msgh_remote_port  = sendPort;
        mach->msgh.msgh_local_port   = (oneway ? MACH_PORT_NULL : mig_get_reply_port());
        mach->msgh.msgh_id           = kIORPCVersionCurrent;
        mach->msgh.msgh_reserved     = 0;
 
+       boolean_t message_moved;
+
        if (oneway) {
-               ret = mach_msg_send_from_kernel(&mach->msgh, sendSize);
+               ret = kernel_mach_msg_send(&mach->msgh, sendSize,
+                   MACH_SEND_MSG | MACH_SEND_ALWAYS | MACH_SEND_NOIMPORTANCE,
+                   0, &message_moved);
        } else {
                assert(replySize >= (sizeof(IORPCMessageMach) + sizeof(IORPCMessage)));
-               ret = mach_msg_rpc_from_kernel(&mach->msgh, sendSize, replySize);
-               if (KERN_SUCCESS == ret) {
-                       if (kIORPCVersionCurrentReply != mach->msgh.msgh_id) {
-                               ret = (MACH_NOTIFY_SEND_ONCE == mach->msgh.msgh_id) ? MIG_SERVER_DIED : MIG_REPLY_MISMATCH;
-                       } else if ((replySize = mach->msgh.msgh_size) < (sizeof(IORPCMessageMach) + sizeof(IORPCMessage))) {
+               ret = kernel_mach_msg_rpc(&mach->msgh, sendSize, replySize, FALSE, &message_moved);
+       }
+
+       ipc_port_release_send(sendPort);
+
+       if (MACH_MSG_SUCCESS != ret) {
+               if (kIODKLogIPC & gIODKDebug) {
+                       DKLOG("mach_msg() failed 0x%x\n", ret);
+               }
+               if (!message_moved) {
+                       // release ports
+                       copyInObjects(mach, message, sendSize, false, true);
+               }
+       }
+
+       if ((KERN_SUCCESS == ret) && !oneway) {
+               if (kIORPCVersionCurrentReply != mach->msgh.msgh_id) {
+                       ret = (MACH_NOTIFY_SEND_ONCE == mach->msgh.msgh_id) ? MIG_SERVER_DIED : MIG_REPLY_MISMATCH;
+               } else if ((replySize = mach->msgh.msgh_size) < (sizeof(IORPCMessageMach) + sizeof(IORPCMessage))) {
 //                             printf("BAD REPLY SIZE\n");
+                       ret = MIG_BAD_ARGUMENTS;
+               } else {
+                       if (!(MACH_MSGH_BITS_COMPLEX & mach->msgh.msgh_bits)) {
+                               mach->msgh_body.msgh_descriptor_count = 0;
+                       }
+                       message = IORPCMessageFromMach(mach, true);
+                       if (!message) {
+                               ret = kIOReturnIPCError;
+                       } else if (message->msgid != msgid) {
+//                                     printf("BAD REPLY ID\n");
                                ret = MIG_BAD_ARGUMENTS;
                        } else {
-                               if (!(MACH_MSGH_BITS_COMPLEX & mach->msgh.msgh_bits)) {
-                                       mach->msgh_body.msgh_descriptor_count = 0;
-                               }
-                               message = IORPCMessageFromMach(mach, true);
-                               if (!message) {
-                                       ret = kIOReturnIPCError;
-                               } else if (message->msgid != msgid) {
-//                                     printf("BAD REPLY ID\n");
-                                       ret = MIG_BAD_ARGUMENTS;
-                               } else {
-                                       bool isError = (0 != (kIORPCMessageError & message->flags));
-                                       ret = copyInObjects(mach, message, replySize, !isError, true);
-                                       if (kIOReturnSuccess != ret) {
-                                               if (kIODKLogIPC & gIODKDebug) {
-                                                       DKLOG("rpc copyin(0x%x) %x\n", ret, mach->msgh.msgh_id);
-                                               }
-                                               return KERN_NOT_SUPPORTED;
-                                       }
-                                       if (isError) {
-                                               IORPCMessageErrorReturnContent * errorMsg = (typeof(errorMsg))message;
-                                               ret = errorMsg->result;
+                               bool isError = (0 != (kIORPCMessageError & message->flags));
+                               ret = copyInObjects(mach, message, replySize, !isError, true);
+                               if (kIOReturnSuccess != ret) {
+                                       if (kIODKLogIPC & gIODKDebug) {
+                                               DKLOG("rpc copyin(0x%x) %x\n", ret, mach->msgh.msgh_id);
                                        }
+                                       return KERN_NOT_SUPPORTED;
+                               }
+                               if (isError) {
+                                       IORPCMessageErrorReturnContent * errorMsg = (typeof(errorMsg))message;
+                                       ret = errorMsg->result;
                                }
                        }
                }
@@ -2471,6 +2802,9 @@ IOUserClient * IOUserServer::withTask(task_t owningTask)
                }
        }
 
+       /* Mark the current task's space as eligible for uext object ports */
+       iokit_label_dext_task(inst->fOwningTask);
+
        inst->fLock     = IOLockAlloc();
        inst->fServices = OSArray::withCapacity(4);
        inst->fClasses  = OSDictionary::withCapacity(16);
@@ -2622,12 +2956,21 @@ IOUserServer::registerClass(OSClassDescription * desc, uint32_t size, OSUserMeta
 
        cls->name = sym;
        cls->meta = OSMetaClass::copyMetaClassWithName(sym);
+       IOLockLock(fLock);
        cls->superMeta = OSDynamicCast(OSUserMetaClass, fClasses->getObject(desc->superName));
-       fClasses->setObject(sym, cls);
+       if (fClasses->getObject(sym) != NULL) {
+               /* class with this name exists */
+               ret = kIOReturnBadArgument;
+       } else {
+               if (fClasses->setObject(sym, cls)) {
+                       *pCls = cls;
+               } else {
+                       /* could not add class to fClasses */
+                       ret = kIOReturnNoMemory;
+               }
+       }
+       IOLockUnlock(fLock);
        cls->release();
-
-       *pCls = cls;
-
        return ret;
 }
 
@@ -2807,13 +3150,6 @@ IOUserServer::serviceNewUserClient(IOService * service, task_t owningTask, void
                }
        }
 
-       ret = userUC->Start(service);
-       if (kIOReturnSuccess != ret) {
-               userUC->detach(this);
-               userUC->release();
-               return ret;
-       }
-
        *handler = userUC;
 
        return ret;
@@ -2971,6 +3307,7 @@ IMPL(IOService, Create)
                        service = OSDynamicCast(IOService, inst);
                        if (service && service->init(properties) && service->attach(this)) {
                                reserved->uvars->userServer->serviceAttach(service, this);
+                               service->reserved->uvars->started = true;
                                ret = kIOReturnSuccess;
                                *result = service;
                        }
@@ -2986,6 +3323,25 @@ IMPL(IOService, Create)
        return ret;
 }
 
+kern_return_t
+IMPL(IOService, Terminate)
+{
+       IOUserServer * us;
+
+       if (options) {
+               return kIOReturnUnsupported;
+       }
+
+       us = (typeof(us))thread_iokit_tls_get(0);
+       if (!reserved->uvars
+           || (reserved->uvars->userServer != us)) {
+               return kIOReturnNotPermitted;
+       }
+       terminate(kIOServiceTerminateNeedWillTerminate);
+
+       return kIOReturnSuccess;
+}
+
 kern_return_t
 IMPL(IOService, NewUserClient)
 {
@@ -3009,6 +3365,80 @@ IMPL(IOService, SearchProperty)
        return object ? kIOReturnSuccess : kIOReturnNotFound;
 }
 
+kern_return_t
+IMPL(IOService, CopyProviderProperties)
+{
+       IOReturn    ret;
+       OSArray   * result;
+       IOService * provider;
+
+       result = OSArray::withCapacity(8);
+       if (!result) {
+               return kIOReturnNoMemory;
+       }
+
+       ret = kIOReturnSuccess;
+       for (provider = this; provider; provider = provider->getProvider()) {
+               OSObject     * obj;
+               OSDictionary * props;
+
+               obj = provider->copyProperty(gIOSupportedPropertiesKey);
+               props = OSDynamicCast(OSDictionary, obj);
+               if (!props) {
+                       OSSafeReleaseNULL(obj);
+                       props = provider->dictionaryWithProperties();
+               }
+               if (!props) {
+                       ret = kIOReturnNoMemory;
+                       break;
+               }
+               bool __block addClass = true;
+               if (propertyKeys) {
+                       OSDictionary * retProps;
+                       retProps = OSDictionary::withCapacity(4);
+                       addClass = false;
+                       if (!retProps) {
+                               ret = kIOReturnNoMemory;
+                               break;
+                       }
+                       propertyKeys->iterateObjects(^bool (OSObject * _key) {
+                               OSString * key = OSDynamicCast(OSString, _key);
+                               if (gIOClassKey->isEqualTo(key)) {
+                                       addClass = true;
+                                       return false;
+                               }
+                               retProps->setObject(key, props->getObject(key));
+                               return false;
+                       });
+                       OSSafeReleaseNULL(props);
+                       props = retProps;
+               }
+               if (addClass) {
+                       OSArray * classes = OSArray::withCapacity(8);
+                       if (!classes) {
+                               ret = kIOReturnNoMemory;
+                               break;
+                       }
+                       for (const OSMetaClass * meta = provider->getMetaClass(); meta; meta = meta->getSuperClass()) {
+                               classes->setObject(meta->getClassNameSymbol());
+                       }
+                       props->setObject(gIOClassKey, classes);
+                       OSSafeReleaseNULL(classes);
+               }
+               bool ok = result->setObject(props);
+               props->release();
+               if (!ok) {
+                       ret = kIOReturnNoMemory;
+                       break;
+               }
+       }
+       if (kIOReturnSuccess != ret) {
+               OSSafeReleaseNULL(result);
+       }
+       *properties = result;
+       return ret;
+}
+
 void
 IOUserServer::systemPower(bool powerOff)
 {
@@ -3182,8 +3612,6 @@ IOUserServer::serviceStop(IOService * service, IOService *)
                return kIOReturnSuccess;
        }
 
-       IOMachPortDestroyUserReferences(service, IKOT_UEXT_OBJECT);
-
        if (uvars->queueArray && uvars->userMeta) {
                queueAlloc = 1;
                if (uvars->userMeta->queueNames) {
@@ -3303,12 +3731,6 @@ IMPL(IOService, Stop)
        return kIOReturnSuccess;
 }
 
-kern_return_t
-IMPL(IOInterruptDispatchSource, Cancel)
-{
-       return kIOReturnUnsupported;
-}
-
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 #undef super
@@ -3340,7 +3762,7 @@ IOUserUserClient::stop(IOService * provider)
 IOReturn
 IOUserUserClient::clientClose(void)
 {
-       terminate();
+       terminate(kIOServiceTerminateNeedWillTerminate);
        return kIOReturnSuccess;
 }
 
index 9b2578710346dea730f418479d65731b867245a9..77b47578c5b2e5b11593978cdccd5f13af2ddb81 100644 (file)
@@ -21,6 +21,7 @@ OPTIONS/mach_assert                           optional mach_assert
 ./DriverKit/IODispatchQueue.iig.cpp            optional iokitcpp
 ./DriverKit/IOInterruptDispatchSource.iig.cpp          optional iokitcpp
 ./DriverKit/IODataQueueDispatchSource.iig.cpp          optional iokitcpp
+./DriverKit/IOServiceNotificationDispatchSource.iig.cpp                optional iokitcpp
 ./DriverKit/IOUserServer.iig.cpp               optional iokitcpp
 
 # libIOKit
index 0d564de95e67b6b51c40c498a140f7e0e8a29d9d..6015d46838702e693fa9eb62b7a0e1c1debea1be 100644 (file)
@@ -1391,7 +1391,6 @@ OSMetaClass::getSuperClass() const
 }
 
 /*********************************************************************
-* xxx - I want to rename this :-/
 *********************************************************************/
 const OSSymbol *
 OSMetaClass::getKmodName() const
@@ -1403,6 +1402,14 @@ OSMetaClass::getKmodName() const
         return OSSymbol::withCStringNoCopy("unknown");
 }
 
+/*********************************************************************
+*********************************************************************/
+OSKext *
+OSMetaClass::getKext() const
+{
+        return reserved ? reserved->kext : NULL;
+}
+
 /*********************************************************************
 *********************************************************************/
 unsigned int
index b9011d07225ec4bf4506849d66bc3081b8d9ae1d..f602dfd7d343d7a42a3a695239fc1f55cbbcd18b 100644 (file)
@@ -335,6 +335,15 @@ __BEGIN_DECLS
  */
 #define kOSBundleAllowUserLoadKey               "OSBundleAllowUserLoad"
 
+/*!
+ * @define   kOSBundleAllowUserTerminateKey
+ * @abstract A boolean value indicating whether the kextunload tool
+ *           is allowed to issue IOService terminate to classes defined in this kext.
+ * @discussion A boolean value indicating whether the kextunload tool
+ *           is allowed to issue IOService terminate to classes defined in this kext.
+ */
+#define kOSBundleAllowUserTerminateKey          "OSBundleAllowUserTerminate"
+
 /*!
  * @define   kOSKernelResourceKey
  * @abstract A boolean value indicating whether the kext represents a built-in
index 03da0e6c29a9975fc2fe06f23f753bcb34d70719..b9688fd51970fff442ff036cc96caa3cb498367a 100644 (file)
@@ -136,6 +136,7 @@ class OSSerialize;
 #ifdef XNU_KERNEL_PRIVATE
 class OSOrderedSet;
 class OSCollection;
+class OSKext;
 #endif /* XNU_KERNEL_PRIVATE */
 struct IORPC;
 class OSInterface
@@ -1729,6 +1730,7 @@ public:
        virtual OSObject * alloc() const = 0;
 
 #ifdef XNU_KERNEL_PRIVATE
+       OSKext * getKext() const;
        void addInstance(const OSObject * instance, bool super = false) const;
        void removeInstance(const OSObject * instance, bool super = false) const;
        void applyToInstances(OSMetaClassInstanceApplierFunction applier,
index 57d67509466e616870fe0f203631cc25dbd7ca86..62585338d72560d88739fe7e3b7ae20533c4d522 100644 (file)
@@ -638,6 +638,21 @@ mach_port_kobject(
        return rv;
 }
 
+kern_return_t
+mach_port_kobject_description(
+       ipc_space_t task,
+       mach_port_name_t name,
+       natural_t *object_type,
+       mach_vm_address_t *object_addr,
+       kobject_description_t desc)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_kobject_description(task, name, object_type, object_addr, desc);
+
+       return rv;
+}
+
 kern_return_t
 mach_port_construct(
        ipc_space_t             task,
index 30a31c68ccefb8c769e6b7e2df6ffaa06d94a230..8ebcc87c87153ec4da057025d64522e7fbbd1424 100644 (file)
@@ -87,6 +87,18 @@ _pthread_clear_qos_tsd(mach_port_t thread_port)
        }
 }
 
+__attribute__((visibility("hidden")))
+int
+pthread_current_stack_contains_np(const void *addr, size_t len)
+{
+       if (_libkernel_functions->version >= 4 &&
+           _libkernel_functions->pthread_current_stack_contains_np) {
+               return _libkernel_functions->pthread_current_stack_contains_np(addr, len);
+       }
+
+       return 0;
+}
+
 /*
  * Upcalls to optimized libplatform string functions
  */
index 514afef02fc8bc6c2cd9bec3f2cf38f5b8ac831f..42aba7be44738d375daac70f574c29ab3a658aaf 100644 (file)
@@ -64,6 +64,9 @@ typedef const struct _libkernel_functions {
        /* The following functions are included in version 3 of this structure */
        void (*pthread_clear_qos_tsd)(mach_port_t);
 
+       /* The following functions are included in version 4 of this structure */
+       int (*pthread_current_stack_contains_np)(const void *, size_t);
+
        /* Subsequent versions must only add pointers! */
 } *_libkernel_functions_t;
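
The append-only rule above is what makes the version-gated dispatch in _libc_funcptr.c safe: an older provider simply ends before the new slot. A minimal, hypothetical sketch of the pattern (struct and function names are illustrative, not the real _libkernel_functions layout):

    #include <stddef.h>

    struct fns_v3 {
            unsigned long version;          /* == 3: ends after clear_qos */
            void (*clear_qos)(int port);
    };

    struct fns_v4 {
            unsigned long version;          /* == 4: adds one pointer at the end */
            void (*clear_qos)(int port);
            int (*stack_contains)(const void *addr, size_t len);
    };

    static int
    call_stack_contains(const struct fns_v4 *fns, const void *p, size_t len)
    {
            /* A v3 provider ends before the stack_contains slot, so gate
             * on version before reading it, as the wrapper above does. */
            if (fns->version >= 4 && fns->stack_contains) {
                    return fns->stack_contains(p, len);
            }
            return 0;
    }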
 
index ae5585b048ed24a32e279881c5aea5959995009f..73dfc1a3e02d2c9b3679039f1b6be5cd1b47310e 100644 (file)
@@ -943,6 +943,27 @@ posix_spawnattr_setspecialport_np(
        return posix_spawn_appendportaction_np(attr, &action);
 }
 
+/*
+ * posix_spawnattr_setsuidcredport_np
+ *
+ * Description:        Set an suid cred port to be used to execute with a different UID.
+ *
+ * Parameters: attr                    The spawn attributes object for the
+ *                                      new process
+ *              port                   The suid cred port
+ *
+ * Returns:    0                       Success
+ */
+int
+posix_spawnattr_setsuidcredport_np(posix_spawnattr_t *attr, mach_port_t port)
+{
+       _ps_port_action_t action = {
+               .port_type = PSPA_SUID_CRED,
+               .new_port = port,
+       };
+       return posix_spawn_appendportaction_np(attr, &action);
+}
+
 /*
  * posix_spawnattr_setexceptionports_np
  *
index 1b83c9d960fd75ab764c385b10bafdb15d2ab87d..1bc1171fde1881c2eda7d9a5ca4793184a94d644 100644 (file)
@@ -148,6 +148,8 @@ int     posix_spawnattr_setexceptionports_np(posix_spawnattr_t * __restrict,
 int     posix_spawnattr_setspecialport_np(posix_spawnattr_t * __restrict,
     mach_port_t, int) __API_AVAILABLE(macos(10.5), ios(2.0)) __SPI_AVAILABLE(watchos(2.0), tvos(9.0), bridgeos(1.0));
 
+int     posix_spawnattr_setsuidcredport_np(posix_spawnattr_t * __restrict, mach_port_t) __SPI_AVAILABLE(ios(13.0), macos(10.15));
+
 int     posix_spawn_file_actions_addinherit_np(posix_spawn_file_actions_t *,
     int) __API_AVAILABLE(macos(10.7), ios(4.3)) __SPI_AVAILABLE(watchos(2.0), tvos(9.0), bridgeos(1.0));
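
A hedged usage sketch of the new SPI; minting the suid cred port itself is outside this diff, so suid_cred_port below is assumed to have been obtained elsewhere (e.g. from an entitled service):

    #include <sys/types.h>
    #include <spawn.h>
    #include <mach/mach.h>

    /* Hypothetical caller: spawn path so it executes with the UID
     * carried by a previously obtained suid cred port. */
    static int
    spawn_with_suid_cred(const char *path, char *const argv[],
        char *const envp[], mach_port_t suid_cred_port, pid_t *pid)
    {
            posix_spawnattr_t attr;
            int err;

            err = posix_spawnattr_init(&attr);
            if (err != 0) {
                    return err;
            }
            /* Queues a PSPA_SUID_CRED port action, consumed at exec time. */
            err = posix_spawnattr_setsuidcredport_np(&attr, suid_cred_port);
            if (err == 0) {
                    err = posix_spawn(pid, path, NULL, &attr, argv, envp);
            }
            posix_spawnattr_destroy(&attr);
            return err;
    }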
 
index dd7719b471d1ebd4f5e8911d2c6ba0275e255311..9b46ed9403abdb0c8528477b2f380e6aa377e212 100644 (file)
 #include <sys/types.h>
 #include <stdint.h>
 #include <signal.h>
+#include <os/reason_private.h>
 #include <unistd.h>
 
+/* Crash simulation */
+
+extern int pthread_current_stack_contains_np(const void *, unsigned long);
+int
+__darwin_check_fd_set_overflow(int n, const void *fd_set, int unlimited_select)
+{
+       if (n < 0) {
+               os_fault_with_payload(OS_REASON_LIBSYSTEM, OS_REASON_LIBSYSTEM_CODE_FAULT,
+                   &n, sizeof(n), "FD_SET underflow", 0);
+               return 0;
+       }
+
+       if (n >= __DARWIN_FD_SETSIZE) {
+               if (pthread_current_stack_contains_np((const void *) fd_set, sizeof(struct fd_set))) {
+                       if (!unlimited_select) {
+                               os_fault_with_payload(OS_REASON_LIBSYSTEM, OS_REASON_LIBSYSTEM_CODE_FAULT,
+                                   &n, sizeof(n), "FD_SET overflow", 0);
+                               return 0;
+                       } else {
+                               return 1;
+                       }
+               } else {
+                       return 1;
+               }
+       }
+
+       return 1;
+}
+
 /* System call entry points */
 int __terminate_with_payload(int pid, uint32_t reason_namespace, uint64_t reason_code,
     void *payload, uint32_t payload_size, const char *reason_string,
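
The check above is meant to back the FD_SET family of macros (the macro plumbing lives in bsd/sys/_types/_fd_def.h, also touched by this commit). A hedged sketch of a caller; safe_fd_set is an illustrative stand-in, not the actual macro:

    #include <sys/select.h>

    extern int __darwin_check_fd_set_overflow(int, const void *, int);

    /* Only touch the bitmap when the descriptor is in bounds; a 0
     * return means the fault was already reported and the write must
     * be suppressed. */
    static void
    safe_fd_set(int fd, fd_set *set)
    {
            if (__darwin_check_fd_set_overflow(fd, set, 0 /* !unlimited_select */)) {
                    FD_SET(fd, set);
            }
    }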
index b5ddcad612d446048db93e48e3ee745d72eb8a26..ae727ba022f986ffebb5c1dc460776201c06124f 100644 (file)
@@ -402,7 +402,7 @@ convert_port_to_UNDReply(
                        ip_unlock(port);
                        return UND_REPLY_NULL;
                }
-               reply = (UNDReplyRef) port->ip_kobject;
+               reply = (UNDReplyRef) ip_get_kobject(port);
                assert(reply != UND_REPLY_NULL);
                ip_unlock(port);
                return reply;
index 965ba291d5a20344bcba8bb5bd8cc2e8a1607df0..4c25645deb6ee49b2a1afc9dfdbd0281b29f11d2 100644 (file)
@@ -375,8 +375,10 @@ arm_init(
            + ((uintptr_t)&BootCpuData
            - (uintptr_t)(args->virtBase)));
 
-       thread_bootstrap();
-       thread = current_thread();
+       thread = thread_bootstrap();
+       thread->machine.CpuDatap = &BootCpuData;
+       machine_set_current_thread(thread);
+
        /*
         * Preemption is enabled for this thread so that it can lock mutexes without
         * tripping the preemption check. In reality scheduling is not enabled until
@@ -384,7 +386,6 @@ arm_init(
         * preemption level is not really meaningful for the bootstrap thread.
         */
        thread->machine.preemption_count = 0;
-       thread->machine.CpuDatap = &BootCpuData;
 #if     __arm__ && __ARM_USER_PROTECT__
        {
                unsigned int ttbr0_val, ttbr1_val, ttbcr_val;
index 6f4d332fc5154f481612a6da851477b395736789..f7c8dcbd690a93dccc4363912cd7758278f74ae0 100644 (file)
@@ -155,6 +155,8 @@ LEXT(Switch_context)
        add             r3, r3, SS_R4
        stmia           r3!, {r4-r14}                                   // Save general registers to pcb
 switch_threads:
+       ldr             r3, [r2, ACT_CPUDATAP]
+       str             r2, [r3, CPU_ACTIVE_THREAD]
        ldr             r3, [r2, TH_KSTACKPTR]                          // get kernel stack top
        mcr             p15, 0, r2, c13, c0, 4                          // Write TPIDRPRW
        ldr             r6, [r2, TH_CTH_SELF]
index ce0e69e4258035932a73b3848214c1164cd75d8f..1fbddf86dc9b1761dd934c3a659251f48eec8ffe 100644 (file)
@@ -111,6 +111,8 @@ typedef struct _lck_mtx_ {
 #define LCK_FRAMES_MAX  8
 
 extern uint64_t         MutexSpin;
+extern uint64_t         low_MutexSpin;
+extern int64_t          high_MutexSpin;
 
 typedef struct {
        unsigned int            type;
index 49a261f31c434fbab97525e39bba554e5d6afd28..7fc463e633dc73990458ab47f98200ce58e46e58 100644 (file)
@@ -73,6 +73,9 @@
 #include <kern/debug.h>
 #include <kern/kcdata.h>
 #include <string.h>
+#include <arm/cpu_internal.h>
+#include <os/hash.h>
+#include <arm/cpu_data.h>
 
 #include <arm/cpu_data_internal.h>
 #include <arm/proc_reg.h>
@@ -117,7 +120,10 @@ int lck_mtx_adaptive_spin_mode = 0;
 typedef enum {
        SPINWAIT_ACQUIRED,     /* Got the lock. */
        SPINWAIT_INTERLOCK,    /* Got the interlock, no owner, but caller must finish acquiring the lock. */
-       SPINWAIT_DID_SPIN,     /* Got the interlock, spun, but failed to get the lock. */
+       SPINWAIT_DID_SPIN_HIGH_THR, /* Got the interlock, spun, but failed to get the lock. */
+       SPINWAIT_DID_SPIN_OWNER_NOT_CORE, /* Got the interlock, spun, but failed to get the lock. */
+       SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION, /* Got the interlock, spun, but failed to get the lock. */
+       SPINWAIT_DID_SPIN_SLIDING_THR,/* Got the interlock, spun, but failed to get the lock. */
        SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
 } spinwait_result_t;
 
@@ -428,32 +434,6 @@ get_preemption_level(void)
        return current_thread()->machine.preemption_count;
 }
 
-#if __SMP__
-static inline boolean_t
-interlock_try_disable_interrupts(
-       lck_mtx_t *mutex,
-       boolean_t *istate)
-{
-       *istate = ml_set_interrupts_enabled(FALSE);
-
-       if (interlock_try(mutex)) {
-               return 1;
-       } else {
-               ml_set_interrupts_enabled(*istate);
-               return 0;
-       }
-}
-
-static inline void
-interlock_unlock_enable_interrupts(
-       lck_mtx_t *mutex,
-       boolean_t istate)
-{
-       interlock_unlock(mutex);
-       ml_set_interrupts_enabled(istate);
-}
-#endif /* __SMP__ */
-
 /*
  *      Routine:        lck_spin_alloc_init
  */
@@ -2293,14 +2273,15 @@ lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t
        int                     has_interlock = (int)interlocked;
 #if __SMP__
        __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
-       thread_t                holder;
-       uint64_t                overall_deadline;
-       uint64_t                check_owner_deadline;
-       uint64_t                cur_time;
-       spinwait_result_t       retval = SPINWAIT_DID_SPIN;
-       int                     loopcount = 0;
-       uintptr_t               state;
-       boolean_t               istate;
+       thread_t        owner, prev_owner;
+       uint64_t        window_deadline, sliding_deadline, high_deadline;
+       uint64_t        start_time, cur_time, avg_hold_time, bias, delta;
+       int             loopcount = 0;
+       uint            i, prev_owner_cpu;
+       int             total_hold_time_samples, window_hold_time_samples, unfairness;
+       bool            owner_on_core, adjust;
+       uintptr_t       state, new_state, waiters;
+       spinwait_result_t       retval = SPINWAIT_DID_SPIN_HIGH_THR;
 
        if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
                if (!has_interlock) {
@@ -2310,101 +2291,290 @@ lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t
                return SPINWAIT_DID_NOT_SPIN;
        }
 
-       state = ordered_load_mtx(lock);
-
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
            trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);
 
-       cur_time = mach_absolute_time();
-       overall_deadline = cur_time + MutexSpin;
-       check_owner_deadline = cur_time;
-
-       if (has_interlock) {
-               istate = ml_get_interrupts_enabled();
+       start_time = mach_absolute_time();
+       /*
+        * window_deadline represents the "learning" phase.
+        * The thread collects statistics about the lock during
+        * the learning window and then decides whether to keep
+        * spinning or to block, according to the concurrency
+        * behavior observed.
+        *
+        * Every thread can spin at least low_MutexSpin.
+        */
+       window_deadline = start_time + low_MutexSpin;
+       /*
+        * Sliding_deadline is the adjusted spin deadline
+        * computed after the "learning" phase.
+        */
+       sliding_deadline = window_deadline;
+       /*
+        * High_deadline is a hard deadline. No thread
+        * can spin past this deadline.
+        */
+       if (high_MutexSpin >= 0) {
+               high_deadline = start_time + high_MutexSpin;
+       } else {
+               high_deadline = start_time + low_MutexSpin * real_ncpus;
        }
 
+       /*
+        * We do not yet know which cpu is the owner.
+        * Initialize prev_owner_cpu with the next cpu.
+        */
+       prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
+       total_hold_time_samples = 0;
+       window_hold_time_samples = 0;
+       avg_hold_time = 0;
+       adjust = TRUE;
+       bias = (os_hash_kernel_pointer(lock) + cpu_number()) % real_ncpus;
+
        /* Snoop the lock state */
        state = ordered_load_mtx(lock);
+       owner = LCK_MTX_STATE_TO_THREAD(state);
+       prev_owner = owner;
+
+       if (has_interlock) {
+               if (owner == NULL) {
+                       retval = SPINWAIT_INTERLOCK;
+                       goto done_spinning;
+               } else {
+                       /*
+                        * We are holding the interlock, so
+                        * we can safely dereference owner.
+                        */
+                       if (!(owner->machine.machine_thread_flags & MACHINE_THREAD_FLAGS_ON_CPU) ||
+                           (owner->state & TH_IDLE)) {
+                               retval = SPINWAIT_DID_NOT_SPIN;
+                               goto done_spinning;
+                       }
+               }
+               interlock_unlock(lock);
+               has_interlock = 0;
+       }
 
        /*
         * Spin while:
         *   - mutex is locked, and
         *   - it's locked as a spin lock, and
         *   - owner is running on another processor, and
-        *   - owner (processor) is not idling, and
         *   - we haven't spun for long enough.
         */
        do {
-               if (!(state & LCK_ILOCK) || has_interlock) {
-                       if (!has_interlock) {
-                               has_interlock = interlock_try_disable_interrupts(lock, &istate);
+               /*
+                * Try to acquire the lock.
+                */
+               owner = LCK_MTX_STATE_TO_THREAD(state);
+               if (owner == NULL) {
+                       waiters = state & ARM_LCK_WAITERS;
+                       if (waiters) {
+                               /*
+                                * preserve the waiter bit
+                                * and try to acquire the interlock.
+                                * Note: we will successfully acquire
+                                * the interlock only if we can also
+                                * acquire the lock.
+                                */
+                               new_state = ARM_LCK_WAITERS | LCK_ILOCK;
+                               has_interlock = 1;
+                               retval = SPINWAIT_INTERLOCK;
+                               disable_preemption();
+                       } else {
+                               new_state = LCK_MTX_THREAD_TO_STATE(thread);
+                               retval = SPINWAIT_ACQUIRED;
+                       }
+
+                       /*
+                        * The cmpxchg will succeed only if the lock
+                        * is not owned (doesn't have an owner set)
+                        * and it is not interlocked.
+                        * It will not fail if there are waiters.
+                        */
+                       if (os_atomic_cmpxchgv(&lock->lck_mtx_data,
+                           waiters, new_state, &state, acquire)) {
+                               goto done_spinning;
+                       } else {
+                               if (waiters) {
+                                       has_interlock = 0;
+                                       enable_preemption();
+                               }
                        }
+               }
 
-                       if (has_interlock) {
-                               state = ordered_load_mtx(lock);
-                               holder = LCK_MTX_STATE_TO_THREAD(state);
+               cur_time = mach_absolute_time();
 
-                               if (holder == NULL) {
-                                       retval = SPINWAIT_INTERLOCK;
+               /*
+                * Never spin past high_deadline.
+                */
+               if (cur_time >= high_deadline) {
+                       retval = SPINWAIT_DID_SPIN_HIGH_THR;
+                       break;
+               }
 
-                                       if (istate) {
-                                               ml_set_interrupts_enabled(istate);
-                                       }
+               /*
+                * Check if owner is on core. If not block.
+                */
+               owner = LCK_MTX_STATE_TO_THREAD(state);
+               if (owner) {
+                       i = prev_owner_cpu;
+                       owner_on_core = FALSE;
 
-                                       break;
-                               }
+                       disable_preemption();
+                       state = ordered_load_mtx(lock);
+                       owner = LCK_MTX_STATE_TO_THREAD(state);
 
-                               if (!(holder->machine.machine_thread_flags & MACHINE_THREAD_FLAGS_ON_CPU) ||
-                                   (holder->state & TH_IDLE)) {
-                                       if (loopcount == 0) {
-                                               retval = SPINWAIT_DID_NOT_SPIN;
+                       /*
+                        * For scalability we want to check if the owner is on core
+                        * without locking the mutex interlock.
+                        * If we do not lock the mutex interlock, the owner that we see might be
+                        * invalid, so we cannot dereference it. Therefore we cannot check
+                        * any field of the thread to tell us if it is on core.
+                        * Instead, check whether the thread running on the other cpus matches the owner.
+                        */
+                       if (owner) {
+                               do {
+                                       cpu_data_t *cpu_data_ptr = CpuDataEntries[i].cpu_data_vaddr;
+                                       if ((cpu_data_ptr != NULL) && (cpu_data_ptr->cpu_active_thread == owner)) {
+                                               owner_on_core = TRUE;
+                                               break;
                                        }
-
-                                       if (istate) {
-                                               ml_set_interrupts_enabled(istate);
+                                       if (++i >= real_ncpus) {
+                                               i = 0;
                                        }
-
-                                       break;
+                               } while (i != prev_owner_cpu);
+                               enable_preemption();
+
+                               if (owner_on_core) {
+                                       prev_owner_cpu = i;
+                               } else {
+                                       prev_owner = owner;
+                                       state = ordered_load_mtx(lock);
+                                       owner = LCK_MTX_STATE_TO_THREAD(state);
+                                       if (owner == prev_owner) {
+                                               /*
+                                                * Owner is not on core.
+                                                * Stop spinning.
+                                                */
+                                               if (loopcount == 0) {
+                                                       retval = SPINWAIT_DID_NOT_SPIN;
+                                               } else {
+                                                       retval = SPINWAIT_DID_SPIN_OWNER_NOT_CORE;
+                                               }
+                                               break;
+                                       }
+                                       /*
+                                        * Fall through if the owner changed while we were scanning.
+                                        * The new owner could potentially be on core, so loop
+                                        * again.
+                                        */
                                }
-
-                               interlock_unlock_enable_interrupts(lock, istate);
-                               has_interlock = 0;
+                       } else {
+                               enable_preemption();
                        }
                }
 
-               cur_time = mach_absolute_time();
-
-               if (cur_time >= overall_deadline) {
-                       break;
+               /*
+                * Save how many times we see the owner changing.
+                * We can roughly estimate the mutex hold
+                * time and the fairness with that.
+                */
+               if (owner != prev_owner) {
+                       prev_owner = owner;
+                       total_hold_time_samples++;
+                       window_hold_time_samples++;
                }
 
-               check_owner_deadline = cur_time + (MutexSpin / SPINWAIT_OWNER_CHECK_COUNT);
+               /*
+                * Learning window expired.
+                * Try to adjust the sliding_deadline.
+                */
+               if (cur_time >= window_deadline) {
+                       /*
+                        * If there was no contention during the window,
+                        * stop spinning.
+                        */
+                       if (window_hold_time_samples < 1) {
+                               retval = SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION;
+                               break;
+                       }
+
+                       if (adjust) {
+                               /*
+                                * For a fair lock, we'd wait for at most (NCPU-1) periods,
+                                * but the lock is unfair, so let's try to estimate by how much.
+                                */
+                               unfairness = total_hold_time_samples / real_ncpus;
+
+                               if (unfairness == 0) {
+                                       /*
+                                        * We observed the owner changing `total_hold_time_samples` times, which
+                                        * lets us estimate the average hold time of this mutex for the duration
+                                        * of the spin time.
+                                        * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
+                                        *
+                                        * In this case spin at max avg_hold_time * (real_ncpus - 1)
+                                        */
+                                       delta = cur_time - start_time;
+                                       sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
+                               } else {
+                                       /*
+                                        * In this case at least one of the other cpus was able to get the lock twice
+                                        * while I was spinning.
+                                        * We could spin longer but it won't necessarily help if the system is unfair.
+                                        * Try to randomize the wait to reduce contention.
+                                        *
+                                        * We compute how much time we could potentially spin
+                                        * and distribute it over the cpus.
+                                        *
+                                        * bias is an integer between 0 and real_ncpus.
+                                        * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
+                                        */
+                                       delta = high_deadline - cur_time;
+                                       sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
+                                       adjust = FALSE;
+                               }
+                       }
 
-               if (cur_time < check_owner_deadline) {
-                       machine_delay_until(check_owner_deadline - cur_time, check_owner_deadline);
+                       window_deadline += low_MutexSpin;
+                       window_hold_time_samples = 0;
                }
 
-               /* Snoop the lock state */
-               state = ordered_load_mtx(lock);
+               /*
+                * Stop spinning if we are past
+                * the adjusted deadline.
+                */
+               if (cur_time >= sliding_deadline) {
+                       retval = SPINWAIT_DID_SPIN_SLIDING_THR;
+                       break;
+               }
 
-               if (state == 0) {
-                       /* Try to grab the lock. */
-                       if (os_atomic_cmpxchg(&lock->lck_mtx_data,
-                           0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
-                               retval = SPINWAIT_ACQUIRED;
-                               break;
-                       }
+               /*
+                * We want to arm the monitor for wfe,
+                * so load the lock exclusively.
+                *
+                * NOTE:
+                * we rely on the fact that wfe will
+                * eventually return even if the cache line
+                * is not modified. This way we will keep
+                * looping and checking if the deadlines expired.
+                */
+               state = os_atomic_load_exclusive(&lock->lck_mtx_data, relaxed);
+               owner = LCK_MTX_STATE_TO_THREAD(state);
+               if (owner != NULL) {
+                       wait_for_event();
+                       state = ordered_load_mtx(lock);
+               } else {
+                       atomic_exchange_abort();
                }
 
                loopcount++;
        } while (TRUE);
 
+done_spinning:
 #if     CONFIG_DTRACE
        /*
-        * We've already kept a count via overall_deadline of how long we spun.
-        * If dtrace is active, then we compute backwards to decide how
-        * long we spun.
-        *
         * Note that we record a different probe id depending on whether
         * this is a direct or indirect mutex.  This allows us to
         * penalize only lock groups that have debug/stats enabled
@@ -2412,10 +2582,10 @@ lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t
         */
        if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
                LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
-                   mach_absolute_time() - (overall_deadline - MutexSpin));
+                   mach_absolute_time() - start_time);
        } else {
                LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
-                   mach_absolute_time() - (overall_deadline - MutexSpin));
+                   mach_absolute_time() - start_time);
        }
        /* The lockstat acquire event is recorded by the caller. */
 #endif
@@ -2437,6 +2607,7 @@ lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t
        return retval;
 }
 
+
 /*
  *     Common code for mutex locking as spinlock
  */
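
The two adjustment cases in the learning-window logic above, pulled out into a standalone sketch for readability (same arithmetic as the kernel code; parameter names mirror the locals, and the caller guarantees total_hold_time_samples >= 1):

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch of the sliding-deadline adjustment. Inputs are
     * mach_absolute_time()-style ticks; ncpus stands in for real_ncpus
     * and bias for the per-thread hash in [0, ncpus). */
    static uint64_t
    adjust_sliding_deadline(uint64_t start_time, uint64_t cur_time,
        uint64_t high_deadline, int total_hold_time_samples,
        unsigned int ncpus, uint64_t bias, bool *adjust)
    {
            int unfairness = total_hold_time_samples / (int)ncpus;
            uint64_t delta;

            if (unfairness == 0) {
                    /* avg_hold_time = (cur_time - start_time) / samples;
                     * spin for at most avg_hold_time * (ncpus - 1). */
                    delta = cur_time - start_time;
                    return start_time +
                        (delta * (ncpus - 1)) / (uint64_t)total_hold_time_samples;
            }
            /* Another cpu took the lock at least twice while we spun:
             * spread the remaining spin budget across cpus to
             * de-correlate the waiters, and stop adjusting. */
            delta = high_deadline - cur_time;
            *adjust = false;
            return cur_time + (delta * bias) / ncpus;
    }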
index df89b75005c065994c113a1304d5c9042d7d4aa3..1a21043f409d0786f6108821875126df3deedc15 100644 (file)
@@ -68,6 +68,9 @@ uint32_t LockTimeOut;
 uint32_t LockTimeOutUsec;
 uint64_t TLockTimeOut;
 uint64_t MutexSpin;
+uint64_t low_MutexSpin;
+int64_t  high_MutexSpin;
+
 boolean_t is_clock_configured = FALSE;
 
 #if CONFIG_NONFATAL_ASSERTS
@@ -218,6 +221,15 @@ ml_init_lock_timeout(void)
                nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
        }
        MutexSpin = abstime;
+       low_MutexSpin = MutexSpin;
+       /*
+        * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
+        * real_ncpus is not set at this time.
+        *
+        * NOTE: active spinning is disabled on arm. It can be activated
+        * by setting high_MutexSpin through the sysctl.
+        */
+       high_MutexSpin = low_MutexSpin;
 }
 
 /*
index 9d1896393b852f57e67fe0443bf9849e0b431cd5..5b475d4fd09ed1e3c9ea9a840c8f7984cc2329f6 100644 (file)
@@ -35,6 +35,8 @@
        .align  2
        .globl  EXT(machine_set_current_thread)
 LEXT(machine_set_current_thread)
+       ldr             r1, [r0, ACT_CPUDATAP]
+       str             r0, [r1, CPU_ACTIVE_THREAD]
        mcr             p15, 0, r0, c13, c0, 4                          // Write TPIDRPRW
        ldr             r1, [r0, TH_CTH_SELF]
        mrc             p15, 0, r2, c13, c0, 3                          // Read TPIDRURO
index b78a1be8ea4a176dddfe2895758e94a06abc87f1..f42c4f4e10a3840542c3bf68e9e8e6d57c04d49c 100644 (file)
@@ -193,6 +193,15 @@ machine_thread_init(void)
                                         "arm debug state");
 }
 
+/*
+ * Routine:    machine_thread_template_init
+ *
+ */
+void
+machine_thread_template_init(thread_t __unused thr_template)
+{
+       /* Nothing to do on this platform. */
+}
 
 /*
  * Routine:    get_useraddr
index 003be491f679ff098aa08563668e41c3609caee3..2f01b468107c17153a4c1c7fcd5436190a1e77e2 100644 (file)
@@ -3923,7 +3923,7 @@ __unused pte_to_xprr_perm(pt_entry_t pte)
        case APRR_USER_RW_INDEX:  return XPRR_USER_RW_PERM;
        case APRR_PPL_RX_INDEX:   return XPRR_PPL_RX_PERM;
        case APRR_KERN_RX_INDEX:  return XPRR_KERN_RX_PERM;
-       case APRR_PPL_RO_INDEX:   return XPRR_PPL_RO_PERM;
+       case APRR_USER_XO_INDEX:  return XPRR_USER_XO_PERM;
        case APRR_KERN_RO_INDEX:  return XPRR_KERN_RO_PERM;
        case APRR_KERN0_RX_INDEX: return XPRR_KERN0_RO_PERM;
        case APRR_KERN0_RO_INDEX: return XPRR_KERN0_RO_PERM;
@@ -3951,7 +3951,7 @@ xprr_perm_to_aprr_index(uint64_t perm)
        case XPRR_USER_RW_PERM:  return APRR_USER_RW_INDEX;
        case XPRR_PPL_RX_PERM:   return APRR_PPL_RX_INDEX;
        case XPRR_KERN_RX_PERM:  return APRR_KERN_RX_INDEX;
-       case XPRR_PPL_RO_PERM:   return APRR_PPL_RO_INDEX;
+       case XPRR_USER_XO_PERM:  return APRR_USER_XO_INDEX;
        case XPRR_KERN_RO_PERM:  return APRR_KERN_RO_INDEX;
        case XPRR_KERN0_RX_PERM: return APRR_KERN0_RO_INDEX;
        case XPRR_KERN0_RO_PERM: return APRR_KERN0_RO_INDEX;
@@ -4643,8 +4643,18 @@ pmap_static_allocations_done(void)
        monitor_start_pa = BootArgs->topOfKernelData;
        monitor_end_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;
 
-       /* The bootstrap page tables are mapped RO at boostrap. */
-       pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_PPL_RO_PERM);
+       /*
+        * The bootstrap page tables are mapped RO at bootstrap.
+        *
+        * Note that this function call requests switching XPRR permissions from
+        * XPRR_KERN_RO_PERM to XPRR_KERN_RO_PERM. Whilst this may seem redundant,
+        * pa_set_range_xprr_perm() does other things too, such as calling
+        * pa_set_range_monitor() on the requested address range and performing a number
+        * of integrity checks on the PTEs. We should still
+        * call this function for all PPL-owned memory, regardless of whether
+        * permissions are required to be changed or not.
+        */
+       pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
 
        monitor_start_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;
        monitor_end_pa = avail_start;
@@ -4652,10 +4662,20 @@ pmap_static_allocations_done(void)
        /* The other bootstrap allocations are mapped RW at bootstrap. */
        pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
 
-       /* The RO page tables are mapped RW at bootstrap. */
+       /*
+        * The RO page tables are mapped RW at bootstrap and remain RW after the call
+        * to pa_set_range_xprr_perm(). We do this, as opposed to using XPRR_PPL_RW_PERM,
+        * to work around a functional issue on H11 devices where CTRR shifts the APRR
+        * lookup table index to USER_XO before APRR is applied, hence causing the hardware
+        * to believe we are dealing with a user XO page upon performing a translation.
+        *
+        * Note that this workaround does not pose a security risk, because the RO
+        * page tables still remain read-only, due to KTRR/CTRR, and further protecting
+        * them at the APRR level would be unnecessary.
+        */
        monitor_start_pa = kvtophys((vm_offset_t)&ropagetable_begin);
        monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
-       pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
+       pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_KERN_RW_PERM);
 
        monitor_start_pa = kvtophys(segPPLDATAB);
        monitor_end_pa = monitor_start_pa + segSizePPLDATA;
@@ -4701,14 +4721,14 @@ pmap_static_allocations_done(void)
                monitor_start_pa = kvtophys(segPPLDATACONSTB);
                monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;
 
-               pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_PPL_RO_PERM);
+               pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
        }
 
        /*
         * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
         * precaution.  The real RW mappings are at a different location with guard pages.
         */
-       pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_PPL_RO_PERM);
+       pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_KERN_RO_PERM);
 }
 
 
@@ -5150,6 +5170,11 @@ pmap_create_options_internal(
        if ((p = pmap_alloc_pmap()) == PMAP_NULL) {
                return PMAP_NULL;
        }
+
+       if (ledger) {
+               pmap_ledger_validate(ledger);
+               pmap_ledger_retain(ledger);
+       }
 #else
        /*
         *      Allocate a pmap struct from the pmap_zone.  Then allocate
@@ -5160,6 +5185,8 @@ pmap_create_options_internal(
        }
 #endif
 
+       p->ledger = ledger;
+
        if (flags & PMAP_CREATE_64BIT) {
                p->min = MACH_VM_MIN_ADDRESS;
                p->max = MACH_VM_MAX_ADDRESS;
@@ -5192,14 +5219,6 @@ pmap_create_options_internal(
        }
 
 
-#if XNU_MONITOR
-       if (ledger) {
-               pmap_ledger_validate(ledger);
-               pmap_ledger_retain(ledger);
-       }
-#endif /* XNU_MONITOR */
-
-       p->ledger = ledger;
 
        PMAP_LOCK_INIT(p);
        memset((void *) &p->stats, 0, sizeof(p->stats));
@@ -7294,13 +7313,14 @@ pmap_protect_options_internal(
                        pte_set_was_writeable(tmplate, false);
 
 #if __APRR_SUPPORTED__
-                       if (__improbable(is_pte_xprr_protected(spte) && (pte_to_xprr_perm(spte) != XPRR_USER_JIT_PERM))) {
+                       if (__improbable(is_pte_xprr_protected(spte) && (pte_to_xprr_perm(spte) != XPRR_USER_JIT_PERM)
+                           && (pte_to_xprr_perm(spte) != XPRR_USER_XO_PERM))) {
                                /* Only test for PPL protection here,  User-JIT mappings may be mutated by this function. */
                                panic("%s: modifying a PPL mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
                                    __func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
                        }
 
-                       if (__improbable(is_pte_xprr_protected(tmplate))) {
+                       if (__improbable(is_pte_xprr_protected(tmplate) && (pte_to_xprr_perm(tmplate) != XPRR_USER_XO_PERM))) {
                                panic("%s: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
                                    __func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
                        }
@@ -8087,12 +8107,11 @@ Pmap_enter_loop:
 #if XNU_MONITOR
                if (!pmap_ppl_disable && (wimg_bits & PP_ATTR_MONITOR)) {
                        uint64_t xprr_perm = pte_to_xprr_perm(pte);
-                       pte &= ~ARM_PTE_XPRR_MASK;
                        switch (xprr_perm) {
                        case XPRR_KERN_RO_PERM:
-                               pte |= xprr_perm_to_pte(XPRR_PPL_RO_PERM);
                                break;
                        case XPRR_KERN_RW_PERM:
+                               pte &= ~ARM_PTE_XPRR_MASK;
                                pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
                                break;
                        default:
index 48e0879b1cdc352efcb6440a37fb75e87cd52f6b..c9b1893a1f8d63c4b922fe76ab0e7dd7e2b7bb22 100644 (file)
  */
 .macro set_thread_registers
        msr             TPIDR_EL1, $0                                           // Write new thread pointer to TPIDR_EL1
+       ldr             $1, [$0, ACT_CPUDATAP]
+       str             $0, [$1, CPU_ACTIVE_THREAD]
        ldr             $1, [$0, TH_CTH_SELF]                           // Get cthread pointer
        mrs             $2, TPIDRRO_EL0                                         // Extract cpu number from TPIDRRO_EL0
        and             $2, $2, #(MACHDEP_CPUNUM_MASK)
index 3a5a4d444a95260aa1900a2cf29d1824dfa5f088..f19b8696bb797a40baee01a2d9faba9d2d6bc521 100644 (file)
@@ -62,21 +62,6 @@ void kpc_pmi_handler(unsigned int ctr);
 #define PMCR0_PMC_ENABLE_MASK(PMC)  (UINT64_C(0x1) << PMCR_PMC_SHIFT(PMC))
 #define PMCR0_PMC_DISABLE_MASK(PMC) (~PMCR0_PMC_ENABLE_MASK(PMC))
 
-/* how interrupts are generated on PMIs */
-#define PMCR0_INTGEN_SHIFT   (8)
-#define PMCR0_INTGEN_MASK    (UINT64_C(0x7) << PMCR0_INTGEN_SHIFT)
-#define PMCR0_INTGEN_OFF     (UINT64_C(0) << PMCR0_INTGEN_SHIFT)
-#define PMCR0_INTGEN_PMI     (UINT64_C(1) << PMCR0_INTGEN_SHIFT)
-#define PMCR0_INTGEN_AIC     (UINT64_C(2) << PMCR0_INTGEN_SHIFT)
-#define PMCR0_INTGEN_DBG_HLT (UINT64_C(3) << PMCR0_INTGEN_SHIFT)
-#define PMCR0_INTGEN_FIQ     (UINT64_C(4) << PMCR0_INTGEN_SHIFT)
-
-/* 10 unused */
-
-/* set by hardware if PMI was generated */
-#define PMCR0_PMAI_SHIFT (11)
-#define PMCR0_PMAI_MASK  (UINT64_C(1) << PMCR0_PMAI_SHIFT)
-
 /* overflow on a PMC generates an interrupt */
 #define PMCR0_PMI_OFFSET            (12)
 #define PMCR0_PMI_SHIFT(PMC)        (PMCR0_PMI_OFFSET + PMCR_PMC_SHIFT(PMC))
@@ -360,33 +345,22 @@ dump_regs(void)
 static boolean_t
 enable_counter(uint32_t counter)
 {
-       int cpuid = cpu_number();
-       uint64_t pmcr0 = 0, intgen_type;
-       boolean_t counter_running, pmi_enabled, intgen_correct, enabled;
+       uint64_t pmcr0 = 0;
+       boolean_t counter_running, pmi_enabled, enabled;
 
        pmcr0 = SREG_READ(SREG_PMCR0) | 0x3 /* leave the fixed counters enabled for monotonic */;
 
        counter_running = (pmcr0 & PMCR0_PMC_ENABLE_MASK(counter)) != 0;
        pmi_enabled = (pmcr0 & PMCR0_PMI_ENABLE_MASK(counter)) != 0;
 
-       /* TODO this should use the PMI path rather than AIC for the interrupt
-        *      as it is faster
-        */
-       intgen_type = PMCR0_INTGEN_AIC;
-       intgen_correct = (pmcr0 & PMCR0_INTGEN_MASK) == intgen_type;
-
-       enabled = counter_running && pmi_enabled && intgen_correct;
+       enabled = counter_running && pmi_enabled;
 
        if (!enabled) {
                pmcr0 |= PMCR0_PMC_ENABLE_MASK(counter);
                pmcr0 |= PMCR0_PMI_ENABLE_MASK(counter);
-               pmcr0 &= ~PMCR0_INTGEN_MASK;
-               pmcr0 |= intgen_type;
-
                SREG_WRITE(SREG_PMCR0, pmcr0);
        }
 
-       saved_PMCR[cpuid][0] = pmcr0;
        return enabled;
 }
 
@@ -395,7 +369,6 @@ disable_counter(uint32_t counter)
 {
        uint64_t pmcr0;
        boolean_t enabled;
-       int cpuid = cpu_number();
 
        if (counter < 2) {
                return true;
@@ -409,7 +382,6 @@ disable_counter(uint32_t counter)
                SREG_WRITE(SREG_PMCR0, pmcr0);
        }
 
-       saved_PMCR[cpuid][0] = pmcr0;
        return enabled;
 }
 
@@ -547,9 +519,6 @@ save_regs(void)
 
        assert(ml_get_interrupts_enabled() == FALSE);
 
-       /* Save current PMCR0/1 values. PMCR2-4 are in the RAWPMU set. */
-       saved_PMCR[cpuid][0] = SREG_READ(SREG_PMCR0) | 0x3;
-
        /* Save event selections. */
        saved_PMESR[cpuid][0] = SREG_READ(SREG_PMESR0);
        saved_PMESR[cpuid][1] = SREG_READ(SREG_PMESR1);
@@ -583,7 +552,6 @@ restore_regs(void)
 
        /* Restore PMCR0/1 values (with PMCR0 last to enable). */
        SREG_WRITE(SREG_PMCR1, saved_PMCR[cpuid][1] | 0x30303);
-       SREG_WRITE(SREG_PMCR0, saved_PMCR[cpuid][0] | 0x3);
 }
 
 static uint64_t
index 660a59f1b9378d8ae4514eb22e85461202148d1b..875ddb7a54677a7791f17bc4219f4dae2715ac8d 100644 (file)
@@ -725,7 +725,7 @@ check_ktrr_sctlr_trap:
        add             sp, sp, ARM_CONTEXT_SIZE        // Clean up stack
        b.ne    Lel1_sp1_synchronous_vector_continue
        msr             ELR_EL1, lr                                     // Return to caller
-       eret
+       ERET_CONTEXT_SYNCHRONIZING
 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
 
 /* 64-bit first level exception handler dispatcher.
@@ -1170,8 +1170,8 @@ Lexception_return_restore_registers:
        and             x1, x4, BA_BOOT_FLAGS_DISABLE_USER_JOP
        cbnz    x1, Ldisable_jop // if global user JOP disabled, always turn off JOP regardless of thread flag (kernel running with JOP on)
        mrs             x2, TPIDR_EL1
-       ldr             x2, [x2, TH_DISABLE_USER_JOP]
-       cbz             x2, Lskip_disable_jop // if thread has JOP enabled, leave it on (kernel running with JOP on)
+       ldr             w2, [x2, TH_DISABLE_USER_JOP]
+       cbz             w2, Lskip_disable_jop // if thread has JOP enabled, leave it on (kernel running with JOP on)
 Ldisable_jop:
        MOV64   x1, SCTLR_JOP_KEYS_ENABLED
        mrs             x4, SCTLR_EL1
@@ -1257,7 +1257,7 @@ Lskip_disable_jop:
 Lskip_ttbr1_switch:
 #endif /* __ARM_KERNEL_PROTECT__ */
 
-       eret
+       ERET_CONTEXT_SYNCHRONIZING
 
 user_take_ast:
        PUSH_FRAME
index 037f34c135fad0fccb42360b36a737a20397e199..b1901d145f284b51d865d3d70c9cd6eaffc86ac2 100644 (file)
@@ -79,6 +79,9 @@ uint32_t LockTimeOut;
 uint32_t LockTimeOutUsec;
 uint64_t TLockTimeOut;
 uint64_t MutexSpin;
+uint64_t low_MutexSpin;
+int64_t high_MutexSpin;
+
 boolean_t is_clock_configured = FALSE;
 
 uint32_t yield_delay_us = 0; /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
@@ -115,7 +118,7 @@ lockdown_handler_t lockdown_handler;
 void *lockdown_this;
 lck_mtx_t lockdown_handler_lck;
 lck_grp_t *lockdown_handler_grp;
-int lockdown_done;
+uint32_t lockdown_done;
 
 void ml_lockdown_init(void);
 void ml_lockdown_run_handler(void);
@@ -841,6 +844,15 @@ ml_init_lock_timeout(void)
                nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
        }
        MutexSpin = abstime;
+       low_MutexSpin = MutexSpin;
+       /*
+        * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
+        * real_ncpus is not set at this time.
+        *
+        * NOTE: active spinning is disabled on arm. It can be activated
+        * by setting high_MutexSpin through the sysctl.
+        */
+       high_MutexSpin = low_MutexSpin;
 }
 
 /*
index 51361f693583131d2a7d10c93841d2fa943418cd..963af5e567030f22b743bf534406151ef97fead8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 Apple Inc. All rights reserved.
+ * Copyright (c) 2017-2020 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -31,6 +31,7 @@
 #include <arm64/monotonic.h>
 #include <kern/assert.h>
 #include <kern/debug.h> /* panic */
+#include <kern/kpc.h>
 #include <kern/monotonic.h>
 #include <machine/atomic.h>
 #include <machine/limits.h> /* CHAR_BIT */
@@ -84,8 +85,19 @@ bool mt_core_supported = true;
 #define PMC5 "s3_2_c15_c5_0"
 #define PMC6 "s3_2_c15_c6_0"
 #define PMC7 "s3_2_c15_c7_0"
+
+#define PMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
+    X(6, A); X(7, A)
+
+#if CORE_NCTRS > 8
 #define PMC8 "s3_2_c15_c9_0"
 #define PMC9 "s3_2_c15_c10_0"
+#define PMC_8_9(X, A) X(8, A); X(9, A)
+#else // CORE_NCTRS > 8
+#define PMC_8_9(X, A)
+#endif // CORE_NCTRS > 8
+
+#define PMC_ALL(X, A) PMC_0_7(X, A); PMC_8_9(X, A)
 
 #define CTR_MAX ((UINT64_C(1) << 47) - 1)
 
@@ -125,7 +137,7 @@ enum {
        PMCR0_INTGEN_HALT = 3,
        PMCR0_INTGEN_FIQ = 4,
 };
-#define PMCR0_INTGEN_SET(INT) ((uint64_t)(INT) << 8)
+#define PMCR0_INTGEN_SET(X) ((uint64_t)(X) << 8)
 
 #if CPMU_AIC_PMI
 #define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_AIC)
@@ -133,7 +145,9 @@ enum {
 #define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_FIQ)
 #endif /* !CPMU_AIC_PMI */
 
-#define PMCR0_PMI_EN(CTR) (UINT64_C(1) << (12 + CTR_POS(CTR)))
+#define PMCR0_PMI_SHIFT (12)
+#define PMCR0_CTR_GE8_PMI_SHIFT (44)
+#define PMCR0_PMI_EN(CTR) (UINT64_C(1) << (PMCR0_PMI_SHIFT + CTR_POS(CTR)))
 /* fixed counters are always counting */
 #define PMCR0_PMI_INIT (PMCR0_PMI_EN(CYCLES) | PMCR0_PMI_EN(INSTRS))
 /* disable counting on a PMI */
@@ -144,8 +158,9 @@ enum {
 #define PMCR0_L2CGLOBAL_EN (UINT64_C(1) << 23)
 /* user mode access to configuration registers */
 #define PMCR0_USEREN_EN (UINT64_C(1) << 30)
+#define PMCR0_CTR_GE8_EN_SHIFT (32)
 
-#define PMCR0_INIT (PMCR0_INTGEN_INIT | PMCR0_PMI_INIT | PMCR0_DISCNT_EN)
+#define PMCR0_INIT (PMCR0_INTGEN_INIT | PMCR0_PMI_INIT)
 
 /*
  * PMCR1 controls which execution modes count events.
@@ -194,6 +209,9 @@ core_init_execution_modes(void)
 
 #define PMSR_OVF(CTR) (1ULL << (CTR))
 
+#define PMESR0 "S3_1_c15_c5_0"
+#define PMESR1 "S3_1_c15_c6_0"
+
 static int
 core_init(__unused mt_device_t dev)
 {
@@ -211,10 +229,9 @@ uint64_t
 mt_core_snap(unsigned int ctr)
 {
        switch (ctr) {
-       case 0:
-               return __builtin_arm_rsr64(PMC0);
-       case 1:
-               return __builtin_arm_rsr64(PMC1);
+#define PMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(PMC ## CTR)
+               PMC_ALL(PMC_RD, 0);
+#undef PMC_RD
        default:
                panic("monotonic: invalid core counter read: %u", ctr);
                __builtin_unreachable();
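
PMC_ALL is an X-macro: the caller's macro is applied to every implemented counter index, so the 8- and 10-counter register layouts share a single switch. For illustration, PMC_ALL(PMC_RD, 0) with CORE_NCTRS > 8 expands to roughly the following (the PMC0/PMC1 register names are inferred from the numbering pattern above):

    case (0): return __builtin_arm_rsr64("s3_2_c15_c0_0");  /* PMC0 */
    case (1): return __builtin_arm_rsr64("s3_2_c15_c1_0");  /* PMC1 */
    /* ... cases 2 through 7 ... */
    case (8): return __builtin_arm_rsr64("s3_2_c15_c9_0");  /* PMC8 */
    case (9): return __builtin_arm_rsr64("s3_2_c15_c10_0"); /* PMC9 */

The token paste PMC ## CTR produces the macro names PMC0..PMC9, each of which then expands to its system-register string.
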
@@ -242,16 +259,29 @@ core_set_enabled(void)
 {
        uint64_t pmcr0 = __builtin_arm_rsr64(PMCR0);
        pmcr0 |= PMCR0_INIT | PMCR0_FIXED_EN;
-       pmcr0 &= ~PMCR0_PMAI;
+
+       if (kpc_get_running() & KPC_CLASS_CONFIGURABLE_MASK) {
+               uint64_t kpc_ctrs = kpc_get_configurable_pmc_mask(
+                       KPC_CLASS_CONFIGURABLE_MASK) << MT_CORE_NFIXED;
+#if KPC_ARM64_CONFIGURABLE_COUNT > 6
+               uint64_t ctrs_ge8 = kpc_ctrs >> 8;
+               pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_EN_SHIFT;
+               pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_PMI_SHIFT;
+               kpc_ctrs &= (1ULL << 8) - 1;
+#endif /* KPC_ARM64_CONFIGURABLE_COUNT > 6 */
+               kpc_ctrs |= kpc_ctrs << PMCR0_PMI_SHIFT;
+               pmcr0 |= kpc_ctrs;
+       }
+
        __builtin_arm_wsr64(PMCR0, pmcr0);
 #if MACH_ASSERT
        /*
         * Only check for the values that were ORed in.
         */
        uint64_t pmcr0_check = __builtin_arm_rsr64(PMCR0);
-       if (!(pmcr0_check & (PMCR0_INIT | PMCR0_FIXED_EN))) {
-               panic("monotonic: hardware ignored enable (read %llx)",
-                   pmcr0_check);
+       if ((pmcr0_check & (PMCR0_INIT | PMCR0_FIXED_EN)) != (PMCR0_INIT | PMCR0_FIXED_EN)) {
+               panic("monotonic: hardware ignored enable (read %llx, wrote %llx)",
+                   pmcr0_check, pmcr0);
        }
 #endif /* MACH_ASSERT */
 }
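
PMCR0 splits the counter controls: PMCs 0-7 use enable bits [7:0] and PMI-enable bits starting at bit 12, while PMCs 8-9 use enable bits starting at bit 32 and PMI bits starting at bit 44, which is why the kpc mask is split around bit 8 above. A worked example, assuming MT_CORE_NFIXED == 2 and a kpc configurable mask of 0xff (so kpc owns PMCs 2-9):

    uint64_t kpc_ctrs = 0xffULL << 2;    /* 0x3fc: PMCs 2..9           */
    uint64_t ctrs_ge8 = kpc_ctrs >> 8;   /* 0x3:   PMCs 8..9           */
    pmcr0 |= ctrs_ge8 << 32;             /* enable bits for PMC8/PMC9  */
    pmcr0 |= ctrs_ge8 << 44;             /* PMI bits for PMC8/PMC9     */
    kpc_ctrs &= (1ULL << 8) - 1;         /* 0x0fc: PMCs 2..7 remain    */
    kpc_ctrs |= kpc_ctrs << 12;          /* 0xfc0fc: enable + PMI bits */
    pmcr0 |= kpc_ctrs;
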
@@ -1293,6 +1323,13 @@ mt_cpu_pmi(cpu_data_t *cpu, uint64_t pmcr0)
        assert(cpu != NULL);
        assert(ml_get_interrupts_enabled() == FALSE);
 
+       __builtin_arm_wsr64(PMCR0, PMCR0_INIT);
+       /*
+        * Ensure the CPMU has flushed any increments at this point, so PMSR is up
+        * to date.
+        */
+       __builtin_arm_isb(ISB_SY);
+
        cpu->cpu_monotonic.mtc_npmis += 1;
        cpu->cpu_stat.pmi_cnt_wake += 1;
 
@@ -1308,10 +1345,14 @@ mt_cpu_pmi(cpu_data_t *cpu, uint64_t pmcr0)
        uint64_t pmsr = __builtin_arm_rsr64(PMSR);
 
 #if MONOTONIC_DEBUG
-       kprintf("monotonic: cpu = %d, PMSR = 0x%llx, PMCR0 = 0x%llx",
+       printf("monotonic: cpu = %d, PMSR = 0x%llx, PMCR0 = 0x%llx\n",
            cpu_number(), pmsr, pmcr0);
 #endif /* MONOTONIC_DEBUG */
 
+#if MACH_ASSERT
+       uint64_t handled = 0;
+#endif /* MACH_ASSERT */
+
        /*
         * monotonic handles any fixed counter PMIs.
         */
@@ -1320,6 +1361,9 @@ mt_cpu_pmi(cpu_data_t *cpu, uint64_t pmcr0)
                        continue;
                }
 
+#if MACH_ASSERT
+               handled |= 1ULL << i;
+#endif /* MACH_ASSERT */
                uint64_t count = mt_cpu_update_count(cpu, i);
                cpu->cpu_monotonic.mtc_counts[i] += count;
                mt_core_set_snap(i, mt_core_reset_values[i]);
@@ -1334,6 +1378,9 @@ mt_cpu_pmi(cpu_data_t *cpu, uint64_t pmcr0)
                        KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 1),
                            mt_microstackshot_ctr, user_mode);
                        mt_microstackshot_pmi_handler(user_mode, mt_microstackshot_ctx);
+               } else if (mt_debug) {
+                       KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 2),
+                           i, count);
                }
        }
 
@@ -1342,14 +1389,31 @@ mt_cpu_pmi(cpu_data_t *cpu, uint64_t pmcr0)
         */
        for (unsigned int i = MT_CORE_NFIXED; i < CORE_NCTRS; i++) {
                if (pmsr & PMSR_OVF(i)) {
+#if MACH_ASSERT
+                       handled |= 1ULL << i;
+#endif /* MACH_ASSERT */
                        extern void kpc_pmi_handler(unsigned int ctr);
                        kpc_pmi_handler(i);
                }
        }
 
 #if MACH_ASSERT
-       pmsr = __builtin_arm_rsr64(PMSR);
-       assert(pmsr == 0);
+       uint64_t pmsr_after_handling = __builtin_arm_rsr64(PMSR);
+       if (pmsr_after_handling != 0) {
+               unsigned int first_ctr_ovf = __builtin_ffsll(pmsr_after_handling) - 1;
+               uint64_t count = 0;
+               const char *extra = "";
+               if (first_ctr_ovf >= CORE_NCTRS) {
+                       extra = " (invalid counter)";
+               } else {
+                       count = mt_core_snap(first_ctr_ovf);
+               }
+
+               panic("monotonic: PMI status not cleared on exit from handler, "
+                   "PMSR = 0x%llx -> 0x%llx after handling, handled 0x%llx, "
+                   "PMCR0 = 0x%llx, PMC%d = 0x%llx%s", pmsr, pmsr_after_handling,
+                   handled, __builtin_arm_rsr64(PMCR0), first_ctr_ovf, count, extra);
+       }
 #endif /* MACH_ASSERT */
 
        core_set_enabled();
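
The strengthened assertion also pinpoints which counter still has a pending overflow: __builtin_ffsll returns the 1-based position of the lowest set bit, so subtracting one yields the counter number. For example:

    uint64_t pmsr_after_handling = 0x20;               /* bit 5 still set */
    unsigned int first_ctr_ovf =
        __builtin_ffsll(pmsr_after_handling) - 1;      /* 5, i.e. PMC5    */
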
index 3bf15f95afad2aa6781d9c84b2a1402411be896d..ff4efbfdddd123c1e15edb22f63ab43983539e57 100644 (file)
@@ -271,6 +271,15 @@ machine_thread_init(void)
 
 }
 
+/*
+ * Routine:    machine_thread_template_init
+ *
+ */
+void
+machine_thread_template_init(thread_t __unused thr_template)
+{
+       /* Nothing to do on this platform. */
+}
 
 /*
  * Routine: get_useraddr
index f4d967d1461bf13bb0575c9bd709b28fe43bd538..69533e29e132d9c83b7b45ab2cfca0c970c802a1 100644 (file)
@@ -1641,7 +1641,7 @@ typedef enum {
 #define XPRR_KERN0_RW_PERM (6ULL)
 #define XPRR_USER_RW_PERM  (7ULL)
 #define XPRR_PPL_RX_PERM   (8ULL)
-#define XPRR_PPL_RO_PERM   (9ULL)
+#define XPRR_USER_XO_PERM  (9ULL)
 #define XPRR_KERN_RX_PERM  (10ULL)
 #define XPRR_KERN_RO_PERM  (11ULL)
 #define XPRR_KERN0_RX_PERM (12ULL)
@@ -1668,7 +1668,7 @@ typedef enum {
 #define APRR_USER_RW_INDEX  (7ULL)  /* AP_RWRW, PXN, XN */
 #define APRR_PPL_RX_INDEX   (8ULL)  /* AP_RONA, PX, X */
 #define APRR_KERN_RX_INDEX  (9ULL)  /* AP_RONA, PX, XN */
-#define APRR_PPL_RO_INDEX   (10ULL) /* AP_RONA, PXN, X */
+#define APRR_USER_XO_INDEX  (10ULL) /* AP_RONA, PXN, X */
 #define APRR_KERN_RO_INDEX  (11ULL) /* AP_RONA, PXN, XN */
 #define APRR_KERN0_RX_INDEX (12ULL) /* AP_RORO, PX, X */
 #define APRR_KERN0_RO_INDEX (13ULL) /* AP_RORO, PX, XN */
@@ -1693,7 +1693,7 @@ typedef enum {
 #define APRR_USER_RW_SHIFT  (28ULL) /* AP_RWRW, PXN, XN */
 #define APRR_PPL_RX_SHIFT   (32ULL) /* AP_RONA, PX, X */
 #define APRR_KERN_RX_SHIFT  (36ULL) /* AP_RONA, PX, XN */
-#define APRR_PPL_RO_SHIFT   (40ULL) /* AP_RONA, PXN, X */
+#define APRR_USER_XO_SHIFT  (40ULL) /* AP_RONA, PXN, X */
 #define APRR_KERN_RO_SHIFT  (44ULL) /* AP_RONA, PXN, XN */
 #define APRR_KERN0_RX_SHIFT (48ULL) /* AP_RORO, PX, X */
 #define APRR_KERN0_RO_SHIFT (52ULL) /* AP_RORO, PX, XN */
@@ -1731,20 +1731,25 @@ typedef enum {
 #define APRR_EL1_RESET \
        APRR_EL1_UNRESTRICTED
 
+/*
+ * XO mappings bypass PAN protection (rdar://58360875), so
+ * revoke ALL kernel access permissions for XO mappings.
+ */
 #define APRR_EL1_BASE \
-       APRR_EL1_UNRESTRICTED
+       (APRR_EL1_UNRESTRICTED & \
+       APRR_REMOVE(APRR_ATTR_R << APRR_USER_XO_SHIFT))
 
 #if XNU_MONITOR
 #define APRR_EL1_DEFAULT \
        (APRR_EL1_BASE & \
         (APRR_REMOVE((APRR_ATTR_WX << APRR_PPL_RW_SHIFT) | \
-        (APRR_ATTR_WX << APRR_PPL_RO_SHIFT) | \
+        (APRR_ATTR_WX << APRR_USER_XO_SHIFT) | \
         (APRR_ATTR_WX << APRR_PPL_RX_SHIFT))))
 
 #define APRR_EL1_PPL \
        (APRR_EL1_BASE & \
         (APRR_REMOVE((APRR_ATTR_X << APRR_PPL_RW_SHIFT) | \
-        (APRR_ATTR_WX << APRR_PPL_RO_SHIFT) | \
+        (APRR_ATTR_WX << APRR_USER_XO_SHIFT) | \
         (APRR_ATTR_W << APRR_PPL_RX_SHIFT))))
 #else
 #define APRR_EL1_DEFAULT \
@@ -1761,7 +1766,7 @@ typedef enum {
        (APRR_EL0_UNRESTRICTED & \
         (APRR_REMOVE((APRR_ATTR_RWX << APRR_PPL_RW_SHIFT) | \
         (APRR_ATTR_RWX << APRR_PPL_RX_SHIFT) | \
-        (APRR_ATTR_RWX << APRR_PPL_RO_SHIFT))))
+        (APRR_ATTR_RWX << APRR_USER_XO_SHIFT))))
 #else
 #define APRR_EL0_BASE \
        APRR_EL0_UNRESTRICTED
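
The index/shift pairs above encode one 4-bit permission nibble per APRR index, so every shift is four times its index (USER_XO: 40 == 4 * 10). A hedged sketch of what the APRR_EL1_BASE change does, assuming APRR_REMOVE(x) is a bitwise complement and APRR_ATTR_R is a single bit within the nibble (neither definition appears in this diff):

    /* Each APRR index i owns a 4-bit attribute nibble at bit 4*i. */
    _Static_assert(APRR_USER_XO_SHIFT == 4ULL * APRR_USER_XO_INDEX,
        "APRR shift/index correspondence");

    /* Assumed APRR_REMOVE(x) == ~(x): clearing R in the user-XO nibble
     * revokes kernel read access to execute-only user mappings. */
    uint64_t el1_base = APRR_EL1_UNRESTRICTED &
        ~(APRR_ATTR_R << APRR_USER_XO_SHIFT);
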
@@ -1910,6 +1915,26 @@ cmp  $0, $1
 b.mi $2                         // Unsigned "strictly less than"
 .endmacro
 
+/*
+ * Macro intended to be used as a replacement for ERET.
+ * It prevents speculation past ERET instructions by padding
+ * up to the decoder width.
+ */
+.macro ERET_CONTEXT_SYNCHRONIZING
+eret
+#if __ARM_SB_AVAILABLE__
+sb                              // Technically unnecessary on Apple micro-architectures, but may restrict mis-speculation on other architectures
+#else /* __ARM_SB_AVAILABLE__ */
+isb                             // ISB technically unnecessary on Apple micro-architectures, but may restrict mis-speculation on other architectures
+nop                             // Sequence of six NOPs to pad out and terminate the instruction decode group
+nop
+nop
+nop
+nop
+nop
+#endif /* !__ARM_SB_AVAILABLE__ */
+.endmacro
+
 #endif /* __ASSEMBLER__ */
 
 #define MSR(reg, src)  __asm__ volatile ("msr " reg ", %0" :: "r" (src))
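
ERET_CONTEXT_SYNCHRONIZING is meant as a drop-in for a bare eret at the tail of any exception vector; a minimal usage sketch (the label and the register restore are hypothetical, not from this diff):

    Lreturn_from_exception:                 // hypothetical return path
            ldp     x0, x1, [sp], #16       // restore scratch registers (illustrative)
            ERET_CONTEXT_SYNCHRONIZING      // instead of a bare eret

The sb (or isb plus NOP padding) after eret is never executed architecturally; it only stops the front end from speculating past the return.
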
index a5d29d6c6a3d0a261077ebb11f24de83002194da..00cba8194dbe5ea232d7f410fda263d1d70eb17e 100644 (file)
@@ -195,8 +195,8 @@ LEXT(reset_vector)
        /* spin until bootstrap core has completed machine lockdown */
        adrp    x17, EXT(lockdown_done)@page
 1:
-       ldr     x18, [x17, EXT(lockdown_done)@pageoff]
-       cbz     x18, 1b
+       ldr     w18, [x17, EXT(lockdown_done)@pageoff]
+       cbz     w18, 1b
 
        // load stashed rorgn_begin
        adrp    x17, EXT(rorgn_begin)@page
@@ -254,8 +254,8 @@ Lfound_cpu_data_entry:
        /* spin until bootstrap core has completed machine lockdown */
        adrp    x17, EXT(lockdown_done)@page
 1:
-       ldr     x18, [x17, EXT(lockdown_done)@pageoff]
-       cbz     x18, 1b
+       ldr     w18, [x17, EXT(lockdown_done)@pageoff]
+       cbz     w18, 1b
 
        // load stashed rorgn_begin
        adrp    x17, EXT(rorgn_begin)@page
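
Both spin loops move from ldr x18 to ldr w18 because lockdown_done shrank from int to uint32_t in the machine-routines change above: a 64-bit load of a 4-byte variable also reads the 4 bytes that follow it, so the loop could spin on (or be released by) a neighboring symbol. Schematically, in C:

    extern uint32_t lockdown_done;                  /* 4 bytes in .data */
    uint32_t narrow = lockdown_done;                /* what "ldr w18" reads */
    uint64_t wide = *(uint64_t *)&lockdown_done;    /* what "ldr x18" would read:
                                                     * the variable plus 4 stray
                                                     * bytes (undefined behavior) */
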
index a281a029aea4f0b56d17b95bd66ad551189f5587..5b4ba01d19c33f8c7116ce065c604a11570dacba 100644 (file)
@@ -424,11 +424,12 @@ bank_get_value(
                                panic("Bogus bank type: %d passed in get_value\n", bank_element->be_type);
                        }
 
-                       /* Change the persona-id to holder task's persona-id if the task is not spawned in system persona */
+                       /* Fail instead of silently replacing the persona id if the task was not spawned in a system persona */
                        if (unique_persona &&
                            bank_merchant->bt_persona_id != persona_get_id(system_persona) &&
-                           bank_merchant->bt_persona_id != persona_get_id(proxy_system_persona)) {
-                               persona_id = bank_merchant->bt_persona_id;
+                           bank_merchant->bt_persona_id != persona_get_id(proxy_system_persona) &&
+                           bank_merchant->bt_persona_id != persona_id) {
+                               return KERN_INVALID_ARGUMENT;
                        }
 
                        if (bank_holder->bt_persona_id == persona_id) {
index 31d2cbbbd9494f9ec667c31d8e296348c156c500..4d3aec134b2fe18c835f78addba232c5cceaa314 100644 (file)
@@ -201,6 +201,7 @@ osfmk/kern/hibernate.c              optional hibernation
 osfmk/kern/remote_time.c        standard
 osfmk/kern/memset_s.c          standard
 osfmk/kern/copyout_shim.c      optional copyout_shim
+osfmk/kern/suid_cred.c         standard
 
 ./mach/clock_server.c                  standard
 ./mach/clock_priv_server.c             standard
index 91d1b477093536094a6342cbb16c76f342f435f6..2eb80ba77793f31185a447af06a6c46a6b04b34e 100644 (file)
@@ -70,6 +70,9 @@
 #include <mach/mach_types.h>
 #include <mach/message.h>
 #include <mach/port.h>
+#ifdef MACH_KERNEL_PRIVATE
+#include <mach_debug/mach_debug_types.h>
+#endif
 
 #if PRIVATE
 #define IOKIT_SERVER_VERSION    20190926
@@ -123,8 +126,10 @@ typedef struct IOObject * io_object_t;
 typedef io_object_t io_connect_t;
 typedef io_object_t uext_object_t;
 
+extern void iokit_add_reference( io_object_t obj, natural_t type );
 extern void iokit_remove_reference( io_object_t obj );
 extern void iokit_remove_connect_reference( io_object_t obj );
+extern void iokit_port_object_description(io_object_t obj, kobject_description_t desc);
 
 extern io_object_t iokit_lookup_object_port( ipc_port_t port );
 extern io_connect_t iokit_lookup_connect_port( ipc_port_t port );
index 15866381eeef43ee44971b7f7fef2f7db23aefce..c4c0bce85b3991982ba4aa207c6b0139b03b40b6 100644 (file)
@@ -86,7 +86,7 @@ iokit_lookup_io_object(ipc_port_t port, ipc_kobject_type_t type)
 
        iokit_lock_port(port);
        if (ip_active(port) && (ip_kotype(port) == type)) {
-               obj = (io_object_t) port->ip_kobject;
+               obj = (io_object_t) ip_get_kobject(port);
                iokit_add_reference( obj, type );
        } else {
                obj = NULL;
@@ -137,7 +137,7 @@ iokit_lookup_object_in_space_with_port_name(mach_port_name_t name, ipc_kobject_t
 
                        iokit_lock_port(port);
                        if (ip_kotype(port) == type) {
-                               obj = (io_object_t) port->ip_kobject;
+                               obj = (io_object_t) ip_get_kobject(port);
                                iokit_add_reference(obj, type);
                        }
                        iokit_unlock_port(port);
@@ -252,7 +252,12 @@ iokit_alloc_object_port( io_object_t obj, ipc_kobject_type_t type )
        if (type == IKOT_IOKIT_CONNECT) {
                options |= IPC_KOBJECT_ALLOC_IMMOVABLE_SEND;
        }
-       return ipc_kobject_alloc_port((ipc_kobject_t) obj, type, options);
+       if (type == IKOT_UEXT_OBJECT) {
+               ipc_label_t label = IPC_LABEL_DEXT;
+               return ipc_kobject_alloc_labeled_port((ipc_kobject_t) obj, type, label, options);
+       } else {
+               return ipc_kobject_alloc_port((ipc_kobject_t) obj, type, options);
+       }
 }
 
 EXTERN kern_return_t
@@ -345,7 +350,7 @@ iokit_no_senders( mach_no_senders_notification_t * notification )
        if (IP_VALID(port)) {
                iokit_lock_port(port);
                if (ip_active(port)) {
-                       obj = (io_object_t) port->ip_kobject;
+                       obj = (io_object_t) ip_get_kobject(port);
                        type = ip_kotype( port );
                        if ((IKOT_IOKIT_OBJECT == type)
                            || (IKOT_IOKIT_CONNECT == type)
@@ -400,6 +405,12 @@ iokit_notify( mach_msg_header_t * msg )
        }
 }
 
+kern_return_t
+iokit_label_dext_task(task_t task)
+{
+       return ipc_space_add_label(task->itk_space, IPC_LABEL_DEXT);
+}
+
 /* need to create a pmap function to generalize */
 unsigned int
 IODefaultCacheBits(addr64_t pa)
index 5aae6aaeedaa76365def0a6e705f2ad7b9c01cd2..267b6259e52fbf9e0bf97494f7cedf666caed92d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <libkern/crc.h>
 
 #if     DEBUG || DEVELOPMENT
-#define DPRINTF(x...)   kprintf(x)
+#define DPRINTF(x ...)   kprintf(x)
 #else
-#define DPRINTF(x...)
+#define DPRINTF(x ...)
 #endif
 
 #ifndef ROUNDUP
@@ -411,7 +411,7 @@ efi_set_tables_64(EFI_SYSTEM_TABLE_64 * system_table)
                }
 
                gPEEFIRuntimeServices = runtime;
-       }while (FALSE);
+       } while (FALSE);
 }
 
 static void
@@ -489,7 +489,7 @@ efi_set_tables_32(EFI_SYSTEM_TABLE_32 * system_table)
                DPRINTF("  ResetSystem              : 0x%x\n", runtime->ResetSystem);
 
                gPEEFIRuntimeServices = runtime;
-       }while (FALSE);
+       } while (FALSE);
 }
 
 
@@ -501,7 +501,7 @@ efi_init(void)
 
        kprintf("Initializing EFI runtime services\n");
 
-       do{
+       do {
                vm_offset_t vm_size, vm_addr;
                vm_map_offset_t phys_addr;
                EfiMemoryRange *mptr;
@@ -554,7 +554,7 @@ efi_init(void)
                } else {
                        efi_set_tables_32((EFI_SYSTEM_TABLE_32 *) ml_static_ptovirt(args->efiSystemTable));
                }
-       }while (FALSE);
+       } while (FALSE);
 
        return;
 }
@@ -578,7 +578,7 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o
 
        kprintf("Reinitializing EFI runtime services\n");
 
-       do{
+       do {
                vm_offset_t vm_size, vm_addr;
                vm_map_offset_t phys_addr;
                EfiMemoryRange *mptr;
@@ -647,7 +647,7 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o
                } else {
                        efi_set_tables_32((EFI_SYSTEM_TABLE_32 *) ml_static_ptovirt(args->efiSystemTable));
                }
-       }while (FALSE);
+       } while (FALSE);
 
        kprintf("Done reinitializing EFI runtime services\n");
 
@@ -956,7 +956,7 @@ SavePanicInfo(
                /* Special handling of launchd died panics */
                print_launchd_info();
        } else {
-               panic_i386_backtrace(stackptr, ((panic_double_fault_cpu == cn) ? 80: 48), debugger_msg, FALSE, NULL);
+               panic_i386_backtrace(stackptr, ((panic_double_fault_cpu == cn) ? 80 : 48), debugger_msg, FALSE, NULL);
        }
 
        if (panic_options & DEBUGGER_OPTION_COPROC_INITIATED_PANIC) {
@@ -1248,6 +1248,11 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu
        int cn = cpu_number();
        boolean_t old_doprnt_hide_pointers = doprnt_hide_pointers;
 
+#if DEVELOPMENT || DEBUG
+       /* Turn off I/O tracing now that we're panicking */
+       mmiotrace_enabled = 0;
+#endif
+
        if (pbtcpu != cn) {
                os_atomic_inc(&pbtcnt, relaxed);
                /* Spin on print backtrace lock, which serializes output
index 1f69f62f34b6b67df771512eb6cbe40673b175d9..9f0e07fe8194ddf85b7542bb3a6f2c048ed371cc 100644 (file)
@@ -201,18 +201,24 @@ typedef struct cpu_data {
        struct cpu_data         *cpu_this;              /* pointer to myself */
        thread_t                cpu_active_thread;
        thread_t                cpu_nthread;
-       volatile int            cpu_preemption_level;
        int                     cpu_number;             /* Logical CPU */
        void                    *cpu_int_state;         /* interrupt state */
        vm_offset_t             cpu_active_stack;       /* kernel stack base */
        vm_offset_t             cpu_kernel_stack;       /* kernel stack top */
        vm_offset_t             cpu_int_stack_top;
-       int                     cpu_interrupt_level;
        volatile int            cpu_signals;            /* IPI events */
        volatile int            cpu_prior_signals;      /* Last set of events,
                                                         * debugging
                                                         */
        ast_t                   cpu_pending_ast;
+       /*
+        * Note when rearranging fields:
+        * keep cpu_preemption_level on a different cache line
+        * than cpu_active_thread to optimize the mutex-spin phase.
+        */
+       int                     cpu_interrupt_level;
+       volatile int            cpu_preemption_level;
        volatile int            cpu_running;
 #if !MONOTONIC
        boolean_t               cpu_fixed_pmcs_enabled;
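
The reordering matters for the mutex spin path, presumably because cpu_active_thread is polled remotely by spinning waiters while cpu_preemption_level is written constantly by its own CPU, so sharing a line would bounce it between caches. A compile-time guard one could add (a sketch assuming 64-byte cache lines; not part of this diff):

    #include <stddef.h>
    _Static_assert(offsetof(struct cpu_data, cpu_preemption_level) / 64 !=
        offsetof(struct cpu_data, cpu_active_thread) / 64,
        "cpu_preemption_level must not share a cache line with cpu_active_thread");
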
index 25a26de3bd1d298d1e5bef0efbb6f0de746edfed..ff6c8c1fc472cd8418a0ecd01d5dbe0b64edbb00 100644 (file)
@@ -872,9 +872,7 @@ cpuid_set_cpufamily(i386_cpu_info_t *info_p)
                        break;
                case CPUID_MODEL_SKYLAKE:
                case CPUID_MODEL_SKYLAKE_DT:
-#if !defined(RC_HIDE_XNU_J137)
                case CPUID_MODEL_SKYLAKE_W:
-#endif
                        cpufamily = CPUFAMILY_INTEL_SKYLAKE;
                        break;
                case CPUID_MODEL_KABYLAKE:
index a3a6ad6eeb9359f1096dcb8b82fa5300d6ec8ffa..146e77b1570f67b867313b19c7f56b36dda7cf49 100644 (file)
 #define CPUID_MODEL_SKYLAKE_ULT         0x4E
 #define CPUID_MODEL_SKYLAKE_ULX         0x4E
 #define CPUID_MODEL_SKYLAKE_DT          0x5E
-#if !defined(RC_HIDE_XNU_J137)
 #define CPUID_MODEL_SKYLAKE_W           0x55
 #define PLATID_XEON_SP_1                0x00
 #define PLATID_XEON_SP_2                0x07
 #define PLATID_MAYBE_XEON_SP            0x01
-#endif /* not RC_HIDE_XNU_J137 */
 #define CPUID_MODEL_KABYLAKE            0x8E
 #define CPUID_MODEL_KABYLAKE_ULT        0x8E
 #define CPUID_MODEL_KABYLAKE_ULX        0x8E
index b05c65b9af114eaf176d25d73bdb2d57ed31923a..1d747542959d6cbc12fdd4fbe4f2a2a42d7e0cdc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -145,37 +145,26 @@ fxsave64(struct x86_fx_thread_state *a)
        __asm__ __volatile__ ("fxsave64 %0" : "=m" (*a));
 }
 
-#if !defined(RC_HIDE_XNU_J137)
 #define IS_VALID_XSTATE(x)      ((x) == FP || (x) == AVX || (x) == AVX512)
-#else
-#define IS_VALID_XSTATE(x)      ((x) == FP || (x) == AVX)
-#endif
 
 zone_t          ifps_zone[] = {
        [FP]     = NULL,
        [AVX]    = NULL,
-#if !defined(RC_HIDE_XNU_J137)
        [AVX512] = NULL
-#endif
 };
 static uint32_t fp_state_size[] = {
        [FP]     = sizeof(struct x86_fx_thread_state),
        [AVX]    = sizeof(struct x86_avx_thread_state),
-#if !defined(RC_HIDE_XNU_J137)
        [AVX512] = sizeof(struct x86_avx512_thread_state)
-#endif
 };
 
 static const char *xstate_name[] = {
        [UNDEFINED] = "UNDEFINED",
        [FP] = "FP",
        [AVX] = "AVX",
-#if !defined(RC_HIDE_XNU_J137)
        [AVX512] = "AVX512"
-#endif
 };
 
-#if !defined(RC_HIDE_XNU_J137)
 #define fpu_ZMM_capable (fpu_capability == AVX512)
 #define fpu_YMM_capable (fpu_capability == AVX || fpu_capability == AVX512)
 /*
@@ -205,26 +194,16 @@ static const char *xstate_name[] = {
  * Note the initial state value is an AVX512 object but that the AVX initial
  * value is a subset of it.
  */
-#else
-#define fpu_YMM_capable (fpu_capability == AVX)
-#endif
 static uint32_t cpuid_reevaluated = 0;
 
 static void fpu_store_registers(void *, boolean_t);
 static void fpu_load_registers(void *);
 
-#if !defined(RC_HIDE_XNU_J137)
 static const uint32_t xstate_xmask[] = {
        [FP] =          FP_XMASK,
        [AVX] =         AVX_XMASK,
        [AVX512] =      AVX512_XMASK
 };
-#else
-static const uint32_t xstate_xmask[] = {
-       [FP] =          FP_XMASK,
-       [AVX] =         AVX_XMASK,
-};
-#endif
 
 static inline void
 xsave(struct x86_fx_thread_state *a, uint32_t rfbm)
@@ -250,7 +229,6 @@ xrstor64(struct x86_fx_thread_state *a, uint32_t rfbm)
        __asm__ __volatile__ ("xrstor64 %0" ::  "m" (*a), "a"(rfbm), "d"(0));
 }
 
-#if !defined(RC_HIDE_XNU_J137)
 __unused static inline void
 vzeroupper(void)
 {
@@ -349,8 +327,6 @@ DBG_AVX512_STATE(__unused struct x86_avx512_thread_state *sp)
 }
 #endif /* DEBUG_AVX512 */
 
-#endif
-
 #if     DEBUG
 static inline unsigned short
 fnstsw(void)
@@ -389,13 +365,11 @@ configure_mxcsr_capability_mask(x86_ext_thread_state_t *fps)
        /* Clear vector register store */
        bzero(&fps->fx.fx_XMM_reg[0][0], sizeof(fps->fx.fx_XMM_reg));
        bzero(fps->avx.x_YMM_Hi128, sizeof(fps->avx.x_YMM_Hi128));
-#if !defined(RC_HIDE_XNU_J137)
        if (fpu_ZMM_capable) {
                bzero(fps->avx512.x_ZMM_Hi256, sizeof(fps->avx512.x_ZMM_Hi256));
                bzero(fps->avx512.x_Hi16_ZMM, sizeof(fps->avx512.x_Hi16_ZMM));
                bzero(fps->avx512.x_Opmask, sizeof(fps->avx512.x_Opmask));
        }
-#endif
 
        fps->fx.fp_valid = TRUE;
        fps->fx.fp_save_layout = fpu_YMM_capable ? XSAVE32: FXSAVE32;
@@ -458,7 +432,6 @@ init_fpu(void)
 
        PE_parse_boot_argn("fpsimd_fault_popc", &fpsimd_fault_popc, sizeof(fpsimd_fault_popc));
 
-#if !defined(RC_HIDE_XNU_J137)
        static boolean_t is_avx512_enabled = TRUE;
        if (cpu_number() == master_cpu) {
                if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_AVX512F) {
@@ -467,14 +440,12 @@ init_fpu(void)
                            is_avx512_enabled ? "and enabled" : "but disabled");
                }
        }
-#endif
 
        /* Configure the XSAVE context mechanism if the processor supports
         * AVX/YMM registers
         */
        if (cpuid_features() & CPUID_FEATURE_XSAVE) {
                cpuid_xsave_leaf_t *xs0p = &cpuid_info()->cpuid_xsave_leaf[0];
-#if !defined(RC_HIDE_XNU_J137)
                if (is_avx512_enabled &&
                    (xs0p->extended_state[eax] & XFEM_ZMM) == XFEM_ZMM) {
                        assert(xs0p->extended_state[eax] & XFEM_SSE);
@@ -495,9 +466,7 @@ init_fpu(void)
                         */
                        xsetbv(0, AVX_XMASK);
                        fpu_default = AVX;
-               } else
-#endif
-               if (xs0p->extended_state[eax] & XFEM_YMM) {
+               } else if (xs0p->extended_state[eax] & XFEM_YMM) {
                        assert(xs0p->extended_state[eax] & XFEM_SSE);
                        fpu_capability = AVX;
                        fpu_default = AVX;
@@ -636,9 +605,7 @@ fpu_store_registers(void *fstate, boolean_t is64)
                }
                break;
        case AVX:
-#if !defined(RC_HIDE_XNU_J137)
        case AVX512:
-#endif
                if (is64) {
                        xsave64(ifps, xstate_xmask[xs]);
                        ifps->fp_save_layout = XSAVE64;
@@ -679,7 +646,6 @@ fpu_module_init(void)
         */
        zone_change(ifps_zone[fpu_default], Z_ALIGNMENT_REQUIRED, TRUE);
 
-#if !defined(RC_HIDE_XNU_J137)
        /*
         * If AVX512 is supported, create a separate savearea zone.
         * with allocation size: 19 pages = 32 * 2668
@@ -691,7 +657,6 @@ fpu_module_init(void)
                    "x86 avx512 save state");
                zone_change(ifps_zone[AVX512], Z_ALIGNMENT_REQUIRED, TRUE);
        }
-#endif
 
        /* Determine MXCSR reserved bits and configure initial FPU state*/
        configure_mxcsr_capability_mask(&initial_fp_state);
@@ -784,6 +749,7 @@ fpu_set_fxstate(
        x86_float_state64_t             *state;
        pcb_t                           pcb;
        boolean_t                       old_valid, fresh_state = FALSE;
+       xstate_t                        thr_xstate;
 
        if (fpu_capability == UNDEFINED) {
                return KERN_FAILURE;
@@ -794,18 +760,22 @@ fpu_set_fxstate(
                return KERN_FAILURE;
        }
 
-#if !defined(RC_HIDE_XNU_J137)
+       assert(thr_act != THREAD_NULL);
+
+       thr_xstate = thread_xstate(thr_act);
+
        if ((f == x86_AVX512_STATE32 || f == x86_AVX512_STATE64) &&
-           thread_xstate(thr_act) == AVX) {
+           thr_xstate == AVX) {
                if (!fpu_thread_promote_avx512(thr_act)) {
                        return KERN_FAILURE;
+               } else {
+                       /* Reload thr_xstate after successful promotion */
+                       thr_xstate = thread_xstate(thr_act);
                }
        }
-#endif
 
        state = (x86_float_state64_t *)tstate;
 
-       assert(thr_act != THREAD_NULL);
        pcb = THREAD_TO_PCB(thr_act);
 
        if (state == NULL) {
@@ -821,7 +791,7 @@ fpu_set_fxstate(
                simple_unlock(&pcb->lock);
 
                if (ifps != 0) {
-                       fp_state_free(ifps, thread_xstate(thr_act));
+                       fp_state_free(ifps, thr_xstate);
                }
        } else {
                /*
@@ -835,13 +805,13 @@ Retry:
                if (ifps == 0) {
                        if (new_ifps == 0) {
                                simple_unlock(&pcb->lock);
-                               new_ifps = fp_state_alloc(thread_xstate(thr_act));
+                               new_ifps = fp_state_alloc(thr_xstate);
                                goto Retry;
                        }
                        ifps = new_ifps;
                        new_ifps = 0;
                        pcb->ifps = ifps;
-                       pcb->xstate = thread_xstate(thr_act);
+                       pcb->xstate = thr_xstate;
                        fresh_state = TRUE;
                }
 
@@ -865,12 +835,12 @@ Retry:
 
                __nochk_bcopy((char *)&state->fpu_fcw, (char *)ifps, fp_state_size[FP]);
 
-               switch (thread_xstate(thr_act)) {
+               switch (thr_xstate) {
                case UNDEFINED_FULL:
                case FP_FULL:
                case AVX_FULL:
                case AVX512_FULL:
-                       panic("fpu_set_fxstate() INVALID xstate: 0x%x", thread_xstate(thr_act));
+                       panic("fpu_set_fxstate() INVALID xstate: 0x%x", thr_xstate);
                        break;
 
                case UNDEFINED:
@@ -899,7 +869,6 @@ Retry:
                        }
                        break;
                }
-#if !defined(RC_HIDE_XNU_J137)
                case AVX512: {
                        struct x86_avx512_thread_state *iavx = (void *) ifps;
                        union {
@@ -938,7 +907,6 @@ Retry:
                        }
                        break;
                }
-#endif
                }
 
                ifps->fp_valid = old_valid;
@@ -957,7 +925,7 @@ Retry:
                simple_unlock(&pcb->lock);
 
                if (new_ifps != 0) {
-                       fp_state_free(new_ifps, thread_xstate(thr_act));
+                       fp_state_free(new_ifps, thr_xstate);
                }
        }
        return KERN_SUCCESS;
@@ -979,6 +947,7 @@ fpu_get_fxstate(
        x86_float_state64_t             *state;
        kern_return_t                   ret = KERN_FAILURE;
        pcb_t                           pcb;
+       xstate_t                        thr_xstate = thread_xstate(thr_act);
 
        if (fpu_capability == UNDEFINED) {
                return KERN_FAILURE;
@@ -989,12 +958,10 @@ fpu_get_fxstate(
                return KERN_FAILURE;
        }
 
-#if !defined(RC_HIDE_XNU_J137)
        if ((f == x86_AVX512_STATE32 || f == x86_AVX512_STATE64) &&
-           thread_xstate(thr_act) != AVX512) {
+           thr_xstate != AVX512) {
                return KERN_FAILURE;
        }
-#endif
 
        state = (x86_float_state64_t *)tstate;
 
@@ -1033,12 +1000,12 @@ fpu_get_fxstate(
        }
        if (ifps->fp_valid) {
                __nochk_bcopy((char *)ifps, (char *)&state->fpu_fcw, fp_state_size[FP]);
-               switch (thread_xstate(thr_act)) {
+               switch (thr_xstate) {
                case UNDEFINED_FULL:
                case FP_FULL:
                case AVX_FULL:
                case AVX512_FULL:
-                       panic("fpu_get_fxstate() INVALID xstate: 0x%x", thread_xstate(thr_act));
+                       panic("fpu_get_fxstate() INVALID xstate: 0x%x", thr_xstate);
                        break;
 
                case UNDEFINED:
@@ -1056,7 +1023,6 @@ fpu_get_fxstate(
                        }
                        break;
                }
-#if !defined(RC_HIDE_XNU_J137)
                case AVX512: {
                        struct x86_avx512_thread_state *iavx = (void *) ifps;
                        union {
@@ -1087,7 +1053,6 @@ fpu_get_fxstate(
                        }
                        break;
                }
-#endif
                }
 
                ret = KERN_SUCCESS;
@@ -1460,12 +1425,12 @@ fpSSEexterrflt(void)
 }
 
 
-#if !defined(RC_HIDE_XNU_J137)
 /*
  * If a thread is using an AVX-sized savearea:
  * - allocate a new AVX512-sized area,
  * - copy the 256-bit state into the 512-bit area,
  * - deallocate the smaller area
+ * ASSUMES: thread is the current thread.
  */
 static void
 fpu_savearea_promote_avx512(thread_t thread)
@@ -1474,8 +1439,11 @@ fpu_savearea_promote_avx512(thread_t thread)
        struct x86_avx512_thread_state  *ifps512 = NULL;
        pcb_t                           pcb = THREAD_TO_PCB(thread);
        boolean_t                       do_avx512_alloc = FALSE;
+       boolean_t                       intr;
 
-       DBG("fpu_upgrade_savearea(%p)\n", thread);
+       assert(thread == current_thread());
+
+       DBG("fpu_savearea_promote_avx512(%p)\n", thread);
 
        simple_lock(&pcb->lock, LCK_GRP_NULL);
 
@@ -1483,11 +1451,21 @@ fpu_savearea_promote_avx512(thread_t thread)
        if (ifps == NULL) {
                pcb->xstate = AVX512;
                simple_unlock(&pcb->lock);
-               if (thread != current_thread()) {
-                       /* nothing to be done */
+               /*
+                * Now that the PCB xstate has been promoted, set XCR0 so
+                * that we don't re-trip #UD on the next AVX-512 instruction.
+                *
+                * Since this branch is taken when the first FP instruction
+                * attempted by this thread is an AVX-512 instruction, we
+                * call fpnoextflt() to allocate an appropriately-sized
+                * AVX-512 save-area, thereby avoiding the overhead of another
+                * fault that would be triggered immediately on return.
+                */
+               intr = ml_set_interrupts_enabled(FALSE);
+               xsetbv(0, AVX512_XMASK);
+               current_cpu_datap()->cpu_xstate = AVX512;
+               (void)ml_set_interrupts_enabled(intr);
 
-                       return;
-               }
                fpnoextflt();
                return;
        }
@@ -1495,6 +1473,7 @@ fpu_savearea_promote_avx512(thread_t thread)
        if (pcb->xstate != AVX512) {
                do_avx512_alloc = TRUE;
        }
+
        simple_unlock(&pcb->lock);
 
        if (do_avx512_alloc == TRUE) {
@@ -1502,19 +1481,17 @@ fpu_savearea_promote_avx512(thread_t thread)
        }
 
        simple_lock(&pcb->lock, LCK_GRP_NULL);
-       if (thread == current_thread()) {
-               boolean_t       intr;
 
-               intr = ml_set_interrupts_enabled(FALSE);
+       intr = ml_set_interrupts_enabled(FALSE);
 
-               clear_ts();
-               fp_save(thread);
-               clear_fpu();
+       clear_ts();
+       fp_save(thread);
+       clear_fpu();
+
+       xsetbv(0, AVX512_XMASK);
+       current_cpu_datap()->cpu_xstate = AVX512;
+       (void)ml_set_interrupts_enabled(intr);
 
-               xsetbv(0, AVX512_XMASK);
-               current_cpu_datap()->cpu_xstate = AVX512;
-               (void)ml_set_interrupts_enabled(intr);
-       }
        assert(ifps->fp.fp_valid);
 
        /* Allocate an AVX512 savearea and copy AVX state into it */
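
Promotion widens XCR0 in step with the savearea, with interrupts masked so the per-CPU cpu_xstate cache stays coherent with the hardware. The masks follow the architectural XCR0 feature bits (the values below are the standard bit assignments, not read from this diff):

    /* XCR0 feature bits: x87=bit 0, SSE=bit 1, YMM=bit 2,
     * opmask=bit 5, ZMM_Hi256=bit 6, Hi16_ZMM=bit 7. */
    #define FP_XMASK     0x03u   /* x87 | SSE                       */
    #define AVX_XMASK    0x07u   /* FP_XMASK | YMM                  */
    #define AVX512_XMASK 0xe7u   /* AVX_XMASK | opmask | ZMM fields */
    /* xsetbv(0, AVX512_XMASK) enables every AVX-512 state component. */
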
@@ -1568,9 +1545,10 @@ fpu_thread_promote_avx512(thread_t thread)
  * If the user is attempting an AVX512 instruction on a machine
  * that supports this, we switch the calling thread to use
  * a larger savearea, set its XCR0 bit mask to enable AVX512 and
- * return directly via thread_exception_return().
- * Otherwise simply return.
+ * return to user_trap() with a 0 return value.
+ * Otherwise, simply return a nonzero value.
  */
+
 #define MAX_X86_INSN_LENGTH (15)
 int
 fpUDflt(user_addr_t rip)
@@ -1632,7 +1610,6 @@ fpUDflt(user_addr_t rip)
 
        return 0;
 }
-#endif /* !defined(RC_HIDE_XNU_J137) */
 
 void
 fp_setvalid(boolean_t value)
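
fpUDflt above runs from the #UD trap: it reads up to MAX_X86_INSN_LENGTH bytes at the faulting RIP and promotes the thread only when the instruction really is AVX-512. A simplified sketch of the detection, assuming the 0x62 EVEX escape byte marks AVX-512 in 64-bit mode (the real decoder also steps over legacy prefixes first):

    uint8_t byte;
    if (copyin(rip, (char *)&byte, 1) != 0) {
            return 1;               /* unreadable: leave the #UD to be delivered */
    }
    if (byte == 0x62) {             /* EVEX prefix => AVX-512 instruction */
            return fpu_thread_promote_avx512(current_thread()) ? 0 : 1;
    }
    return 1;                       /* not AVX-512: deliver the fault */
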
@@ -1657,13 +1634,11 @@ ml_fpu_avx_enabled(void)
        return fpu_capability >= AVX;
 }
 
-#if !defined(RC_HIDE_XNU_J137)
 boolean_t
 ml_fpu_avx512_enabled(void)
 {
        return fpu_capability == AVX512;
 }
-#endif
 
 static xstate_t
 task_xstate(task_t task)
index 8f20ce6eb6f2e274810823e24fec3ca509c912e2..70fa633d7c95449b35cd22dd028132081739d0a9 100644 (file)
@@ -829,7 +829,8 @@ i386_init(void)
 #endif /* MONOTONIC */
 
        processor_bootstrap();
-       thread_bootstrap();
+       thread_t thread = thread_bootstrap();
+       machine_set_current_thread(thread);
 
        pstate_trace();
        kernel_debug_string_early("machine_startup");
index e553bc4a0098bffcb26dca722afe3f47c8051ce4..760a167cf9d380a8c5e918cb3081879bdc59a538 100644 (file)
@@ -115,11 +115,18 @@ typedef struct _lck_mtx_ {
 
 /* Adaptive spin before blocking */
 extern uint64_t         MutexSpin;
+extern uint64_t         low_MutexSpin;
+extern int64_t          high_MutexSpin;
 
 typedef enum lck_mtx_spinwait_ret_type {
        LCK_MTX_SPINWAIT_ACQUIRED = 0,
-       LCK_MTX_SPINWAIT_SPUN = 1,
-       LCK_MTX_SPINWAIT_NO_SPIN = 2,
+
+       LCK_MTX_SPINWAIT_SPUN_HIGH_THR = 1,
+       LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE = 2,
+       LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION = 3,
+       LCK_MTX_SPINWAIT_SPUN_SLIDING_THR = 4,
+
+       LCK_MTX_SPINWAIT_NO_SPIN = 5,
 } lck_mtx_spinwait_ret_type_t;
 
 extern lck_mtx_spinwait_ret_type_t              lck_mtx_lock_spinwait_x86(lck_mtx_t *mutex);
index c5b0d303748e16513487e649c2403d71f5df1b48..25b32934510f7e6672349ebc5dd2ef3a4f750aa4 100644 (file)
 #include <machine/atomic.h>
 #include <sys/kdebug.h>
 #include <i386/locks_i386_inlines.h>
+#include <kern/cpu_number.h>
+#include <os/hash.h>
 
-#if    CONFIG_DTRACE
-#define DTRACE_RW_SHARED       0x0     //reader
-#define DTRACE_RW_EXCL         0x1     //writer
-#define DTRACE_NO_FLAG         0x0     //not applicable
+#if     CONFIG_DTRACE
+#define DTRACE_RW_SHARED        0x0     //reader
+#define DTRACE_RW_EXCL          0x1     //writer
+#define DTRACE_NO_FLAG          0x0     //not applicable
 #endif /* CONFIG_DTRACE */
 
-#define        LCK_RW_LCK_EXCLUSIVE_CODE       0x100
-#define        LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
-#define        LCK_RW_LCK_SHARED_CODE          0x102
-#define        LCK_RW_LCK_SH_TO_EX_CODE        0x103
-#define        LCK_RW_LCK_SH_TO_EX1_CODE       0x104
-#define        LCK_RW_LCK_EX_TO_SH_CODE        0x105
+#define LCK_RW_LCK_EXCLUSIVE_CODE       0x100
+#define LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
+#define LCK_RW_LCK_SHARED_CODE          0x102
+#define LCK_RW_LCK_SH_TO_EX_CODE        0x103
+#define LCK_RW_LCK_SH_TO_EX1_CODE       0x104
+#define LCK_RW_LCK_EX_TO_SH_CODE        0x105
 
-#define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
-#define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
-#define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
-#define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
-#define LCK_RW_LCK_SHARED_SPIN_CODE    0x110
-#define LCK_RW_LCK_SHARED_WAIT_CODE    0x111
-#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE  0x112
-#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE  0x113
+#define LCK_RW_LCK_EX_WRITER_SPIN_CODE  0x106
+#define LCK_RW_LCK_EX_WRITER_WAIT_CODE  0x107
+#define LCK_RW_LCK_EX_READER_SPIN_CODE  0x108
+#define LCK_RW_LCK_EX_READER_WAIT_CODE  0x109
+#define LCK_RW_LCK_SHARED_SPIN_CODE     0x110
+#define LCK_RW_LCK_SHARED_WAIT_CODE     0x111
+#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE   0x112
+#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE   0x113
 
 
-#define        ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
+#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
 
-unsigned int LcksOpts=0;
+unsigned int LcksOpts = 0;
 
 #if DEVELOPMENT || DEBUG
 unsigned int LckDisablePreemptCheck = 0;
@@ -118,15 +120,15 @@ unsigned int LckDisablePreemptCheck = 0;
 
 /* Forwards */
 
-#if    USLOCK_DEBUG
+#if     USLOCK_DEBUG
 /*
  *     Perform simple lock checks.
  */
-int    uslock_check = 1;
-int    max_lock_loops  = 100000000;
-decl_simple_lock_data(extern , printf_lock);
-decl_simple_lock_data(extern , panic_lock);
-#endif /* USLOCK_DEBUG */
+int     uslock_check = 1;
+int     max_lock_loops  = 100000000;
+decl_simple_lock_data(extern, printf_lock);
+decl_simple_lock_data(extern, panic_lock);
+#endif  /* USLOCK_DEBUG */
 
 extern unsigned int not_in_kdp;
 
@@ -135,23 +137,23 @@ extern unsigned int not_in_kdp;
  *     of the various lock routines.  However, this information
  *     is only used for debugging and statistics.
  */
-typedef void   *pc_t;
-#define        INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
-#define        INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
-#if    ANY_LOCK_DEBUG
-#define        OBTAIN_PC(pc)   ((pc) = GET_RETURN_PC())
-#define DECL_PC(pc)    pc_t pc;
-#else  /* ANY_LOCK_DEBUG */
+typedef void    *pc_t;
+#define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
+#define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
+#if     ANY_LOCK_DEBUG
+#define OBTAIN_PC(pc)   ((pc) = GET_RETURN_PC())
+#define DECL_PC(pc)     pc_t pc;
+#else   /* ANY_LOCK_DEBUG */
 #define DECL_PC(pc)
-#ifdef lint
+#ifdef  lint
 /*
  *     Eliminate lint complaints about unused local pc variables.
  */
-#define        OBTAIN_PC(pc)   ++pc
-#else  /* lint */
-#define        OBTAIN_PC(pc)
-#endif /* lint */
-#endif /* USLOCK_DEBUG */
+#define OBTAIN_PC(pc)   ++pc
+#else   /* lint */
+#define OBTAIN_PC(pc)
+#endif  /* lint */
+#endif  /* USLOCK_DEBUG */
 
 /*
  * atomic exchange API is a low level abstraction of the operations
@@ -166,9 +168,9 @@ typedef void        *pc_t;
 static uint32_t
 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
 {
-       uint32_t        val;
+       uint32_t        val;
 
-       (void)ord;                      // Memory order not used
+       (void)ord;                      // Memory order not used
        val = os_atomic_load(target, relaxed);
        *previous = val;
        return val;
@@ -181,25 +183,29 @@ atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval,
 }
 
 static void
-atomic_exchange_abort(void) { }
+atomic_exchange_abort(void)
+{
+}
 
 static boolean_t
 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
 {
-       uint32_t        value, prev;
+       uint32_t        value, prev;
 
-       for ( ; ; ) {
+       for (;;) {
                value = atomic_exchange_begin32(target, &prev, ord);
                if (value & test_mask) {
-                       if (wait)
+                       if (wait) {
                                cpu_pause();
-                       else
+                       } else {
                                atomic_exchange_abort();
+                       }
                        return FALSE;
                }
                value |= set_mask;
-               if (atomic_exchange_complete32(target, prev, value, ord))
+               if (atomic_exchange_complete32(target, prev, value, ord)) {
                        return TRUE;
+               }
        }
 }
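
atomic_test_and_set32 packages the begin/complete pair into a test-and-set: it fails (or pauses and retries when wait is TRUE) while any test_mask bit is set, and otherwise publishes value | set_mask. A minimal usage sketch; the flag name and the field it guards are illustrative, not from this file:

    #define EXAMPLE_ILOCK 0x00000001u       /* hypothetical interlock bit */

    if (atomic_test_and_set32(&mutex->lck_mtx_state, EXAMPLE_ILOCK,
        EXAMPLE_ILOCK, memory_order_acquire, FALSE)) {
            /* interlock acquired: mutate protected state, then clear it */
    }
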
 
@@ -213,18 +219,18 @@ hw_atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask
  *     Portable lock package implementation of usimple_locks.
  */
 
-#if    USLOCK_DEBUG
-#define        USLDBG(stmt)    stmt
-void           usld_lock_init(usimple_lock_t, unsigned short);
-void           usld_lock_pre(usimple_lock_t, pc_t);
-void           usld_lock_post(usimple_lock_t, pc_t);
-void           usld_unlock(usimple_lock_t, pc_t);
-void           usld_lock_try_pre(usimple_lock_t, pc_t);
-void           usld_lock_try_post(usimple_lock_t, pc_t);
-int            usld_lock_common_checks(usimple_lock_t, char *);
-#else  /* USLOCK_DEBUG */
-#define        USLDBG(stmt)
-#endif /* USLOCK_DEBUG */
+#if     USLOCK_DEBUG
+#define USLDBG(stmt)    stmt
+void            usld_lock_init(usimple_lock_t, unsigned short);
+void            usld_lock_pre(usimple_lock_t, pc_t);
+void            usld_lock_post(usimple_lock_t, pc_t);
+void            usld_unlock(usimple_lock_t, pc_t);
+void            usld_lock_try_pre(usimple_lock_t, pc_t);
+void            usld_lock_try_post(usimple_lock_t, pc_t);
+int             usld_lock_common_checks(usimple_lock_t, char *);
+#else   /* USLOCK_DEBUG */
+#define USLDBG(stmt)
+#endif  /* USLOCK_DEBUG */
 
 /*
  * Forward definitions
@@ -243,7 +249,6 @@ static boolean_t lck_rw_grab_shared(lck_rw_t *lock);
 static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, uint32_t state, boolean_t indirect);
 static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
 static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
-static int lck_mtx_interlock_try_lock(lck_mtx_t *mutex, uint32_t *new_state);
 static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
 static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
 static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
@@ -254,15 +259,16 @@ static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint3
  */
 lck_spin_t *
 lck_spin_alloc_init(
-       lck_grp_t       *grp,
-       lck_attr_t      *attr)
+       lck_grp_t       *grp,
+       lck_attr_t      *attr)
 {
-       lck_spin_t      *lck;
+       lck_spin_t      *lck;
 
-       if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
+       if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0) {
                lck_spin_init(lck, grp, attr);
+       }
 
-       return(lck);
+       return lck;
 }
 
 /*
@@ -270,8 +276,8 @@ lck_spin_alloc_init(
  */
 void
 lck_spin_free(
-       lck_spin_t      *lck,
-       lck_grp_t       *grp)
+       lck_spin_t      *lck,
+       lck_grp_t       *grp)
 {
        lck_spin_destroy(lck, grp);
        kfree(lck, sizeof(lck_spin_t));
@@ -282,9 +288,9 @@ lck_spin_free(
  */
 void
 lck_spin_init(
-       lck_spin_t      *lck,
-       lck_grp_t       *grp,
-       __unused lck_attr_t     *attr)
+       lck_spin_t      *lck,
+       lck_grp_t       *grp,
+       __unused lck_attr_t     *attr)
 {
        usimple_lock_init((usimple_lock_t) lck, 0);
        if (grp) {
@@ -298,11 +304,12 @@ lck_spin_init(
  */
 void
 lck_spin_destroy(
-       lck_spin_t      *lck,
-       lck_grp_t       *grp)
+       lck_spin_t      *lck,
+       lck_grp_t       *grp)
 {
-       if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
+       if (lck->interlock == LCK_SPIN_TAG_DESTROYED) {
                return;
+       }
        lck->interlock = LCK_SPIN_TAG_DESTROYED;
        if (grp) {
                lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
@@ -316,8 +323,8 @@ lck_spin_destroy(
  */
 void
 lck_spin_lock_grp(
-       lck_spin_t      *lck,
-       lck_grp_t       *grp)
+       lck_spin_t      *lck,
+       lck_grp_t       *grp)
 {
 #pragma unused(grp)
        usimple_lock((usimple_lock_t) lck, grp);
@@ -325,7 +332,7 @@ lck_spin_lock_grp(
 
 void
 lck_spin_lock(
-       lck_spin_t      *lck)
+       lck_spin_t      *lck)
 {
        usimple_lock((usimple_lock_t) lck, NULL);
 }
@@ -335,24 +342,24 @@ lck_spin_lock(
  */
 void
 lck_spin_unlock(
-       lck_spin_t      *lck)
+       lck_spin_t      *lck)
 {
        usimple_unlock((usimple_lock_t) lck);
 }
 
 boolean_t
 lck_spin_try_lock_grp(
-       lck_spin_t      *lck,
-       lck_grp_t       *grp)
+       lck_spin_t      *lck,
+       lck_grp_t       *grp)
 {
 #pragma unused(grp)
        boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
-#if    DEVELOPMENT || DEBUG
+#if     DEVELOPMENT || DEBUG
        if (lrval) {
                pltrace(FALSE);
        }
 #endif
-       return(lrval);
+       return lrval;
 }
 
 
@@ -361,15 +368,15 @@ lck_spin_try_lock_grp(
  */
 boolean_t
 lck_spin_try_lock(
-       lck_spin_t      *lck)
+       lck_spin_t      *lck)
 {
        boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
-#if    DEVELOPMENT || DEBUG
+#if     DEVELOPMENT || DEBUG
        if (lrval) {
                pltrace(FALSE);
        }
 #endif
-       return(lrval);
+       return lrval;
 }
 
 /*
@@ -410,7 +417,8 @@ lck_spin_assert(lck_spin_t *lock, unsigned int type)
  *      Returns: TRUE if lock is acquired.
  */
 boolean_t
-kdp_lck_spin_is_acquired(lck_spin_t *lck) {
+kdp_lck_spin_is_acquired(lck_spin_t *lck)
+{
        if (not_in_kdp) {
                panic("panic: spinlock acquired check done outside of kernel debugger");
        }
@@ -424,21 +432,23 @@ kdp_lck_spin_is_acquired(lck_spin_t *lck) {
  */
 void
 usimple_lock_init(
-       usimple_lock_t  l,
-       __unused unsigned short tag)
+       usimple_lock_t  l,
+       __unused unsigned short tag)
 {
-#ifndef        MACHINE_SIMPLE_LOCK
+#ifndef MACHINE_SIMPLE_LOCK
        USLDBG(usld_lock_init(l, tag));
        hw_lock_init(&l->interlock);
 #else
-       simple_lock_init((simple_lock_t)l,tag);
+       simple_lock_init((simple_lock_t)l, tag);
 #endif
 }
 
 volatile uint32_t spinlock_owner_cpu = ~0;
 volatile usimple_lock_t spinlock_timed_out;
 
-uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
+uint32_t
+spinlock_timeout_NMI(uintptr_t thread_addr)
+{
        uint32_t i;
 
        for (i = 0; i < real_ncpus; i++) {
@@ -464,21 +474,22 @@ uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
  */
 void
 (usimple_lock)(
-       usimple_lock_t  l
+       usimple_lock_t  l
        LCK_GRP_ARG(lck_grp_t *grp))
 {
-#ifndef        MACHINE_SIMPLE_LOCK
+#ifndef MACHINE_SIMPLE_LOCK
        DECL_PC(pc);
 
        OBTAIN_PC(pc);
        USLDBG(usld_lock_pre(l, pc));
 
-       if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC, grp) == 0))   {
+       if (__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC, grp) == 0)) {
                boolean_t uslock_acquired = FALSE;
                while (machine_timeout_suspended()) {
                        enable_preemption();
-                       if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC, grp)))
+                       if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC, grp))) {
                                break;
+                       }
                }
 
                if (uslock_acquired == FALSE) {
@@ -487,11 +498,11 @@ void
                        spinlock_timed_out = l;
                        lock_cpu = spinlock_timeout_NMI(lowner);
                        panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
-                             l, lowner,  current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data, mach_absolute_time());
+                           l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data, mach_absolute_time());
                }
        }
 #if DEVELOPMENT || DEBUG
-               pltrace(FALSE);
+       pltrace(FALSE);
 #endif
 
        USLDBG(usld_lock_post(l, pc));
@@ -513,15 +524,15 @@ void
  */
 void
 usimple_unlock(
-       usimple_lock_t  l)
+       usimple_lock_t  l)
 {
-#ifndef        MACHINE_SIMPLE_LOCK
+#ifndef MACHINE_SIMPLE_LOCK
        DECL_PC(pc);
 
        OBTAIN_PC(pc);
        USLDBG(usld_unlock(l, pc));
 #if DEVELOPMENT || DEBUG
-               pltrace(TRUE);
+       pltrace(TRUE);
 #endif
        hw_lock_unlock(&l->interlock);
 #else
@@ -544,11 +555,11 @@ usimple_unlock(
  */
 unsigned int
 usimple_lock_try(
-       usimple_lock_t  l,
+       usimple_lock_t  l,
        lck_grp_t *grp)
 {
-#ifndef        MACHINE_SIMPLE_LOCK
-       unsigned int    success;
+#ifndef MACHINE_SIMPLE_LOCK
+       unsigned int    success;
        DECL_PC(pc);
 
        OBTAIN_PC(pc);
@@ -557,11 +568,11 @@ usimple_lock_try(
 #if DEVELOPMENT || DEBUG
                pltrace(FALSE);
 #endif
-       USLDBG(usld_lock_try_post(l, pc));
+               USLDBG(usld_lock_try_post(l, pc));
        }
        return success;
 #else
-       return(simple_lock_try((simple_lock_t)l, grp));
+       return simple_lock_try((simple_lock_t)l, grp);
 #endif
 }
 
@@ -570,10 +581,11 @@ usimple_lock_try(
  * and spinning on a lock.
  *
  */
-unsigned int
+unsigned
+int
 (usimple_lock_try_lock_mp_signal_safe_loop_deadline)(usimple_lock_t l,
-       uint64_t deadline
-       LCK_GRP_ARG(lck_grp_t *grp))
+    uint64_t deadline
+    LCK_GRP_ARG(lck_grp_t *grp))
 {
        boolean_t istate = ml_get_interrupts_enabled();
 
@@ -582,9 +594,10 @@ unsigned int
        }
 
        while (!simple_lock_try(l, grp)) {
-               if (!istate)
+               if (!istate) {
                        cpu_signal_handler(NULL);
-               
+               }
+
                if (deadline < mach_absolute_time()) {
                        return 0;
                }
@@ -597,15 +610,16 @@ unsigned int
 
 void
 (usimple_lock_try_lock_loop)(usimple_lock_t l
-       LCK_GRP_ARG(lck_grp_t *grp))
+    LCK_GRP_ARG(lck_grp_t *grp))
 {
        usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ULLONG_MAX, grp);
 }
 
-unsigned int
+unsigned
+int
 (usimple_lock_try_lock_mp_signal_safe_loop_duration)(usimple_lock_t l,
-       uint64_t duration
-       LCK_GRP_ARG(lck_grp_t *grp))
+    uint64_t duration
+    LCK_GRP_ARG(lck_grp_t *grp))
 {
        uint64_t deadline;
        uint64_t base_at = mach_absolute_time();
@@ -621,17 +635,17 @@ unsigned int
        return usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, deadline, grp);
 }
 
-#if    USLOCK_DEBUG
+#if     USLOCK_DEBUG
 /*
  *     States of a usimple_lock.  The default when initializing
  *     a usimple_lock is setting it up for debug checking.
  */
-#define        USLOCK_CHECKED          0x0001          /* lock is being checked */
-#define        USLOCK_TAKEN            0x0002          /* lock has been taken */
-#define        USLOCK_INIT             0xBAA0          /* lock has been initialized */
-#define        USLOCK_INITIALIZED      (USLOCK_INIT|USLOCK_CHECKED)
-#define        USLOCK_CHECKING(l)      (uslock_check &&                        \
-                                ((l)->debug.state & USLOCK_CHECKED))
+#define USLOCK_CHECKED          0x0001          /* lock is being checked */
+#define USLOCK_TAKEN            0x0002          /* lock has been taken */
+#define USLOCK_INIT             0xBAA0          /* lock has been initialized */
+#define USLOCK_INITIALIZED      (USLOCK_INIT|USLOCK_CHECKED)
+#define USLOCK_CHECKING(l)      (uslock_check &&                        \
+                                ((l)->debug.state & USLOCK_CHECKED))
 
 /*
  *     Initialize the debugging information contained
@@ -639,11 +653,12 @@ unsigned int
  */
 void
 usld_lock_init(
-       usimple_lock_t  l,
-       __unused unsigned short tag)
+       usimple_lock_t  l,
+       __unused unsigned short tag)
 {
-       if (l == USIMPLE_LOCK_NULL)
+       if (l == USIMPLE_LOCK_NULL) {
                panic("lock initialization:  null lock pointer");
+       }
        l->lock_type = USLOCK_TAG;
        l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
        l->debug.lock_cpu = l->debug.unlock_cpu = 0;
@@ -662,15 +677,18 @@ usld_lock_init(
  */
 int
 usld_lock_common_checks(
-       usimple_lock_t  l,
-       char            *caller)
+       usimple_lock_t  l,
+       char            *caller)
 {
-       if (l == USIMPLE_LOCK_NULL)
+       if (l == USIMPLE_LOCK_NULL) {
                panic("%s:  null lock pointer", caller);
-       if (l->lock_type != USLOCK_TAG)
+       }
+       if (l->lock_type != USLOCK_TAG) {
                panic("%s:  %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
-       if (!(l->debug.state & USLOCK_INIT))
+       }
+       if (!(l->debug.state & USLOCK_INIT)) {
                panic("%s:  %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
+       }
        return USLOCK_CHECKING(l);
 }
 
@@ -682,14 +700,15 @@ usld_lock_common_checks(
 /* ARGSUSED */
 void
 usld_lock_pre(
-       usimple_lock_t  l,
-       pc_t            pc)
+       usimple_lock_t  l,
+       pc_t            pc)
 {
-       char    caller[] = "usimple_lock";
+       char    caller[] = "usimple_lock";
 
 
-       if (!usld_lock_common_checks(l, caller))
+       if (!usld_lock_common_checks(l, caller)) {
                return;
+       }
 
 /*
 *     Note that we have a weird case where we are getting a lock when we are
@@ -702,9 +721,9 @@ usld_lock_pre(
        if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
            l->debug.lock_thread == (void *) current_thread()) {
                printf("%s:  lock %p already locked (at %p) by",
-                     caller, l, l->debug.lock_pc);
+                   caller, l, l->debug.lock_pc);
                printf(" current thread %p (new attempt at pc %p)\n",
-                      l->debug.lock_thread, pc);
+                   l->debug.lock_thread, pc);
                panic("%s", caller);
        }
        mp_disable_preemption();
@@ -720,22 +739,25 @@ usld_lock_pre(
  */
 void
 usld_lock_post(
-       usimple_lock_t  l,
-       pc_t            pc)
+       usimple_lock_t  l,
+       pc_t            pc)
 {
-       int     mycpu;
-       char    caller[] = "successful usimple_lock";
+       int     mycpu;
+       char    caller[] = "successful usimple_lock";
 
 
-       if (!usld_lock_common_checks(l, caller))
+       if (!usld_lock_common_checks(l, caller)) {
                return;
+       }
 
-       if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
+       if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
                panic("%s:  lock %p became uninitialized",
-                     caller, l);
-       if ((l->debug.state & USLOCK_TAKEN))
+                   caller, l);
+       }
+       if ((l->debug.state & USLOCK_TAKEN)) {
                panic("%s:  lock 0x%p became TAKEN by someone else",
-                     caller, l);
+                   caller, l);
+       }
 
        mycpu = cpu_number();
        l->debug.lock_thread = (void *)current_thread();
@@ -755,27 +777,30 @@ usld_lock_post(
  */
 void
 usld_unlock(
-       usimple_lock_t  l,
-       pc_t            pc)
+       usimple_lock_t  l,
+       pc_t            pc)
 {
-       int     mycpu;
-       char    caller[] = "usimple_unlock";
+       int     mycpu;
+       char    caller[] = "usimple_unlock";
 
 
-       if (!usld_lock_common_checks(l, caller))
+       if (!usld_lock_common_checks(l, caller)) {
                return;
+       }
 
        mycpu = cpu_number();
 
-       if (!(l->debug.state & USLOCK_TAKEN))
+       if (!(l->debug.state & USLOCK_TAKEN)) {
                panic("%s:  lock 0x%p hasn't been taken",
-                     caller, l);
-       if (l->debug.lock_thread != (void *) current_thread())
+                   caller, l);
+       }
+       if (l->debug.lock_thread != (void *) current_thread()) {
                panic("%s:  unlocking lock 0x%p, owned by thread %p",
-                     caller, l, l->debug.lock_thread);
+                   caller, l, l->debug.lock_thread);
+       }
        if (l->debug.lock_cpu != mycpu) {
                printf("%s:  unlocking lock 0x%p on cpu 0x%x",
-                      caller, l, mycpu);
+                   caller, l, mycpu);
                printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
                panic("%s", caller);
        }
@@ -796,13 +821,14 @@ usld_unlock(
  */
 void
 usld_lock_try_pre(
-       usimple_lock_t  l,
-       __unused pc_t   pc)
+       usimple_lock_t  l,
+       __unused pc_t   pc)
 {
-       char    caller[] = "usimple_lock_try";
+       char    caller[] = "usimple_lock_try";
 
-       if (!usld_lock_common_checks(l, caller))
+       if (!usld_lock_common_checks(l, caller)) {
                return;
+       }
 }
 
 
@@ -816,21 +842,24 @@ usld_lock_try_pre(
  */
 void
 usld_lock_try_post(
-       usimple_lock_t  l,
-       pc_t            pc)
+       usimple_lock_t  l,
+       pc_t            pc)
 {
-       int     mycpu;
-       char    caller[] = "successful usimple_lock_try";
+       int     mycpu;
+       char    caller[] = "successful usimple_lock_try";
 
-       if (!usld_lock_common_checks(l, caller))
+       if (!usld_lock_common_checks(l, caller)) {
                return;
+       }
 
-       if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
+       if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
                panic("%s:  lock 0x%p became uninitialized",
-                     caller, l);
-       if ((l->debug.state & USLOCK_TAKEN))
+                   caller, l);
+       }
+       if ((l->debug.state & USLOCK_TAKEN)) {
                panic("%s:  lock 0x%p became TAKEN by someone else",
-                     caller, l);
+                   caller, l);
+       }
 
        mycpu = cpu_number();
        l->debug.lock_thread = (void *) current_thread();
@@ -838,23 +867,24 @@ usld_lock_try_post(
        l->debug.lock_pc = pc;
        l->debug.lock_cpu = mycpu;
 }
-#endif /* USLOCK_DEBUG */
+#endif  /* USLOCK_DEBUG */
 
 /*
  *      Routine:        lck_rw_alloc_init
  */
 lck_rw_t *
 lck_rw_alloc_init(
-       lck_grp_t       *grp,
-       lck_attr_t      *attr) {
-       lck_rw_t        *lck;
+       lck_grp_t       *grp,
+       lck_attr_t      *attr)
+{
+       lck_rw_t        *lck;
 
        if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
                bzero(lck, sizeof(lck_rw_t));
                lck_rw_init(lck, grp, attr);
        }
 
-       return(lck);
+       return lck;
 }
 
 /*
@@ -862,8 +892,9 @@ lck_rw_alloc_init(
  */
 void
 lck_rw_free(
-       lck_rw_t        *lck,
-       lck_grp_t       *grp) {
+       lck_rw_t        *lck,
+       lck_grp_t       *grp)
+{
        lck_rw_destroy(lck, grp);
        kfree(lck, sizeof(lck_rw_t));
 }
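
lck_rw_alloc_init() and lck_rw_free() pair a kalloc'd lock with its lock group for accounting: the allocator zeroes the storage and hands off to lck_rw_init(), and the free path destroys before releasing memory. A minimal lifecycle sketch, assuming a kext-style context; the group name and helper names are illustrative:

    #include <kern/locks.h>

    static lck_grp_t *my_grp;     /* created once, e.g. in a kext start routine */
    static lck_rw_t  *my_rwlock;

    static void
    my_locks_setup(void)
    {
        my_grp    = lck_grp_alloc_init("com.example.mylocks", LCK_GRP_ATTR_NULL);
        my_rwlock = lck_rw_alloc_init(my_grp, LCK_ATTR_NULL);
    }

    static void
    my_locks_teardown(void)
    {
        lck_rw_free(my_rwlock, my_grp);   /* destroys, then kfree()s the lock */
        lck_grp_free(my_grp);
    }
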
@@ -873,12 +904,12 @@ lck_rw_free(
  */
 void
 lck_rw_init(
-       lck_rw_t        *lck,
-       lck_grp_t       *grp,
-       lck_attr_t      *attr)
+       lck_rw_t        *lck,
+       lck_grp_t       *grp,
+       lck_attr_t      *attr)
 {
-       lck_attr_t      *lck_attr = (attr != LCK_ATTR_NULL) ?
-                                       attr : &LockDefaultLckAttr;
+       lck_attr_t      *lck_attr = (attr != LCK_ATTR_NULL) ?
+           attr : &LockDefaultLckAttr;
 
        hw_lock_byte_init(&lck->lck_rw_interlock);
        lck->lck_rw_want_write = FALSE;
@@ -888,7 +919,7 @@ lck_rw_init(
        lck->lck_r_waiting = lck->lck_w_waiting = 0;
        lck->lck_rw_tag = 0;
        lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
-                               LCK_ATTR_RW_SHARED_PRIORITY) == 0);
+           LCK_ATTR_RW_SHARED_PRIORITY) == 0);
 
        lck_grp_reference(grp);
        lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
@@ -899,11 +930,12 @@ lck_rw_init(
  */
 void
 lck_rw_destroy(
-       lck_rw_t        *lck,
-       lck_grp_t       *grp)
+       lck_rw_t        *lck,
+       lck_grp_t       *grp)
 {
-       if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
+       if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
                return;
+       }
 #if MACH_LDEBUG
        lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
 #endif
@@ -929,7 +961,7 @@ lck_rw_destroy(
 static inline boolean_t
 lck_interlock_lock(lck_rw_t *lck)
 {
-       boolean_t       istate;
+       boolean_t       istate;
 
        istate = ml_set_interrupts_enabled(FALSE);
        hw_lock_byte_lock(&lck->lck_rw_interlock);
@@ -952,16 +984,18 @@ lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
 static inline void
 lck_rw_lock_pause(boolean_t interrupts_enabled)
 {
-       if (!interrupts_enabled)
+       if (!interrupts_enabled) {
                handle_pending_TLB_flushes();
+       }
        cpu_pause();
 }
 
 static inline boolean_t
 lck_rw_held_read_or_upgrade(lck_rw_t *lock)
 {
-       if (ordered_load(&lock->data) & (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE))
+       if (ordered_load(&lock->data) & (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE)) {
                return TRUE;
+       }
        return FALSE;
 }
 
@@ -976,7 +1010,7 @@ lck_rw_deadline_for_spin(lck_rw_t *lck)
                if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
                        /*
                         * there are already threads waiting on this lock... this
-                        * implies that they have spun beyond their deadlines waiting for 
+                        * implies that they have spun beyond their deadlines waiting for
                         * the desired state to show up so we will not bother spinning at this time...
                         *   or
                         * the current number of threads sharing this lock exceeds our capacity to run them
@@ -984,11 +1018,12 @@ lck_rw_deadline_for_spin(lck_rw_t *lck)
                         * to be at 0, we'll not bother spinning since the latency for this to happen is
                         * unpredictable...
                         */
-                       return (mach_absolute_time());
+                       return mach_absolute_time();
                }
-               return (mach_absolute_time() + MutexSpin);
-       } else
-               return (mach_absolute_time() + (100000LL * 1000000000LL));
+               return mach_absolute_time() + MutexSpin;
+       } else {
+               return mach_absolute_time() + (100000LL * 1000000000LL);
+       }
 }
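
lck_rw_deadline_for_spin() caps how long a contender will spin: if waiters already exist or the reader count exceeds the CPU count it returns "now" (don't spin at all), otherwise now + MutexSpin, and for non-sleepable locks an effectively unbounded deadline. A user-space sketch of the same spin-then-give-up shape, assuming POSIX monotonic clocks; the budget constant is illustrative:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <time.h>

    #define SPIN_BUDGET_NS 50000ULL   /* illustrative spin budget */

    static uint64_t
    now_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
    }

    /* Spin until the lock word looks free or the deadline passes;
     * the caller falls back to blocking when this returns false. */
    static bool
    spin_until_free(atomic_uint *lockword, uint64_t deadline)
    {
        while (now_ns() < deadline) {
            if (atomic_load_explicit(lockword, memory_order_relaxed) == 0) {
                return true;    /* worth attempting the acquire now */
            }
        }
        return false;           /* budget exhausted: block instead */
    }
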
 
 
@@ -1007,12 +1042,13 @@ lck_rw_interlock_spin(lck_rw_t *lock)
 static boolean_t
 lck_rw_grab_want(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->data, &prev, memory_order_relaxed);
-               if ((data & LCK_RW_INTERLOCK) == 0)
+               if ((data & LCK_RW_INTERLOCK) == 0) {
                        break;
+               }
                atomic_exchange_abort();
                lck_rw_interlock_spin(lock);
        }
@@ -1027,12 +1063,13 @@ lck_rw_grab_want(lck_rw_t *lock)
 static boolean_t
 lck_rw_grab_shared(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
-               if ((data & LCK_RW_INTERLOCK) == 0)
+               if ((data & LCK_RW_INTERLOCK) == 0) {
                        break;
+               }
                atomic_exchange_abort();
                lck_rw_interlock_spin(lock);
        }
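
lck_rw_grab_want() and lck_rw_grab_shared() share one optimistic pattern: snapshot the 32-bit lock word, back off and spin while the interlock bit is set, otherwise compute the new value and publish it with a compare-and-swap, retrying on failure. A rough user-space analogue of that pattern using C11 atomics (the bit layout here is illustrative, not the kernel's):

    #include <stdatomic.h>
    #include <stdbool.h>

    #define RW_INTERLOCK 0x00000001u   /* illustrative bit layout */
    #define RW_READER    0x00000100u

    static bool
    grab_shared(atomic_uint *lockword)
    {
        unsigned int prev = atomic_load_explicit(lockword, memory_order_relaxed);

        for (;;) {
            if (prev & RW_INTERLOCK) {
                /* interlock held: abandon this attempt and re-snapshot,
                 * as atomic_exchange_abort() does above */
                prev = atomic_load_explicit(lockword, memory_order_relaxed);
                continue;
            }
            /* try to publish prev + one reader; on failure, prev is
             * refreshed with the current value and we loop again */
            if (atomic_compare_exchange_weak_explicit(lockword, &prev,
                prev + RW_READER, memory_order_acquire, memory_order_relaxed)) {
                return true;
            }
        }
    }
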
@@ -1051,19 +1088,19 @@ lck_rw_grab_shared(lck_rw_t *lock)
  */
 static void
 lck_rw_lock_exclusive_gen(
-       lck_rw_t        *lck)
+       lck_rw_t        *lck)
 {
-       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
-       uint64_t        deadline = 0;
-       int             slept = 0;
-       int             gotlock = 0;
-       int             lockheld = 0;
-       wait_result_t   res = 0;
-       boolean_t       istate = -1;
+       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
+       uint64_t        deadline = 0;
+       int             slept = 0;
+       int             gotlock = 0;
+       int             lockheld = 0;
+       wait_result_t   res = 0;
+       boolean_t       istate = -1;
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        boolean_t dtrace_ls_initialized = FALSE;
-       boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
+       boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
        uint64_t wait_interval = 0;
        int readers_at_sleep = 0;
 #endif
@@ -1071,9 +1108,8 @@ lck_rw_lock_exclusive_gen(
        /*
         *      Try to acquire the lck_rw_want_write bit.
         */
-       while ( !lck_rw_grab_want(lck)) {
-
-#if    CONFIG_DTRACE
+       while (!lck_rw_grab_want(lck)) {
+#if     CONFIG_DTRACE
                if (dtrace_ls_initialized == FALSE) {
                        dtrace_ls_initialized = TRUE;
                        dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
@@ -1089,38 +1125,39 @@ lck_rw_lock_exclusive_gen(
                        }
                }
 #endif
-               if (istate == -1)
+               if (istate == -1) {
                        istate = ml_get_interrupts_enabled();
+               }
 
                deadline = lck_rw_deadline_for_spin(lck);
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
 
-               while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
+               while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline) {
                        lck_rw_lock_pause(istate);
+               }
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
 
-               if (gotlock)
+               if (gotlock) {
                        break;
+               }
                /*
                 * if we get here, the deadline has expired w/o us
                 * being able to grab the lock exclusively
                 * check to see if we're allowed to do a thread_block
                 */
                if (lck->lck_rw_can_sleep) {
-
                        istate = lck_interlock_lock(lck);
 
                        if (lck->lck_rw_want_write) {
-
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
 
                                lck->lck_w_waiting = TRUE;
 
                                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
                                res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
-                                               THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+                                   THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                                lck_interlock_unlock(lck, istate);
 
                                if (res == THREAD_WAITING) {
@@ -1147,8 +1184,7 @@ lck_rw_lock_exclusive_gen(
         * and the interlock not held, we are safe to proceed
         */
        while (lck_rw_held_read_or_upgrade(lck)) {
-
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
                /*
                 * Either sleeping or spinning is happening, start
                 * a timing of our delay interval now.  If we set it
@@ -1170,27 +1206,29 @@ lck_rw_lock_exclusive_gen(
                        }
                }
 #endif
-               if (istate == -1)
+               if (istate == -1) {
                        istate = ml_get_interrupts_enabled();
+               }
 
                deadline = lck_rw_deadline_for_spin(lck);
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
 
-               while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
+               while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline) {
                        lck_rw_lock_pause(istate);
+               }
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, lockheld, 0);
 
-               if ( !lockheld)
+               if (!lockheld) {
                        break;
+               }
                /*
                 * if we get here, the deadline has expired w/o us
                 * being able to grab the lock exclusively
                 * check to see if we're allowed to do a thread_block
                 */
                if (lck->lck_rw_can_sleep) {
-
                        istate = lck_interlock_lock(lck);
 
                        if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
@@ -1200,7 +1238,7 @@ lck_rw_lock_exclusive_gen(
 
                                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
                                res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
-                                               THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+                                   THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                                lck_interlock_unlock(lck, istate);
 
                                if (res == THREAD_WAITING) {
@@ -1220,7 +1258,7 @@ lck_rw_lock_exclusive_gen(
                }
        }
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        /*
         * Decide what latencies we suffered that are Dtrace events.
         * If we have set wait_interval, then we either spun or slept.
@@ -1254,40 +1292,46 @@ lck_rw_lock_exclusive_gen(
  *      Routine:        lck_rw_done
  */
 
-lck_rw_type_t lck_rw_done(lck_rw_t *lock)
+lck_rw_type_t
+lck_rw_done(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
-               if (data & LCK_RW_INTERLOCK) {          /* wait for interlock to clear */
+               if (data & LCK_RW_INTERLOCK) {          /* wait for interlock to clear */
                        atomic_exchange_abort();
                        lck_rw_interlock_spin(lock);
                        continue;
                }
                if (data & LCK_RW_SHARED_MASK) {
                        data -= LCK_RW_SHARED_READER;
-                       if ((data & LCK_RW_SHARED_MASK) == 0)   /* if reader count has now gone to 0, check for waiters */
+                       if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
                                goto check_waiters;
-               } else {                                        /* if reader count == 0, must be exclusive lock */
+                       }
+               } else {                                        /* if reader count == 0, must be exclusive lock */
                        if (data & LCK_RW_WANT_UPGRADE) {
                                data &= ~(LCK_RW_WANT_UPGRADE);
                        } else {
-                               if (data & LCK_RW_WANT_WRITE)
+                               if (data & LCK_RW_WANT_WRITE) {
                                        data &= ~(LCK_RW_WANT_EXCL);
-                               else                                    /* lock is not 'owned', panic */
+                               } else {                                /* lock is not 'owned', panic */
                                        panic("Releasing non-exclusive RW lock without a reader refcount!");
+                               }
                        }
 check_waiters:
                        if (prev & LCK_RW_W_WAITING) {
                                data &= ~(LCK_RW_W_WAITING);
-                               if ((prev & LCK_RW_PRIV_EXCL) == 0)
+                               if ((prev & LCK_RW_PRIV_EXCL) == 0) {
                                        data &= ~(LCK_RW_R_WAITING);
-                       } else
+                               }
+                       } else {
                                data &= ~(LCK_RW_R_WAITING);
+                       }
                }
-               if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
+               if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp)) {
                        break;
+               }
                cpu_pause();
        }
        return lck_rw_done_gen(lock, prev);
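
lck_rw_done() releases a reader/writer lock without the caller stating which mode it holds: the lock word encodes that, and the return value reports the mode actually released. A minimal sketch of the calling convention, assuming a lock set up as in the earlier example:

    #include <kern/assert.h>
    #include <kern/locks.h>

    static void
    read_then_release(lck_rw_t *lock)
    {
        lck_rw_lock_shared(lock);
        /* ... read the protected state ... */
        lck_rw_type_t released = lck_rw_done(lock);
        assert(released == LCK_RW_TYPE_SHARED);   /* we held it shared */
    }
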
@@ -1298,13 +1342,13 @@ check_waiters:
  *
  *     called from lck_rw_done()
  *     prior_lock_state is the value in the 1st
- *     word of the lock at the time of a successful
+ *      word of the lock at the time of a successful
  *     atomic compare and exchange with the new value...
- *     it represents the state of the lock before we
+ *      it represents the state of the lock before we
  *     decremented the rw_shared_count or cleared either
- *     rw_want_upgrade or rw_want_write and
+ *      rw_want_upgrade or rw_want_write and
  *     the lck_x_waiting bits...  since the wrapper
- *     routine has already changed the state atomically, 
+ *      routine has already changed the state atomically,
  *     we just need to decide if we should
  *     wake up anyone and what value to return... we do
  *     this by examining the state of the lock before
@@ -1372,15 +1416,16 @@ lck_rw_done_gen(
  */
 void
 lck_rw_unlock(
-       lck_rw_t        *lck,
-       lck_rw_type_t   lck_rw_type)
+       lck_rw_t        *lck,
+       lck_rw_type_t   lck_rw_type)
 {
-       if (lck_rw_type == LCK_RW_TYPE_SHARED)
+       if (lck_rw_type == LCK_RW_TYPE_SHARED) {
                lck_rw_unlock_shared(lck);
-       else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
+       } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
                lck_rw_unlock_exclusive(lck);
-       else
+       } else {
                panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
+       }
 }
 
 
@@ -1389,15 +1434,16 @@ lck_rw_unlock(
  */
 void
 lck_rw_unlock_shared(
-       lck_rw_t        *lck)
+       lck_rw_t        *lck)
 {
-       lck_rw_type_t   ret;
+       lck_rw_type_t   ret;
 
        assertf(lck->lck_rw_shared_count > 0, "lck %p has shared_count=0x%x", lck, lck->lck_rw_shared_count);
        ret = lck_rw_done(lck);
 
-       if (ret != LCK_RW_TYPE_SHARED)
+       if (ret != LCK_RW_TYPE_SHARED) {
                panic("lck_rw_unlock_shared(): lock %p held in mode: %d\n", lck, ret);
+       }
 }
 
 
@@ -1406,14 +1452,15 @@ lck_rw_unlock_shared(
  */
 void
 lck_rw_unlock_exclusive(
-       lck_rw_t        *lck)
+       lck_rw_t        *lck)
 {
-       lck_rw_type_t   ret;
+       lck_rw_type_t   ret;
 
        ret = lck_rw_done(lck);
 
-       if (ret != LCK_RW_TYPE_EXCLUSIVE)
+       if (ret != LCK_RW_TYPE_EXCLUSIVE) {
                panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
+       }
 }
 
 
@@ -1422,15 +1469,16 @@ lck_rw_unlock_exclusive(
  */
 void
 lck_rw_lock(
-       lck_rw_t        *lck,
-       lck_rw_type_t   lck_rw_type)
+       lck_rw_t        *lck,
+       lck_rw_type_t   lck_rw_type)
 {
-       if (lck_rw_type == LCK_RW_TYPE_SHARED)
+       if (lck_rw_type == LCK_RW_TYPE_SHARED) {
                lck_rw_lock_shared(lck);
-       else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
+       } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
                lck_rw_lock_exclusive(lck);
-       else
+       } else {
                panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
+       }
 }
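
lck_rw_lock() and lck_rw_unlock() are the type-parameterized entry points; any value other than LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE panics, and the unlock type must match the mode actually held. A short sketch of symmetric use:

    #include <kern/locks.h>

    static void
    update_table(lck_rw_t *lock, boolean_t write)
    {
        lck_rw_type_t t = write ? LCK_RW_TYPE_EXCLUSIVE : LCK_RW_TYPE_SHARED;

        lck_rw_lock(lock, t);
        /* ... touch the protected structure ... */
        lck_rw_unlock(lock, t);    /* must match the type taken */
    }
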
 
 /*
@@ -1439,10 +1487,10 @@ lck_rw_lock(
 void
 lck_rw_lock_shared(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
        current_thread()->rwlock_count++;
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
                if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
                        atomic_exchange_abort();
@@ -1455,13 +1503,14 @@ lck_rw_lock_shared(lck_rw_t *lock)
                        break;
                }
                data += LCK_RW_SHARED_READER;
-               if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+               if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
                        break;
+               }
                cpu_pause();
        }
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
-#endif /* CONFIG_DTRACE */
+#endif  /* CONFIG_DTRACE */
        return;
 }
 
@@ -1474,25 +1523,24 @@ lck_rw_lock_shared(lck_rw_t *lock)
  */
 static void
 lck_rw_lock_shared_gen(
-       lck_rw_t        *lck)
+       lck_rw_t        *lck)
 {
-       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
-       uint64_t        deadline = 0;
-       int             gotlock = 0;
-       int             slept = 0;
-       wait_result_t   res = 0;
-       boolean_t       istate = -1;
+       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
+       uint64_t        deadline = 0;
+       int             gotlock = 0;
+       int             slept = 0;
+       wait_result_t   res = 0;
+       boolean_t       istate = -1;
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        uint64_t wait_interval = 0;
        int readers_at_sleep = 0;
        boolean_t dtrace_ls_initialized = FALSE;
        boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
 #endif
 
-       while ( !lck_rw_grab_shared(lck)) {
-
-#if    CONFIG_DTRACE
+       while (!lck_rw_grab_shared(lck)) {
+#if     CONFIG_DTRACE
                if (dtrace_ls_initialized == FALSE) {
                        dtrace_ls_initialized = TRUE;
                        dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
@@ -1508,42 +1556,43 @@ lck_rw_lock_shared_gen(
                        }
                }
 #endif
-               if (istate == -1)
+               if (istate == -1) {
                        istate = ml_get_interrupts_enabled();
+               }
 
                deadline = lck_rw_deadline_for_spin(lck);
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
-                            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
+                   trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
 
-               while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
+               while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline) {
                        lck_rw_lock_pause(istate);
+               }
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
-                            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
+                   trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
 
-               if (gotlock)
+               if (gotlock) {
                        break;
+               }
                /*
                 * if we get here, the deadline has expired w/o us
                 * being able to grab the lock for read
                 * check to see if we're allowed to do a thread_block
                 */
                if (lck->lck_rw_can_sleep) {
-
                        istate = lck_interlock_lock(lck);
 
                        if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
                            ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
-
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
-                                            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
+                                   trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
 
                                lck->lck_r_waiting = TRUE;
 
                                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
                                res = assert_wait(RW_LOCK_READER_EVENT(lck),
-                                               THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+                                   THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                                lck_interlock_unlock(lck, istate);
 
                                if (res == THREAD_WAITING) {
@@ -1551,7 +1600,7 @@ lck_rw_lock_shared_gen(
                                        slept++;
                                }
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
-                                            trace_lck, res, slept, 0, 0);
+                                   trace_lck, res, slept, 0, 0);
                        } else {
                                lck->lck_rw_shared_count++;
                                lck_interlock_unlock(lck, istate);
@@ -1560,7 +1609,7 @@ lck_rw_lock_shared_gen(
                }
        }
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        if (dtrace_ls_enabled == TRUE) {
                if (slept == 0) {
                        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
@@ -1584,13 +1633,14 @@ lck_rw_lock_exclusive(lck_rw_t *lock)
 {
        current_thread()->rwlock_count++;
        if (atomic_test_and_set32(&lock->data,
-               (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
-               LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
-#if    CONFIG_DTRACE
+           (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
+           LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
+#if     CONFIG_DTRACE
                LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
-#endif /* CONFIG_DTRACE */
-       } else
+#endif  /* CONFIG_DTRACE */
+       } else {
                lck_rw_lock_exclusive_gen(lock);
+       }
 }
 
 
@@ -1603,9 +1653,9 @@ lck_rw_lock_exclusive(lck_rw_t *lock)
 boolean_t
 lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
                if (data & LCK_RW_INTERLOCK) {
                        atomic_exchange_abort();
@@ -1614,22 +1664,26 @@ lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
                }
                if (data & LCK_RW_WANT_UPGRADE) {
                        data -= LCK_RW_SHARED_READER;
-                       if ((data & LCK_RW_SHARED_MASK) == 0)           /* we were the last reader */
-                               data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
-                       if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+                       if ((data & LCK_RW_SHARED_MASK) == 0) {         /* we were the last reader */
+                               data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
+                       }
+                       if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
                                return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
+                       }
                } else {
-                       data |= LCK_RW_WANT_UPGRADE;            /* ask for WANT_UPGRADE */
-                       data -= LCK_RW_SHARED_READER;           /* and shed our read count */
-                       if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+                       data |= LCK_RW_WANT_UPGRADE;            /* ask for WANT_UPGRADE */
+                       data -= LCK_RW_SHARED_READER;           /* and shed our read count */
+                       if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
                                break;
+                       }
                }
                cpu_pause();
        }
-                                               /* we now own the WANT_UPGRADE */
-       if (data & LCK_RW_SHARED_MASK)          /* check to see if all of the readers are drained */
-               lck_rw_lock_shared_to_exclusive_success(lock);  /* if not, we need to go wait */
-#if    CONFIG_DTRACE
+       /* we now own the WANT_UPGRADE */
+       if (data & LCK_RW_SHARED_MASK) {        /* check to see if all of the readers are drained */
+               lck_rw_lock_shared_to_exclusive_success(lock);  /* if not, we need to go wait */
+       }
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
 #endif
        return TRUE;
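
The key contract of lck_rw_lock_shared_to_exclusive() is its failure mode: if another thread already owns WANT_UPGRADE, the caller's shared hold is dropped entirely (the failure path below even unpromotes the thread), so on a FALSE return the caller holds nothing and must re-acquire and revalidate. A sketch of the canonical pattern:

    #include <kern/locks.h>

    static void
    upgrade_and_modify(lck_rw_t *lock)
    {
        lck_rw_lock_shared(lock);

        /* ... decide under the shared hold that a write is needed ... */

        if (!lck_rw_lock_shared_to_exclusive(lock)) {
            /* FALSE: the shared hold is already gone; start over
             * exclusively and revalidate whatever was observed above */
            lck_rw_lock_exclusive(lock);
        }
        /* ... perform the write ... */
        lck_rw_unlock_exclusive(lock);
    }
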
@@ -1646,12 +1700,12 @@ lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
  */
 static boolean_t
 lck_rw_lock_shared_to_exclusive_failure(
-       lck_rw_t        *lck,
-       uint32_t        prior_lock_state)
+       lck_rw_t        *lck,
+       uint32_t        prior_lock_state)
 {
-       lck_rw_t        *fake_lck;
-       thread_t        thread = current_thread();
-       uint32_t        rwlock_count;
+       lck_rw_t        *fake_lck;
+       thread_t        thread = current_thread();
+       uint32_t        rwlock_count;
 
        /* Check if dropping the lock means that we need to unpromote */
        rwlock_count = thread->rwlock_count--;
@@ -1677,9 +1731,9 @@ lck_rw_lock_shared_to_exclusive_failure(
        }
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
-                    VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
+           VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
 
-       return (FALSE);
+       return FALSE;
 }
 
 
@@ -1693,16 +1747,16 @@ lck_rw_lock_shared_to_exclusive_failure(
  */
 static boolean_t
 lck_rw_lock_shared_to_exclusive_success(
-       lck_rw_t        *lck)
+       lck_rw_t        *lck)
 {
-       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
-       uint64_t        deadline = 0;
-       int             slept = 0;
-       int             still_shared = 0;
-       wait_result_t   res;
-       boolean_t       istate = -1;
+       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
+       uint64_t        deadline = 0;
+       int             slept = 0;
+       int             still_shared = 0;
+       wait_result_t   res;
+       boolean_t       istate = -1;
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        uint64_t wait_interval = 0;
        int readers_at_sleep = 0;
        boolean_t dtrace_ls_initialized = FALSE;
@@ -1710,8 +1764,7 @@ lck_rw_lock_shared_to_exclusive_success(
 #endif
 
        while (lck->lck_rw_shared_count != 0) {
-
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
                if (dtrace_ls_initialized == FALSE) {
                        dtrace_ls_initialized = TRUE;
                        dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
@@ -1727,40 +1780,42 @@ lck_rw_lock_shared_to_exclusive_success(
                        }
                }
 #endif
-               if (istate == -1)
+               if (istate == -1) {
                        istate = ml_get_interrupts_enabled();
+               }
 
                deadline = lck_rw_deadline_for_spin(lck);
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
-                            trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
+                   trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
 
-               while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
+               while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline) {
                        lck_rw_lock_pause(istate);
+               }
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
-                            trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
+                   trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
 
-               if ( !still_shared)
+               if (!still_shared) {
                        break;
+               }
                /*
                 * if we get here, the deadline has expired w/o
                 * the rw_shared_count having drained to 0
                 * check to see if we're allowed to do a thread_block
                 */
                if (lck->lck_rw_can_sleep) {
-
                        istate = lck_interlock_lock(lck);
 
                        if (lck->lck_rw_shared_count != 0) {
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
-                                            trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
+                                   trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
 
                                lck->lck_w_waiting = TRUE;
 
                                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
                                res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
-                                               THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+                                   THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                                lck_interlock_unlock(lck, istate);
 
                                if (res == THREAD_WAITING) {
@@ -1768,14 +1823,14 @@ lck_rw_lock_shared_to_exclusive_success(
                                        slept++;
                                }
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
-                                            trace_lck, res, slept, 0, 0);
+                                   trace_lck, res, slept, 0, 0);
                        } else {
                                lck_interlock_unlock(lck, istate);
                                break;
                        }
                }
        }
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        /*
         * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
         */
@@ -1790,33 +1845,37 @@ lck_rw_lock_shared_to_exclusive_success(
        }
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
 #endif
-       return (TRUE);
+       return TRUE;
 }
 
 /*
  *     Routine:        lck_rw_lock_exclusive_to_shared
  */
 
-void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
+void
+lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
                if (data & LCK_RW_INTERLOCK) {
                        atomic_exchange_abort();
-                       lck_rw_interlock_spin(lock);    /* wait for interlock to clear */
+                       lck_rw_interlock_spin(lock);    /* wait for interlock to clear */
                        continue;
                }
                data += LCK_RW_SHARED_READER;
-               if (data & LCK_RW_WANT_UPGRADE)
+               if (data & LCK_RW_WANT_UPGRADE) {
                        data &= ~(LCK_RW_WANT_UPGRADE);
-               else
+               } else {
                        data &= ~(LCK_RW_WANT_EXCL);
-               if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
+               }
+               if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
                        data &= ~(LCK_RW_W_WAITING);
-               if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
+               }
+               if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp)) {
                        break;
+               }
                cpu_pause();
        }
        return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
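
The downgrade is the inverse operation and cannot fail: lck_rw_lock_exclusive_to_shared() atomically adds a reader reference, clears the exclusive bits, and then (in the _gen tail below) wakes any readers that writer priority had been holding back. A short sketch:

    #include <kern/locks.h>

    static void
    publish_then_read(lck_rw_t *lock)
    {
        lck_rw_lock_exclusive(lock);
        /* ... install the new state ... */
        lck_rw_lock_exclusive_to_shared(lock);   /* keep reading, admit other readers */
        /* ... read without excluding other readers ... */
        lck_rw_unlock_shared(lock);
    }
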
@@ -1825,7 +1884,7 @@ void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
 
 /*
  *      Routine:        lck_rw_lock_exclusive_to_shared_gen
- *     Function:
+ *      Function:
  *             assembly fast path has already dropped
  *             our exclusive state and bumped lck_rw_shared_count
  *             all we need to do here is determine if anyone
@@ -1833,16 +1892,16 @@ void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
  */
 static void
 lck_rw_lock_exclusive_to_shared_gen(
-       lck_rw_t        *lck,
-       uint32_t        prior_lock_state)
+       lck_rw_t        *lck,
+       uint32_t        prior_lock_state)
 {
-       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
-       lck_rw_t                *fake_lck;
+       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
+       lck_rw_t                *fake_lck;
 
        fake_lck = (lck_rw_t *)&prior_lock_state;
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
-                            trace_lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
+           trace_lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
 
        /*
         * don't wake up anyone waiting to take the lock exclusively
@@ -1852,11 +1911,12 @@ lck_rw_lock_exclusive_to_shared_gen(
         * wake up any waiting readers if we don't have any writers waiting,
         * or the lock is NOT marked as rw_priv_excl (writers have privilege)
         */
-       if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
+       if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting) {
                thread_wakeup(RW_LOCK_READER_EVENT(lck));
+       }
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
-                            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
+           trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
 
 #if CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
@@ -1869,27 +1929,29 @@ lck_rw_lock_exclusive_to_shared_gen(
  */
 boolean_t
 lck_rw_try_lock(
-       lck_rw_t        *lck,
-       lck_rw_type_t   lck_rw_type)
-{
-       if (lck_rw_type == LCK_RW_TYPE_SHARED)
-               return(lck_rw_try_lock_shared(lck));
-       else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
-               return(lck_rw_try_lock_exclusive(lck));
-       else
+       lck_rw_t        *lck,
+       lck_rw_type_t   lck_rw_type)
+{
+       if (lck_rw_type == LCK_RW_TYPE_SHARED) {
+               return lck_rw_try_lock_shared(lck);
+       } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
+               return lck_rw_try_lock_exclusive(lck);
+       } else {
                panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
-       return(FALSE);
+       }
+       return FALSE;
 }
 
 /*
  *     Routine:        lck_rw_try_lock_shared
  */
 
-boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
+boolean_t
+lck_rw_try_lock_shared(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
                if (data & LCK_RW_INTERLOCK) {
                        atomic_exchange_abort();
@@ -1898,18 +1960,19 @@ boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
                }
                if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
                        atomic_exchange_abort();
-                       return FALSE;                   /* lock is busy */
+                       return FALSE;                   /* lock is busy */
                }
-               data += LCK_RW_SHARED_READER;           /* Increment reader refcount */
-               if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+               data += LCK_RW_SHARED_READER;           /* Increment reader refcount */
+               if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
                        break;
+               }
                cpu_pause();
        }
        current_thread()->rwlock_count++;
        /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
-#endif /* CONFIG_DTRACE */
+#endif  /* CONFIG_DTRACE */
        return TRUE;
 }
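
The try variants fail fast instead of spinning or sleeping: lck_rw_try_lock_shared() aborts and returns FALSE the moment it sees an exclusive owner or a pending upgrade. That makes it usable where blocking is not an option, at the cost of a fallback path in the caller. A sketch (the helper name is illustrative):

    #include <kern/locks.h>

    static boolean_t
    try_read_counter(lck_rw_t *lock, int *out, const int *counter)
    {
        if (!lck_rw_try_lock_shared(lock)) {
            return FALSE;          /* contended: caller retries later */
        }
        *out = *counter;           /* cheap read under the shared hold */
        lck_rw_unlock_shared(lock);
        return TRUE;
    }
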
 
@@ -1918,11 +1981,12 @@ boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
  *     Routine:        lck_rw_try_lock_exclusive
  */
 
-boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
+boolean_t
+lck_rw_try_lock_exclusive(lck_rw_t *lock)
 {
-       uint32_t        data, prev;
+       uint32_t        data, prev;
 
-       for ( ; ; ) {
+       for (;;) {
                data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
                if (data & LCK_RW_INTERLOCK) {
                        atomic_exchange_abort();
@@ -1931,26 +1995,27 @@ boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
                }
                if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
                        atomic_exchange_abort();
-                       return FALSE;                           /* can't get it */
+                       return FALSE;                           /* can't get it */
                }
                data |= LCK_RW_WANT_EXCL;
-               if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+               if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
                        break;
+               }
                cpu_pause();
        }
 
        current_thread()->rwlock_count++;
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
-#endif /* CONFIG_DTRACE */
+#endif  /* CONFIG_DTRACE */
        return TRUE;
 }
 
 
 void
 lck_rw_assert(
-       lck_rw_t        *lck,
-       unsigned int    type)
+       lck_rw_t        *lck,
+       unsigned int    type)
 {
        switch (type) {
        case LCK_RW_ASSERT_SHARED:
@@ -1960,7 +2025,7 @@ lck_rw_assert(
                break;
        case LCK_RW_ASSERT_EXCLUSIVE:
                if ((lck->lck_rw_want_write ||
-                    lck->lck_rw_want_upgrade) &&
+                   lck->lck_rw_want_upgrade) &&
                    lck->lck_rw_shared_count == 0) {
                        return;
                }
@@ -1974,8 +2039,8 @@ lck_rw_assert(
                break;
        case LCK_RW_ASSERT_NOTHELD:
                if (!(lck->lck_rw_want_write ||
-                         lck->lck_rw_want_upgrade ||
-                         lck->lck_rw_shared_count != 0)) {
+                   lck->lck_rw_want_upgrade ||
+                   lck->lck_rw_shared_count != 0)) {
                        return;
                }
                break;
@@ -2023,7 +2088,8 @@ lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
  * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
  */
 boolean_t
-kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
+kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck)
+{
        if (not_in_kdp) {
                panic("panic: rw lock exclusive check done outside of kernel debugger");
        }
@@ -2072,7 +2138,7 @@ kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
  *       on acquire.
  */
 
-#ifdef MUTEX_ZONE
+#ifdef  MUTEX_ZONE
 extern zone_t lck_mtx_zone;
 #endif
 
@@ -2081,18 +2147,20 @@ extern zone_t lck_mtx_zone;
  */
 lck_mtx_t *
 lck_mtx_alloc_init(
-       lck_grp_t       *grp,
-       lck_attr_t      *attr)
+       lck_grp_t       *grp,
+       lck_attr_t      *attr)
 {
-       lck_mtx_t       *lck;
-#ifdef MUTEX_ZONE
-       if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
+       lck_mtx_t       *lck;
+#ifdef  MUTEX_ZONE
+       if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0) {
                lck_mtx_init(lck, grp, attr);
+       }
 #else
-       if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
+       if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0) {
                lck_mtx_init(lck, grp, attr);
+       }
 #endif
-       return(lck);
+       return lck;
 }
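
lck_mtx_alloc_init() mirrors the rw-lock allocator, drawing from a dedicated zone when MUTEX_ZONE is configured and from kalloc otherwise. A minimal lifecycle sketch, reusing the illustrative lock group from the rw-lock example above:

    #include <kern/locks.h>

    static lck_mtx_t *my_mtx;

    static void
    mtx_example(lck_grp_t *grp)
    {
        my_mtx = lck_mtx_alloc_init(grp, LCK_ATTR_NULL);

        lck_mtx_lock(my_mtx);
        /* ... critical section ... */
        lck_mtx_unlock(my_mtx);

        lck_mtx_free(my_mtx, grp);   /* lck_mtx_destroy() plus the zfree/kfree */
    }
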
 
 /*
@@ -2100,11 +2168,11 @@ lck_mtx_alloc_init(
  */
 void
 lck_mtx_free(
-       lck_mtx_t       *lck,
-       lck_grp_t       *grp)
+       lck_mtx_t       *lck,
+       lck_grp_t       *grp)
 {
        lck_mtx_destroy(lck, grp);
-#ifdef MUTEX_ZONE
+#ifdef  MUTEX_ZONE
        zfree(lck_mtx_zone, lck);
 #else
        kfree(lck, sizeof(lck_mtx_t));
@@ -2116,9 +2184,9 @@ lck_mtx_free(
  */
 static void
 lck_mtx_ext_init(
-       lck_mtx_ext_t   *lck,
-       lck_grp_t       *grp,
-       lck_attr_t      *attr)
+       lck_mtx_ext_t   *lck,
+       lck_grp_t       *grp,
+       lck_attr_t      *attr)
 {
        bzero((void *)lck, sizeof(lck_mtx_ext_t));
 
@@ -2129,8 +2197,9 @@ lck_mtx_ext_init(
 
        lck->lck_mtx_grp = grp;
 
-       if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
+       if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
                lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
+       }
 
        lck->lck_mtx.lck_mtx_is_ext = 1;
        lck->lck_mtx.lck_mtx_pad32 = 0xFFFFFFFF;
@@ -2141,17 +2210,18 @@ lck_mtx_ext_init(
  */
 void
 lck_mtx_init(
-       lck_mtx_t       *lck,
-       lck_grp_t       *grp,
-       lck_attr_t      *attr)
+       lck_mtx_t       *lck,
+       lck_grp_t       *grp,
+       lck_attr_t      *attr)
 {
-       lck_mtx_ext_t   *lck_ext;
-       lck_attr_t      *lck_attr;
+       lck_mtx_ext_t   *lck_ext;
+       lck_attr_t      *lck_attr;
 
-       if (attr != LCK_ATTR_NULL)
+       if (attr != LCK_ATTR_NULL) {
                lck_attr = attr;
-       else
+       } else {
                lck_attr = &LockDefaultLckAttr;
+       }
 
        if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
                if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
@@ -2173,17 +2243,18 @@ lck_mtx_init(
  */
 void
 lck_mtx_init_ext(
-       lck_mtx_t       *lck,
-       lck_mtx_ext_t   *lck_ext,
-       lck_grp_t       *grp,
-       lck_attr_t      *attr)
+       lck_mtx_t       *lck,
+       lck_mtx_ext_t   *lck_ext,
+       lck_grp_t       *grp,
+       lck_attr_t      *attr)
 {
-       lck_attr_t      *lck_attr;
+       lck_attr_t      *lck_attr;
 
-       if (attr != LCK_ATTR_NULL)
+       if (attr != LCK_ATTR_NULL) {
                lck_attr = attr;
-       else
+       } else {
                lck_attr = &LockDefaultLckAttr;
+       }
 
        if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
                lck_mtx_ext_init(lck_ext, grp, lck_attr);
@@ -2225,13 +2296,14 @@ lck_mtx_lock_mark_destroyed(
  */
 void
 lck_mtx_destroy(
-       lck_mtx_t       *lck,
-       lck_grp_t       *grp)
+       lck_mtx_t       *lck,
+       lck_grp_t       *grp)
 {
        boolean_t indirect;
 
-       if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
+       if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
                return;
+       }
 #if MACH_LDEBUG
        lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
 #endif
@@ -2239,8 +2311,9 @@ lck_mtx_destroy(
 
        lck_mtx_lock_mark_destroyed(lck, indirect);
 
-       if (indirect)
+       if (indirect) {
                kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
+       }
        lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
        lck_grp_deallocate(grp);
        return;
@@ -2262,7 +2335,7 @@ __attribute__((always_inline))
 static boolean_t
 get_indirect_mutex(
        lck_mtx_t       **lock,
-       uint32_t        *state)
+       uint32_t        *state)
 {
        *lock = &((*lock)->lck_mtx_ptr->lck_mtx);
        *state = ordered_load_mtx_state(*lock);
@@ -2270,7 +2343,7 @@ get_indirect_mutex(
 }
 
 /*
- * Routine:    lck_mtx_unlock_slow
+ * Routine:     lck_mtx_unlock_slow
  *
  * Unlocks a mutex held by current thread.
  *
@@ -2281,11 +2354,11 @@ get_indirect_mutex(
 __attribute__((noinline))
 void
 lck_mtx_unlock_slow(
-       lck_mtx_t       *lock)
+       lck_mtx_t       *lock)
 {
-       thread_t        thread;
-       uint32_t        state, prev;
-       boolean_t       indirect = FALSE;
+       thread_t        thread;
+       uint32_t        state, prev;
+       boolean_t       indirect = FALSE;
 
        state = ordered_load_mtx_state(lock);
 
@@ -2298,13 +2371,15 @@ lck_mtx_unlock_slow(
 
 #if DEVELOPMENT | DEBUG
        thread_t owner = (thread_t)lock->lck_mtx_owner;
-       if(__improbable(owner != thread))
+       if (__improbable(owner != thread)) {
                lck_mtx_owner_check_panic(lock);
+       }
 #endif
 
        /* check if it is held as a spinlock */
-       if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0))
+       if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0)) {
                goto unlock;
+       }
 
        lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);
 
@@ -2318,21 +2393,23 @@ unlock:
 
        if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
 #if     MACH_LDEBUG
-               if (thread)
+               if (thread) {
                        thread->mutex_count--;
+               }
 #endif
                return lck_mtx_unlock_wakeup_tail(lock, state, indirect);
        }
 
        /* release interlock, promotion and clear spin flag */
        state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK));
-       ordered_store_mtx_state_release(lock, state);           /* since I own the interlock, I don't need an atomic update */
+       ordered_store_mtx_state_release(lock, state);           /* since I own the interlock, I don't need an atomic update */
 
-#if    MACH_LDEBUG
+#if     MACH_LDEBUG
        /* perform lock statistics after drop to prevent delay */
-       if (thread)
-               thread->mutex_count--;          /* lock statistic */
-#endif /* MACH_LDEBUG */
+       if (thread) {
+               thread->mutex_count--;          /* lock statistic */
+       }
+#endif  /* MACH_LDEBUG */
 
        /* re-enable preemption */
        lck_mtx_unlock_finish_inline(lock, FALSE);
@@ -2340,11 +2417,11 @@ unlock:
        return;
 }
 
-#define        LCK_MTX_LCK_WAIT_CODE           0x20
-#define        LCK_MTX_LCK_WAKEUP_CODE         0x21
-#define        LCK_MTX_LCK_SPIN_CODE           0x22
-#define        LCK_MTX_LCK_ACQUIRE_CODE        0x23
-#define LCK_MTX_LCK_DEMOTE_CODE                0x24
+#define LCK_MTX_LCK_WAIT_CODE           0x20
+#define LCK_MTX_LCK_WAKEUP_CODE         0x21
+#define LCK_MTX_LCK_SPIN_CODE           0x22
+#define LCK_MTX_LCK_ACQUIRE_CODE        0x23
+#define LCK_MTX_LCK_DEMOTE_CODE         0x24
 
 /*
  * Routine:    lck_mtx_unlock_wakeup_tail
@@ -2368,18 +2445,18 @@ unlock:
  */
 __attribute__((noinline))
 static void
-lck_mtx_unlock_wakeup_tail (
-       lck_mtx_t       *mutex,
+lck_mtx_unlock_wakeup_tail(
+       lck_mtx_t       *mutex,
        uint32_t        state,
-       boolean_t       indirect)
+       boolean_t       indirect)
 {
        struct turnstile *ts;
 
-       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
+       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
        kern_return_t did_wake;
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
-               trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
+           trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
 
        ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
 
@@ -2396,7 +2473,7 @@ lck_mtx_unlock_wakeup_tail (
        turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
 
        state -= LCK_MTX_WAITER;
-        state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
+       state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
        ordered_store_mtx_state_release(mutex, state);
 
        assert(current_thread()->turnstile != NULL);
@@ -2404,13 +2481,13 @@ lck_mtx_unlock_wakeup_tail (
        turnstile_cleanup();
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
-                 trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
+           trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
 
        lck_mtx_unlock_finish_inline(mutex, indirect);
 }
 
 /*
- * Routine:    lck_mtx_lock_acquire_x86
+ * Routine:     lck_mtx_lock_acquire_x86
  *
  * Invoked on acquiring the mutex when there is
  * contention (i.e. the assembly routine sees that
@@ -2421,13 +2498,13 @@ lck_mtx_unlock_wakeup_tail (
 __attribute__((always_inline))
 static void
 lck_mtx_lock_acquire_inline(
-       lck_mtx_t       *mutex,
+       lck_mtx_t       *mutex,
        struct turnstile *ts)
 {
-       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
+       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
-                    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
+           trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
 
        thread_t thread = (thread_t)mutex->lck_mtx_owner;       /* faster than current_thread() */
        assert(thread->waiting_for_mutex == NULL);
@@ -2448,12 +2525,12 @@ lck_mtx_lock_acquire_inline(
        assert(current_thread()->turnstile != NULL);
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
-                    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
+           trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
 }
 
 void
 lck_mtx_lock_acquire_x86(
-       lck_mtx_t       *mutex)
+       lck_mtx_t       *mutex)
 {
        return lck_mtx_lock_acquire_inline(mutex, NULL);
 }
@@ -2467,8 +2544,8 @@ lck_mtx_lock_acquire_x86(
 __attribute__((noinline))
 static void
 lck_mtx_lock_acquire_tail(
-       lck_mtx_t       *mutex,
-       boolean_t       indirect,
+       lck_mtx_t       *mutex,
+       boolean_t       indirect,
        struct turnstile *ts)
 {
        lck_mtx_lock_acquire_inline(mutex, ts);
@@ -2478,7 +2555,7 @@ lck_mtx_lock_acquire_tail(
 __attribute__((noinline))
 static boolean_t
 lck_mtx_try_lock_acquire_tail(
-       lck_mtx_t       *mutex)
+       lck_mtx_t       *mutex)
 {
        lck_mtx_lock_acquire_inline(mutex, NULL);
        lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));
@@ -2489,7 +2566,7 @@ lck_mtx_try_lock_acquire_tail(
 __attribute__((noinline))
 static void
 lck_mtx_convert_spin_acquire_tail(
-       lck_mtx_t       *mutex)
+       lck_mtx_t       *mutex)
 {
        lck_mtx_lock_acquire_inline(mutex, NULL);
        lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
@@ -2513,7 +2590,7 @@ lck_mtx_interlock_lock_set_and_clear_flags(
        uint32_t state, prev;
        state = *new_state;
 
-       for ( ; ; ) {
+       for (;;) {
                /* have to wait for interlock to clear */
                while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
                        cpu_pause();
@@ -2521,11 +2598,12 @@ lck_mtx_interlock_lock_set_and_clear_flags(
                }
                prev = state;                                   /* prev contains snapshot for exchange */
                state |= LCK_MTX_ILOCKED_MSK | xor_flags;       /* pick up interlock */
-               state &= ~and_flags;                            /* clear flags */
+               state &= ~and_flags;                            /* clear flags */
 
                disable_preemption();
-               if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire))
+               if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
                        break;
+               }
                enable_preemption();
                cpu_pause();
                state = ordered_load_mtx_state(mutex);
@@ -2564,53 +2642,18 @@ lck_mtx_interlock_try_lock_set_flags(
        if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
                return 0;
        }
-       prev = state;                                   /* prev contains snapshot for exchange */
-       state |= LCK_MTX_ILOCKED_MSK | or_flags;        /* pick up interlock */
+       prev = state;                                   /* prev contains snapshot for exchange */
+       state |= LCK_MTX_ILOCKED_MSK | or_flags;        /* pick up interlock */
        disable_preemption();
        if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
-                       *new_state = state;
-                       return 1;
+               *new_state = state;
+               return 1;
        }
 
        enable_preemption();
        return 0;
 }
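
For reference, a compressed user-space model of the try-lock pattern above: snapshot the state, refuse if the interlock (or any requested flag) is already visible, then attempt a single acquire-ordered compare-and-swap with preemption disabled. C11 atomics stand in for os_atomic_cmpxchg; the mask value and the preemption stubs are illustrative, not the kernel's.

#include <stdatomic.h>

#define ILOCKED_MSK     0x00000001u     /* hypothetical bit, for illustration */

static void disable_preemption(void) { }        /* stand-in for the kernel primitive */
static void enable_preemption(void) { }         /* stand-in for the kernel primitive */

/*
 * Try once to take the interlock and set or_flags in the same atomic
 * exchange. On success returns 1 with preemption left disabled and
 * *new_state holding the published state; on failure returns 0 with
 * preemption re-enabled, and the caller may back off and retry.
 */
static int
interlock_try_lock_set_flags(atomic_uint *lock_state, unsigned int or_flags,
    unsigned int *new_state)
{
        unsigned int prev = atomic_load_explicit(lock_state, memory_order_relaxed);

        if (prev & (ILOCKED_MSK | or_flags)) {
                return 0;               /* interlock or flag already held */
        }

        unsigned int next = prev | ILOCKED_MSK | or_flags;

        disable_preemption();
        if (atomic_compare_exchange_strong_explicit(lock_state, &prev, next,
            memory_order_acquire, memory_order_relaxed)) {
                *new_state = next;
                return 1;
        }
        enable_preemption();            /* lost the race */
        return 0;
}
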
 
-static inline int
-lck_mtx_interlock_try_lock(
-       lck_mtx_t *mutex,
-       uint32_t *new_state)
-{
-       return lck_mtx_interlock_try_lock_set_flags(mutex, 0, new_state);
-}
-
-static inline int
-lck_mtx_interlock_try_lock_disable_interrupts(
-       lck_mtx_t *mutex,
-       boolean_t *istate)
-{
-       uint32_t        state;
-
-       *istate = ml_set_interrupts_enabled(FALSE);
-       state = ordered_load_mtx_state(mutex);
-
-       if (lck_mtx_interlock_try_lock(mutex, &state)) {
-               return 1;
-       } else {
-               ml_set_interrupts_enabled(*istate);
-               return 0;
-       }
-}
-
-static inline void
-lck_mtx_interlock_unlock_enable_interrupts(
-       lck_mtx_t *mutex,
-       boolean_t istate)
-{
-       lck_mtx_ilk_unlock(mutex);
-       ml_set_interrupts_enabled(istate);
-}
-
 __attribute__((noinline))
 static void
 lck_mtx_lock_contended(
@@ -2641,8 +2684,11 @@ try_again:
                        lck_grp_mtx_update_direct_wait((struct _lck_mtx_ext_*)lock);
                }
 
-               /* just fall through case LCK_MTX_SPINWAIT_SPUN */
-       case LCK_MTX_SPINWAIT_SPUN:
+       /* just fall through to the LCK_MTX_SPINWAIT_SPUN_* cases */
+       case LCK_MTX_SPINWAIT_SPUN_HIGH_THR:
+       case LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE:
+       case LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION:
+       case LCK_MTX_SPINWAIT_SPUN_SLIDING_THR:
                /*
                 * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
                 * interlock not held
@@ -2660,7 +2706,6 @@ try_again:
                         */
                        goto try_again;
                } else {
-
                        /* grab the mutex */
                        state |= LCK_MTX_MLOCKED_MSK;
                        ordered_store_mtx_state_release(lock, state);
@@ -2740,7 +2785,7 @@ lck_mtx_lock_wait_interlock_to_clear(
 {
        uint32_t state;
 
-       for ( ; ; ) {
+       for (;;) {
                cpu_pause();
                state = ordered_load_mtx_state(lock);
                if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
@@ -2762,7 +2807,7 @@ lck_mtx_try_lock_wait_interlock_to_clear(
 {
        uint32_t state;
 
-       for ( ; ; ) {
+       for (;;) {
                cpu_pause();
                state = ordered_load_mtx_state(lock);
                if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
@@ -2790,9 +2835,9 @@ void
 lck_mtx_lock_slow(
        lck_mtx_t       *lock)
 {
-       boolean_t       indirect = FALSE;
-       uint32_t        state;
-       int             first_miss = 0;
+       boolean_t       indirect = FALSE;
+       uint32_t        state;
+       int             first_miss = 0;
 
        state = ordered_load_mtx_state(lock);
 
@@ -2806,7 +2851,7 @@ lck_mtx_lock_slow(
 
 
                /* is the mutex already held and not indirect */
-               if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
+               if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
                        /* no, must have been the mutex */
                        return lck_mtx_lock_contended(lock, indirect, &first_miss);
                }
@@ -2824,7 +2869,7 @@ lck_mtx_lock_slow(
                        lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
 
                        if (state & LCK_MTX_SPIN_MSK) {
-                                /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+                               /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
                                assert(state & LCK_MTX_ILOCKED_MSK);
                                lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
                        }
@@ -2850,7 +2895,7 @@ lck_mtx_lock_slow(
 
 #if MACH_LDEBUG
        if (thread) {
-               thread->mutex_count++;          /* lock statistic */
+               thread->mutex_count++;          /* lock statistic */
        }
 #endif
        /*
@@ -2887,7 +2932,7 @@ lck_mtx_try_lock_slow(
                 */
 
                /* is the mutex already held and not indirect */
-               if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
+               if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
                        return FALSE;
                }
 
@@ -2905,8 +2950,9 @@ lck_mtx_try_lock_slow(
                }
 
                if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
-                       if (indirect)
+                       if (indirect) {
                                lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+                       }
                        return FALSE;
                }
        }
@@ -2914,8 +2960,9 @@ lck_mtx_try_lock_slow(
        /* no - can't be INDIRECT, DESTROYED or locked */
        while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
                if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
-                       if (indirect)
+                       if (indirect) {
                                lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+                       }
                        return FALSE;
                }
        }
@@ -2928,7 +2975,7 @@ lck_mtx_try_lock_slow(
 
 #if MACH_LDEBUG
        if (thread) {
-               thread->mutex_count++;          /* lock statistic */
+               thread->mutex_count++;          /* lock statistic */
        }
 #endif
        /*
@@ -2943,13 +2990,12 @@ lck_mtx_try_lock_slow(
        lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));
 
        return TRUE;
-
 }
 
 __attribute__((noinline))
 void
 lck_mtx_lock_spin_slow(
-       lck_mtx_t       *lock)
+       lck_mtx_t       *lock)
 {
        boolean_t       indirect = FALSE;
        uint32_t        state;
@@ -2967,7 +3013,7 @@ lck_mtx_lock_spin_slow(
 
 
                /* is the mutex already held and not indirect */
-               if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
+               if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
                        /* no, must have been the mutex */
                        return lck_mtx_lock_contended(lock, indirect, &first_miss);
                }
@@ -2985,7 +3031,7 @@ lck_mtx_lock_spin_slow(
                        lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
 
                        if (state & LCK_MTX_SPIN_MSK) {
-                                /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+                               /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
                                assert(state & LCK_MTX_ILOCKED_MSK);
                                lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
                        }
@@ -2997,7 +3043,7 @@ lck_mtx_lock_spin_slow(
        }
 
        /* no - can't be INDIRECT, DESTROYED or locked */
-       while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state) )) {
+       while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
                if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
                        return lck_mtx_lock_contended(lock, indirect, &first_miss);
                }
@@ -3015,7 +3061,7 @@ lck_mtx_lock_spin_slow(
        }
 #endif
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
 #endif
        /* return with the interlock held and preemption disabled */
@@ -3042,7 +3088,7 @@ lck_mtx_try_lock_spin_slow(
                 */
 
                /* is the mutex already held and not indirect */
-               if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
+               if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
                        return FALSE;
                }
 
@@ -3060,8 +3106,9 @@ lck_mtx_try_lock_spin_slow(
                }
 
                if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
-                       if (indirect)
+                       if (indirect) {
                                lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+                       }
                        return FALSE;
                }
        }
@@ -3069,8 +3116,9 @@ lck_mtx_try_lock_spin_slow(
        /* no - can't be INDIRECT, DESTROYED or locked */
        while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
                if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
-                       if (indirect)
+                       if (indirect) {
                                lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+                       }
                        return FALSE;
                }
        }
@@ -3083,7 +3131,7 @@ lck_mtx_try_lock_spin_slow(
 
 #if MACH_LDEBUG
        if (thread) {
-               thread->mutex_count++;          /* lock statistic */
+               thread->mutex_count++;          /* lock statistic */
        }
 #endif
 
@@ -3091,13 +3139,12 @@ lck_mtx_try_lock_spin_slow(
        LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
 #endif
        return TRUE;
-
 }
 
 __attribute__((noinline))
 void
 lck_mtx_convert_spin(
-       lck_mtx_t       *lock)
+       lck_mtx_t       *lock)
 {
        uint32_t state;
 
@@ -3135,7 +3182,7 @@ lck_mtx_convert_spin(
 
 static inline boolean_t
 lck_mtx_lock_grab_mutex(
-       lck_mtx_t       *lock)
+       lck_mtx_t       *lock)
 {
        uint32_t state;
 
@@ -3153,7 +3200,7 @@ lck_mtx_lock_grab_mutex(
 
 #if MACH_LDEBUG
        if (thread) {
-               thread->mutex_count++;          /* lock statistic */
+               thread->mutex_count++;          /* lock statistic */
        }
 #endif
        return TRUE;
@@ -3162,8 +3209,8 @@ lck_mtx_lock_grab_mutex(
 __attribute__((noinline))
 void
 lck_mtx_assert(
-       lck_mtx_t       *lock,
-       unsigned int    type)
+       lck_mtx_t       *lock,
+       unsigned int    type)
 {
        thread_t thread, owner;
        uint32_t state;
@@ -3178,17 +3225,19 @@ lck_mtx_assert(
        owner = (thread_t)lock->lck_mtx_owner;
 
        if (type == LCK_MTX_ASSERT_OWNED) {
-               if (owner != thread || !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))
+               if (owner != thread || !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
                        panic("mutex (%p) not owned\n", lock);
+               }
        } else {
-               assert (type == LCK_MTX_ASSERT_NOTOWNED);
-               if (owner == thread)
+               assert(type == LCK_MTX_ASSERT_NOTOWNED);
+               if (owner == thread) {
                        panic("mutex (%p) owned\n", lock);
+               }
        }
 }
 
 /*
- * Routine:    lck_mtx_lock_spinwait_x86
+ * Routine:     lck_mtx_lock_spinwait_x86
  *
  * Invoked trying to acquire a mutex when there is contention but
  * the holder is running on another processor. We spin for up to a maximum
@@ -3202,104 +3251,245 @@ lck_mtx_assert(
 __attribute__((noinline))
 lck_mtx_spinwait_ret_type_t
 lck_mtx_lock_spinwait_x86(
-       lck_mtx_t       *mutex)
+       lck_mtx_t       *mutex)
 {
-       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
-       thread_t        holder;
-       uint64_t        overall_deadline;
-       uint64_t        check_owner_deadline;
-       uint64_t        cur_time;
-       lck_mtx_spinwait_ret_type_t             retval = LCK_MTX_SPINWAIT_SPUN;
-       int             loopcount = 0;
+       __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
+       thread_t        owner, prev_owner;
+       uint64_t        window_deadline, sliding_deadline, high_deadline;
+       uint64_t        start_time, cur_time, avg_hold_time, bias, delta;
+       lck_mtx_spinwait_ret_type_t             retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
+       int             loopcount = 0;
+       int             total_hold_time_samples, window_hold_time_samples, unfairness;
+       uint            i, prev_owner_cpu;
+       bool            owner_on_core, adjust;
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
-                    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
+           trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
 
-       cur_time = mach_absolute_time();
-       overall_deadline = cur_time + MutexSpin;
-       check_owner_deadline = cur_time;
+       start_time = mach_absolute_time();
+       /*
+        * window_deadline bounds the "learning" phase.
+        * The thread collects statistics about the lock until
+        * window_deadline expires, then decides whether to keep
+        * spinning or to block, based on the observed
+        * concurrency behavior.
+        *
+        * Every thread can spin for at least low_MutexSpin.
+        */
+       window_deadline = start_time + low_MutexSpin;
+       /*
+        * sliding_deadline is the adjusted spin deadline
+        * computed after the "learning" phase.
+        */
+       sliding_deadline = window_deadline;
+       /*
+        * high_deadline is a hard cap. No thread
+        * can spin past this deadline.
+        */
+       if (high_MutexSpin >= 0) {
+               high_deadline = start_time + high_MutexSpin;
+       } else {
+               high_deadline = start_time + low_MutexSpin * real_ncpus;
+       }
 
+       /*
+        * We do not yet know which cpu the owner is running on.
+        * Initialize prev_owner_cpu with the next cpu.
+        */
+       prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
+       total_hold_time_samples = 0;
+       window_hold_time_samples = 0;
+       avg_hold_time = 0;
+       adjust = TRUE;
+       bias = (os_hash_kernel_pointer(mutex) + cpu_number()) % real_ncpus;
+
+       prev_owner = (thread_t) mutex->lck_mtx_owner;
        /*
         * Spin while:
         *   - mutex is locked, and
-        *   - its locked as a spin lock, and
+        *   - it's locked as a spin lock, and
         *   - owner is running on another processor, and
-        *   - owner (processor) is not idling, and
         *   - we haven't spun for long enough.
         */
        do {
+               /*
+                * Try to acquire the lock.
+                */
                if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
                        retval = LCK_MTX_SPINWAIT_ACQUIRED;
                        break;
                }
+
                cur_time = mach_absolute_time();
 
-               if (cur_time >= overall_deadline)
+               /*
+                * Never spin past high_deadline.
+                */
+               if (cur_time >= high_deadline) {
+                       retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
                        break;
+               }
 
-               if (cur_time >= check_owner_deadline && mutex->lck_mtx_owner) {
-                       boolean_t       istate;
+               /*
+                * Check if the owner is on core. If not, block.
+                */
+               owner = (thread_t) mutex->lck_mtx_owner;
+               if (owner) {
+                       i = prev_owner_cpu;
+                       owner_on_core = FALSE;
+
+                       disable_preemption();
+                       owner = (thread_t) mutex->lck_mtx_owner;
 
                        /*
-                        * We will repeatedly peek at the state of the lock while spinning,
-                        * and we will acquire the interlock to do so.
-                        * The thread that will unlock the mutex will also need to acquire
-                        * the interlock, and we want to avoid to slow it down.
-                        * To avoid to get an interrupt while holding the interlock
-                        * and increase the time we are holding it, we
-                        * will try to acquire the interlock with interrupts disabled.
-                        * This is safe because it is a "try_lock", if we can't acquire
-                        * the interlock we re-enable the interrupts and fail, so it is
-                        * ok to call it even if the interlock was already held.
-                       */
-                       if (lck_mtx_interlock_try_lock_disable_interrupts(mutex, &istate)) {
-
-                               if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
-
-                                       if ( !(holder->machine.specFlags & OnProc) ||
-                                            (holder->state & TH_IDLE)) {
-
-                                               lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
-
-                                               if (loopcount == 0)
+                        * For scalability we want to check if the owner is on core
+                        * without locking the mutex interlock.
+                        * If we do not lock the mutex interlock, the owner that we see might be
+                        * invalid, so we cannot dereference it. Therefore we cannot check
+                        * any field of the thread to tell us if it is on core.
+                        * Check whether the thread running on any other cpu matches the owner.
+                        */
+                       if (owner) {
+                               do {
+                                       if ((cpu_data_ptr[i] != NULL) && (cpu_data_ptr[i]->cpu_active_thread == owner)) {
+                                               owner_on_core = TRUE;
+                                               break;
+                                       }
+                                       if (++i >= real_ncpus) {
+                                               i = 0;
+                                       }
+                               } while (i != prev_owner_cpu);
+                               enable_preemption();
+
+                               if (owner_on_core) {
+                                       prev_owner_cpu = i;
+                               } else {
+                                       prev_owner = owner;
+                                       owner = (thread_t) mutex->lck_mtx_owner;
+                                       if (owner == prev_owner) {
+                                               /*
+                                                * Owner is not on core.
+                                                * Stop spinning.
+                                                */
+                                               if (loopcount == 0) {
                                                        retval = LCK_MTX_SPINWAIT_NO_SPIN;
+                                               } else {
+                                                       retval = LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE;
+                                               }
                                                break;
                                        }
+                                       /*
+                                        * Fall through if the owner changed while we were scanning.
+                                        * The new owner could potentially be on core, so loop
+                                        * again.
+                                        */
                                }
-                               lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
+                       } else {
+                               enable_preemption();
+                       }
+               }
 
-                               check_owner_deadline = cur_time + (MutexSpin / 4);
+               /*
+                * Save how many times we see the owner changing.
+                * We can roughly estimate the mutex hold
+                * time and the fairness with that.
+                */
+               if (owner != prev_owner) {
+                       prev_owner = owner;
+                       total_hold_time_samples++;
+                       window_hold_time_samples++;
+               }
+
+               /*
+                * Learning window expired.
+                * Try to adjust the sliding_deadline.
+                */
+               if (cur_time >= window_deadline) {
+                       /*
+                        * If there was no contention during the window,
+                        * stop spinning.
+                        */
+                       if (window_hold_time_samples < 1) {
+                               retval = LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION;
+                               break;
                        }
+
+                       if (adjust) {
+                               /*
+                                * For a fair lock, we'd wait for at most (NCPU-1) periods,
+                                * but the lock is unfair, so let's try to estimate by how much.
+                                */
+                               unfairness = total_hold_time_samples / real_ncpus;
+
+                               if (unfairness == 0) {
+                                       /*
+                                        * We observed the owner changing `total_hold_time_samples` times which
+                                        * let us estimate the average hold time of this mutex for the duration
+                                        * of the spin time.
+                                        * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
+                                        *
+                                        * In this case spin at max avg_hold_time * (real_ncpus - 1)
+                                        */
+                                       delta = cur_time - start_time;
+                                       sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
+                               } else {
+                                       /*
+                                        * In this case at least one of the other cpus was able to get the lock twice
+                                        * while I was spinning.
+                                        * We could spin longer but it won't necessarily help if the system is unfair.
+                                        * Try to randomize the wait to reduce contention.
+                                        *
+                                        * We compute how much time we could potentially spin
+                                        * and distribute it over the cpus.
+                                        *
+                                        * bias is an integer between 0 and real_ncpus.
+                                        * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
+                                        */
+                                       delta = high_deadline - cur_time;
+                                       sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
+                                       adjust = FALSE;
+                               }
+                       }
+
+                       window_deadline += low_MutexSpin;
+                       window_hold_time_samples = 0;
                }
-               cpu_pause();
 
-               loopcount++;
+               /*
+                * Stop spinning if we are past
+                * the adjusted deadline.
+                */
+               if (cur_time >= sliding_deadline) {
+                       retval = LCK_MTX_SPINWAIT_SPUN_SLIDING_THR;
+                       break;
+               }
+
+               if ((thread_t) mutex->lck_mtx_owner != NULL) {
+                       cpu_pause();
+               }
 
+               loopcount++;
        } while (TRUE);
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        /*
-        * We've already kept a count via overall_deadline of how long we spun.
-        * If dtrace is active, then we compute backwards to decide how
-        * long we spun.
-        *
         * Note that we record a different probe id depending on whether
-        * this is a direct or indirect mutex.  This allows us to 
+        * this is a direct or indirect mutex.  This allows us to
         * penalize only lock groups that have debug/stats enabled
         * with dtrace processing if desired.
         */
        if (__probable(mutex->lck_mtx_is_ext == 0)) {
                LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
-                       mach_absolute_time() - (overall_deadline - MutexSpin));
+                   mach_absolute_time() - start_time);
        } else {
                LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
-                       mach_absolute_time() - (overall_deadline - MutexSpin));
+                   mach_absolute_time() - start_time);
        }
        /* The lockstat acquire event is recorded by the assembly code beneath us. */
 #endif
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
-                    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);
+           trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);
 
        return retval;
 }
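
Worth noting about the rewritten spin loop that ends here: the owner-on-core scan deliberately never dereferences the owner thread, because without the interlock held the pointer may already be stale; it only compares the raw pointer against each cpu's active thread. A minimal sketch of that comparison loop, assuming a cpu_active_thread[] snapshot array in place of the kernel's cpu_data_ptr[] (the rotation from *prev_owner_cpu keeps repeat scans cheap while the owner stays put):

#include <stdbool.h>

/*
 * Return true if owner is currently the active thread on some cpu.
 * The scan starts at *prev_owner_cpu and, on a hit, records the cpu
 * where the owner was found so the next scan starts there.
 */
static bool
owner_on_core(const void *owner, const void * const cpu_active_thread[],
    unsigned int ncpus, unsigned int *prev_owner_cpu)
{
        unsigned int i = *prev_owner_cpu;

        do {
                if (cpu_active_thread[i] == owner) {
                        *prev_owner_cpu = i;    /* start here next time */
                        return true;
                }
                if (++i >= ncpus) {
                        i = 0;
                }
        } while (i != *prev_owner_cpu);

        return false;
}
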
@@ -3307,7 +3497,7 @@ lck_mtx_lock_spinwait_x86(
 
 
 /*
- * Routine:    lck_mtx_lock_wait_x86
+ * Routine:     lck_mtx_lock_wait_x86
  *
  * Invoked in order to wait on contention.
  *
@@ -3334,13 +3524,13 @@ lck_mtx_lock_spinwait_x86(
  */
 __attribute__((noinline))
 void
-lck_mtx_lock_wait_x86 (
-       lck_mtx_t       *mutex,
+lck_mtx_lock_wait_x86(
+       lck_mtx_t       *mutex,
        struct turnstile **ts)
 {
        thread_t self = current_thread();
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        uint64_t sleep_start = 0;
 
        if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
@@ -3350,8 +3540,8 @@ lck_mtx_lock_wait_x86 (
        __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
-                    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
-                    mutex->lck_mtx_waiters, 0, 0);
+           trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
+           mutex->lck_mtx_waiters, 0, 0);
 
        assert(self->waiting_for_mutex == NULL);
        self->waiting_for_mutex = mutex;
@@ -3384,10 +3574,10 @@ lck_mtx_lock_wait_x86 (
        self->waiting_for_mutex = NULL;
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
-                    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
-                    mutex->lck_mtx_waiters, 0, 0);
+           trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
+           mutex->lck_mtx_waiters, 0, 0);
 
-#if    CONFIG_DTRACE
+#if     CONFIG_DTRACE
        /*
         * Record the Dtrace lockstat probe for blocking, block time
         * measured from when we were entered.
@@ -3410,7 +3600,7 @@ lck_mtx_lock_wait_x86 (
  *      Returns: TRUE if lock is acquired.
  */
 boolean_t
-kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t    *lck)
+kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t     *lck)
 {
        if (not_in_kdp) {
                panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
@@ -3436,17 +3626,17 @@ void
 kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
 {
        lck_rw_t *rwlck = NULL;
-       switch(waitinfo->wait_type) {
-               case kThreadWaitKernelRWLockRead:
-                       rwlck = READ_EVENT_TO_RWLOCK(event);
-                       break;
-               case kThreadWaitKernelRWLockWrite:
-               case kThreadWaitKernelRWLockUpgrade:
-                       rwlck = WRITE_EVENT_TO_RWLOCK(event);
-                       break;
-               default:
-                       panic("%s was called with an invalid blocking type", __FUNCTION__);
-                       break;
+       switch (waitinfo->wait_type) {
+       case kThreadWaitKernelRWLockRead:
+               rwlck = READ_EVENT_TO_RWLOCK(event);
+               break;
+       case kThreadWaitKernelRWLockWrite:
+       case kThreadWaitKernelRWLockUpgrade:
+               rwlck = WRITE_EVENT_TO_RWLOCK(event);
+               break;
+       default:
+               panic("%s was called with an invalid blocking type", __FUNCTION__);
+               break;
        }
        waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
        waitinfo->owner = 0;
index 5720cf7e22850d1b062b36a96c563704fb6a0537..60fceb2a498470656fcf67316dde034e078843c8 100644 (file)
@@ -248,6 +248,10 @@ lck_mtx_lock_spin_always(
         * well as destroyed mutexes.
         */
 
+       if (state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK)) {
+               return lck_mtx_lock_spin_slow(lock);
+       }
+
        /* Note LCK_MTX_SPIN_MSK is set only if LCK_MTX_ILOCKED_MSK is set */
        prev = state & ~(LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK);
        state = prev | LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK;
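
The effect of this early-out: lck_mtx_lock_spin_always now falls back to the slow path as soon as either the interlock or the spin bit is visible, and only attempts the compare-and-swap when both are clear. A user-space model of the fast path (mask values hypothetical, C11 atomics in place of the kernel's):

#include <stdatomic.h>
#include <stdbool.h>

#define ILOCKED_MSK     0x00000001u     /* hypothetical bit layout */
#define SPIN_MSK        0x00000002u
#define MLOCKED_MSK     0x00000004u

static bool
spin_lock_fast_path(atomic_uint *lock_state)
{
        unsigned int state = atomic_load_explicit(lock_state, memory_order_relaxed);

        /* Someone holds the interlock or already spins: take the slow path. */
        if (state & (ILOCKED_MSK | SPIN_MSK)) {
                return false;
        }

        /* One acquire-ordered cmpxchg picks up interlock + spin together. */
        unsigned int prev = state & ~(ILOCKED_MSK | MLOCKED_MSK);
        unsigned int next = prev | ILOCKED_MSK | SPIN_MSK;
        return atomic_compare_exchange_strong_explicit(lock_state, &prev, next,
            memory_order_acquire, memory_order_relaxed);
}
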
index 84bfb4c405a531181cf2d0a3f8fd0f61cc6595be..e27b01b22ccca5be8d6a9bf1bd2750c4ffe2a2d0 100644 (file)
@@ -78,6 +78,8 @@ uint64_t        TLBTimeOut;
 uint64_t        LockTimeOutTSC;
 uint32_t        LockTimeOutUsec;
 uint64_t        MutexSpin;
+uint64_t        low_MutexSpin;
+int64_t         high_MutexSpin;
 uint64_t        LastDebuggerEntryAllowance;
 uint64_t        delay_spin_threshold;
 
@@ -788,6 +790,12 @@ ml_init_lock_timeout(void)
                nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
        }
        MutexSpin = (unsigned int)abstime;
+       low_MutexSpin = MutexSpin;
+       /*
+        * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
+        * real_ncpus is not set at this time
+        */
+       high_MutexSpin = -1;
 
        nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
        if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) {
@@ -823,6 +831,7 @@ MACRO_END
                VIRTUAL_TIMEOUT_INFLATE64(LockTimeOutTSC);
                VIRTUAL_TIMEOUT_INFLATE64(TLBTimeOut);
                VIRTUAL_TIMEOUT_INFLATE64(MutexSpin);
+               VIRTUAL_TIMEOUT_INFLATE64(low_MutexSpin);
                VIRTUAL_TIMEOUT_INFLATE64(reportphyreaddelayabs);
        }
 
index b2f1e478fd9834583e812cdd70ea1327bd2eb0a4..4f605d3784e3adf6f6356a18276e0663517a092d 100644 (file)
@@ -372,9 +372,7 @@ pmap_verify_noncacheable(uintptr_t vaddr);
 #ifdef  XNU_KERNEL_PRIVATE
 
 boolean_t ml_fpu_avx_enabled(void);
-#if !defined(RC_HIDE_XNU_J137)
 boolean_t ml_fpu_avx512_enabled(void);
-#endif
 
 void interrupt_latency_tracker_setup(void);
 void interrupt_reset_latency_stats(void);
index 9ece881bd134066069eb2a935eed3b78757fe424..1f154580b1262c48216055cccd66f25cd562e3ce 100644 (file)
@@ -1961,7 +1961,17 @@ machine_thread_init(void)
        fpu_module_init();
 }
 
+/*
+ * machine_thread_template_init: Initialize machine-specific portion of
+ * the thread template.
+ */
+void
+machine_thread_template_init(thread_t thr_template)
+{
+       assert(fpu_default != UNDEFINED);
 
+       THREAD_TO_PCB(thr_template)->xstate = fpu_default;
+}
 
 user_addr_t
 get_useraddr(void)
index 022491a8972d7219683f9df088e82f0566691fdc..4ff579713150b220e085d570516be7c8e33f67eb 100644 (file)
 #define XCR0_YMM        (1ULL << 2)     /* YMM state available */
 #define XCR0_BNDREGS    (1ULL << 3)     /* MPX Bounds register state */
 #define XCR0_BNDCSR     (1ULL << 4)     /* MPX Bounds configuration/state  */
-#if !defined(RC_HIDE_XNU_J137)
 #define XCR0_OPMASK     (1ULL << 5)     /* Opmask register state */
 #define XCR0_ZMM_HI256  (1ULL << 6)     /* ZMM upper 256-bit state */
 #define XCR0_HI16_ZMM   (1ULL << 7)     /* ZMM16..ZMM31 512-bit state */
-#endif /* not RC_HIDE_XNU_J137 */
 #define XFEM_X87        XCR0_X87
 #define XFEM_SSE        XCR0_SSE
 #define XFEM_YMM        XCR0_YMM
 #define XFEM_BNDREGS    XCR0_BNDREGS
 #define XFEM_BNDCSR     XCR0_BNDCSR
-#if !defined(XNU_HODE_J137)
 #define XFEM_OPMASK     XCR0_OPMASK
 #define XFEM_ZMM_HI256  XCR0_ZMM_HI256
 #define XFEM_HI16_ZMM   XCR0_HI16_ZMM
 #define XFEM_ZMM        (XFEM_ZMM_HI256 | XFEM_HI16_ZMM | XFEM_OPMASK)
-#endif /* not XNU_HODE_J137 */
 #define XCR0 (0)
 
 #define PMAP_PCID_PRESERVE (1ULL << 63)
index fa5c0ce237307b22538541a60ff4aaf89bdbda95..14f6816282293a49865b8ae96ae8400d4758143d 100644 (file)
@@ -77,9 +77,6 @@
 #include <i386/seg.h>
 #include <i386/thread.h>
 
-#include <IOKit/IOBSD.h> /* for IOTaskHasEntitlement */
-#include <sys/csr.h> /* for csr_check */
-
 #include <sys/errno.h>
 
 static void user_ldt_set_action(void *);
@@ -88,8 +85,6 @@ static int i386_set_ldt_impl(uint32_t *retval, uint64_t start_sel, uint64_t desc
 static int i386_get_ldt_impl(uint32_t *retval, uint64_t start_sel, uint64_t descs,
     uint64_t num_sels);
 
-#define LDT_IN_64BITPROC_ENTITLEMENT "com.apple.security.ldt-in-64bit-process"
-
 /*
  * Add the descriptors to the LDT, starting with
  * the descriptor for 'first_selector'.
@@ -444,11 +439,6 @@ i386_set_ldt64(
        uint64_t                descs,  /* out */
        uint64_t                num_sels)
 {
-       if (csr_check(CSR_ALLOW_UNTRUSTED_KEXTS) != 0 &&
-           !IOTaskHasEntitlement(current_task(), LDT_IN_64BITPROC_ENTITLEMENT)) {
-               return EPERM;
-       }
-
        return i386_set_ldt_impl(retval, start_sel, descs, num_sels);
 }
 
@@ -472,10 +462,5 @@ i386_get_ldt64(
        uint64_t                descs,  /* out */
        uint64_t                num_sels)
 {
-       if (csr_check(CSR_ALLOW_UNTRUSTED_KEXTS) != 0 &&
-           !IOTaskHasEntitlement(current_task(), LDT_IN_64BITPROC_ENTITLEMENT)) {
-               return EPERM;
-       }
-
        return i386_get_ldt_impl(retval, start_sel, descs, num_sels);
 }
index 44d1efed80fcbe0237e64c769033a238b4aa28f6..c403b9f4654a03f2c4ddfbc5ade69519cc1098e0 100644 (file)
@@ -2578,7 +2578,7 @@ ipc_importance_send(
                ipc_voucher_t voucher;
 
                assert(ip_kotype(kmsg->ikm_voucher) == IKOT_VOUCHER);
-               voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject;
+               voucher = (ipc_voucher_t)ip_get_kobject(kmsg->ikm_voucher);
 
                /* check to see if the voucher has an importance attribute */
                val_count = MACH_VOUCHER_ATTR_VALUE_MAX_NESTED;
@@ -3190,7 +3190,7 @@ ipc_importance_receive(
 
                /* set up recipe to copy the old voucher */
                if (IP_VALID(kmsg->ikm_voucher)) {
-                       ipc_voucher_t sent_voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject;
+                       ipc_voucher_t sent_voucher = (ipc_voucher_t)ip_get_kobject(kmsg->ikm_voucher);
 
                        recipe->key = MACH_VOUCHER_ATTR_KEY_ALL;
                        recipe->command = MACH_VOUCHER_ATTR_COPY;
index ca4bcee84c00aa3cf87f1f07eeb8baa2ed878b99..a03871cb83e3b884dbb3fd9d45180d332172bb0e 100644 (file)
 #include <kern/simple_lock.h>
 #include <kern/mach_param.h>
 #include <kern/ipc_host.h>
+#include <kern/ipc_kobject.h>
 #include <kern/ipc_mig.h>
 #include <kern/host_notify.h>
 #include <kern/mk_timer.h>
 #include <kern/misc_protos.h>
+#include <kern/suid_cred.h>
 #include <kern/sync_lock.h>
 #include <kern/sync_sema.h>
 #include <kern/ux_handler.h>
@@ -212,7 +214,7 @@ ipc_bootstrap(void)
 #if     MACH_ASSERT
        ipc_port_debug_init();
 #endif
-       mig_init();
+       ipc_kobject_init();
        ipc_table_init();
        ipc_voucher_init();
 
@@ -228,6 +230,8 @@ ipc_bootstrap(void)
        arcade_init();
 #endif
 
+       suid_cred_init();
+
        if (PE_parse_boot_argn("prioritize_launch", &prioritize_launch_bootarg, sizeof(prioritize_launch_bootarg))) {
                prioritize_launch = !!prioritize_launch_bootarg;
        }
index f1611fc821036fccebd36c445a33bd5a1ce6daf0..3a40a39ff0620048bc55372a5f0062f36fe6b314 100644 (file)
@@ -4270,7 +4270,8 @@ ipc_kmsg_copyout_header(
                                        assert(entry->ie_bits & MACH_PORT_TYPE_SEND_RECEIVE);
                                } else {
                                        ip_lock(reply);
-                                       if (!ip_active(reply)) {
+                                       /* Is the reply port still active and allowed to be copied out? */
+                                       if (!ip_active(reply) || !ip_label_check(space, reply, reply_type)) {
                                                /* clear the context value */
                                                reply->ip_reply_context = 0;
                                                ip_unlock(reply);
index 76fc96b8e5822d3001df3470a273badce659e290..adeef2d7283b48afba04ce5755d3bfd8615e3798 100644 (file)
@@ -945,6 +945,7 @@ ipc_object_copyout(
                        break;
                }
 
+
                name = CAST_MACH_PORT_TO_NAME(object);
                kr = ipc_entry_get(space, &name, &entry);
                if (kr != KERN_SUCCESS) {
@@ -968,6 +969,30 @@ ipc_object_copyout(
                        return KERN_INVALID_CAPABILITY;
                }
 
+               /* Don't actually copyout rights we aren't allowed to */
+               if (!ip_label_check(space, ip_object_to_port(object), msgt_name)) {
+                       io_unlock(object);
+                       ipc_entry_dealloc(space, name, entry);
+                       is_write_unlock(space);
+
+                       switch (msgt_name) {
+                       case MACH_MSG_TYPE_PORT_SEND_ONCE:
+                               ipc_port_release_sonce(ip_object_to_port(object));
+                               break;
+                       case MACH_MSG_TYPE_PORT_SEND:
+                               ipc_port_release_send(ip_object_to_port(object));
+                               break;
+                       default:
+                               /*
+                                * We don't allow labeling of "kobjects" with receive
+                                * rights at user-space or port-sets. So, if we get this far,
+                                * something went VERY wrong.
+                                */
+                               panic("ipc_object_copyout: bad port label check failure");
+                       }
+                       return KERN_INVALID_CAPABILITY;
+               }
+
                entry->ie_object = object;
                break;
        }
@@ -1064,6 +1089,25 @@ ipc_object_copyout_name(
                        return KERN_INVALID_CAPABILITY;
                }
 
+               /* Don't actually copyout rights we aren't allowed to */
+               if (!ip_label_check(space, ip_object_to_port(object), msgt_name)) {
+                       io_unlock(object);
+                       ipc_entry_dealloc(space, name, entry);
+                       is_write_unlock(space);
+
+                       switch (msgt_name) {
+                       case MACH_MSG_TYPE_PORT_SEND_ONCE:
+                               ipc_port_release_sonce(ip_object_to_port(object));
+                               break;
+                       case MACH_MSG_TYPE_PORT_SEND:
+                               ipc_port_release_send(ip_object_to_port(object));
+                               break;
+                       default:
+                               panic("ipc_object_copyout_name: bad port label check failure");
+                       }
+                       return KERN_INVALID_CAPABILITY;
+               }
+
                entry->ie_object = object;
        }
 
index 77ddc133353c14e642f6350420de4cf031facc18..4ca1ad5429be25e13c7ce4088b07425784205fe1 100644 (file)
@@ -131,8 +131,9 @@ struct ipc_object_header {
  *     definitions in ipc_port.h.
  */
 #define IO_BITS_PORT_INFO       0x0000f000      /* stupid port tricks */
-#define IO_BITS_KOTYPE          0x000007ff      /* used by the object */
+#define IO_BITS_KOTYPE          0x000003ff      /* used by the object */
 #define IO_BITS_KOBJECT         0x00000800      /* port belongs to a kobject */
+#define IO_BITS_KOLABEL         0x00000400      /* The kobject has a label */
 #define IO_BITS_OTYPE           0x7fff0000      /* determines a zone */
 #define IO_BITS_ACTIVE          0x80000000      /* is object alive? */
 
@@ -141,7 +142,7 @@ struct ipc_object_header {
 #define io_otype(io)            (((io)->io_bits & IO_BITS_OTYPE) >> 16)
 #define io_kotype(io)           ((io)->io_bits & IO_BITS_KOTYPE)
 #define io_is_kobject(io)       (((io)->io_bits & IO_BITS_KOBJECT) != IKOT_NONE)
-
+#define io_is_kolabeled(io)     (((io)->io_bits & IO_BITS_KOLABEL) != 0)
 #define io_makebits(active, otype, kotype)      \
        (((active) ? IO_BITS_ACTIVE : 0) | ((otype) << 16) | (kotype))
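
The new label flag is carved out of the kotype field: IO_BITS_KOTYPE shrinks from 0x7ff to 0x3ff so bit 0x400 can become IO_BITS_KOLABEL. A quick compile-time check (not from the source) that the narrowed layout keeps every field disjoint and still covers all 32 bits:

#include <assert.h>

#define IO_BITS_PORT_INFO       0x0000f000u
#define IO_BITS_KOTYPE          0x000003ffu
#define IO_BITS_KOLABEL         0x00000400u
#define IO_BITS_KOBJECT         0x00000800u
#define IO_BITS_OTYPE           0x7fff0000u
#define IO_BITS_ACTIVE          0x80000000u

static_assert((IO_BITS_KOTYPE & IO_BITS_KOLABEL) == 0, "kotype/kolabel overlap");
static_assert((IO_BITS_KOLABEL & IO_BITS_KOBJECT) == 0, "kolabel/kobject overlap");
static_assert((IO_BITS_KOTYPE | IO_BITS_KOLABEL | IO_BITS_KOBJECT |
    IO_BITS_PORT_INFO | IO_BITS_OTYPE | IO_BITS_ACTIVE) == 0xffffffffu,
    "all 32 bits accounted for");
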
 
index 413139c0e86f9de65a06983d5354bc8c7ab740f2..b9acef764b8fcb0edda1278642ca1628dcbab64e 100644 (file)
@@ -129,6 +129,7 @@ struct ipc_port {
 
        union {
                ipc_kobject_t kobject;
+               ipc_kobject_label_t kolabel;
                ipc_importance_task_t imp_task;
                ipc_port_t sync_inheritor_port;
                struct knote *sync_inheritor_knote;
@@ -190,6 +191,7 @@ struct ipc_port {
 #define ip_timestamp            data.timestamp
 
 #define ip_kobject              kdata.kobject
+#define ip_kolabel              kdata.kolabel
 #define ip_imp_task             kdata.imp_task
 #define ip_sync_inheritor_port  kdata.sync_inheritor_port
 #define ip_sync_inheritor_knote kdata.sync_inheritor_knote
@@ -280,6 +282,10 @@ MACRO_END
 
 #define ip_kotype(port)         io_kotype(ip_to_object(port))
 #define ip_is_kobject(port)     io_is_kobject(ip_to_object(port))
+#define ip_is_kolabeled(port)   io_is_kolabeled(ip_to_object(port))
+#define ip_get_kobject(port)    ipc_kobject_get(port)
+#define ip_label_check(space, port, msgt_name) \
+       (!ip_is_kolabeled(port) || ipc_kobject_label_check((space), (port), (msgt_name)))
 
 #define ip_full_kernel(port)    imq_full_kernel(&(port)->ip_messages)
 #define ip_full(port)           imq_full(&(port)->ip_messages)
index 290c71673327187697a2b33ec0d9044f4a14a840..3d2a0dc133f99b6a5fa32f17d9e11a6300995db5 100644 (file)
@@ -237,6 +237,7 @@ ipc_space_rand_freelist(
 kern_return_t
 ipc_space_create(
        ipc_table_size_t        initial,
+       ipc_label_t             label,
        ipc_space_t             *spacep)
 {
        ipc_space_t space;
@@ -271,6 +272,7 @@ ipc_space_create(
        space->is_table = table;
        space->is_table_next = initial + 1;
        space->is_task = NULL;
+       space->is_label = label;
        space->is_low_mod = new_size;
        space->is_high_mod = 0;
        space->is_node_id = HOST_LOCAL_NODE; /* HOST_LOCAL_NODE, except proxy spaces */
@@ -279,6 +281,67 @@ ipc_space_create(
        return KERN_SUCCESS;
 }
 
+/*
+ *     Routine:        ipc_space_label
+ *     Purpose:
+ *             Modify the label on a space. The desired
+ *      label must be a superset of the current
+ *      label for the space (as rights may already
+ *      have been copied out under the
+ *      old label value).
+ *     Conditions:
+ *             Nothing locked.
+ *     Returns:
+ *             KERN_SUCCESS            Updated the label
+ *             KERN_INVALID_VALUE  label not a superset of old
+ */
+kern_return_t
+ipc_space_label(
+       ipc_space_t space,
+       ipc_label_t label)
+{
+       is_write_lock(space);
+       if (!is_active(space)) {
+               is_write_unlock(space);
+               return KERN_SUCCESS;
+       }
+
+       if ((space->is_label & label) != space->is_label) {
+               is_write_unlock(space);
+               return KERN_INVALID_VALUE;
+       }
+       space->is_label = label;
+       is_write_unlock(space);
+       return KERN_SUCCESS;
+}
+
+/*
+ *     Routine:        ipc_space_add_label
+ *     Purpose:
+ *             Modify the label on a space. The desired
+ *      label is added to the labels already set
+ *      on the space.
+ *     Conditions:
+ *             Nothing locked.
+ *     Returns:
+ *             KERN_SUCCESS            Updated the label
+ */
+kern_return_t
+ipc_space_add_label(
+       ipc_space_t space,
+       ipc_label_t label)
+{
+       is_write_lock(space);
+       if (!is_active(space)) {
+               is_write_unlock(space);
+               return KERN_SUCCESS;
+       }
+
+       space->is_label |= label;
+       is_write_unlock(space);
+       return KERN_SUCCESS;
+}
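
The superset rule enforced by ipc_space_label above is a plain bitmask containment test: every bit already set in is_label must survive in the requested label, since rights may have been copied out under the old value. A tiny illustration using the constants this change adds to ipc_types.h:

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t ipc_label_t;
#define IPC_LABEL_NONE     ((ipc_label_t)0x0)
#define IPC_LABEL_DEXT     ((ipc_label_t)0x1)
#define IPC_LABEL_PLATFORM ((ipc_label_t)0x2)

/* True when new_label keeps every bit of old_label set. */
static bool
label_is_superset(ipc_label_t old_label, ipc_label_t new_label)
{
        return (old_label & new_label) == old_label;
}

/*
 * label_is_superset(IPC_LABEL_NONE, IPC_LABEL_DEXT)   -> true  (KERN_SUCCESS)
 * label_is_superset(IPC_LABEL_DEXT,
 *     IPC_LABEL_DEXT | IPC_LABEL_PLATFORM)            -> true  (KERN_SUCCESS)
 * label_is_superset(IPC_LABEL_DEXT, IPC_LABEL_NONE)   -> false (KERN_INVALID_VALUE)
 */
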
 /*
  *     Routine:        ipc_space_create_special
  *     Purpose:
@@ -310,6 +373,7 @@ ipc_space_create_special(
        space->is_bits       = IS_INACTIVE | 1; /* 1 ref, not active, not growing */
        space->is_table      = IE_NULL;
        space->is_task       = TASK_NULL;
+       space->is_label      = IPC_LABEL_SPECIAL;
        space->is_table_next = 0;
        space->is_low_mod    = 0;
        space->is_high_mod   = 0;
index 161c55403a9953d58381dee0ed66e56b9417eef7..8a4466a4d337d4d99ac83b88fb094a6795cf959e 100644 (file)
@@ -119,8 +119,9 @@ struct ipc_space {
        ipc_entry_num_t is_table_hashed;/* count of hashed elements */
        ipc_entry_num_t is_table_free;  /* count of free elements */
        ipc_entry_t is_table;           /* an array of entries */
-       task_t is_task;                 /* associated task */
        struct ipc_table_size *is_table_next; /* info for larger table */
+       task_t is_task;                 /* associated task */
+       ipc_label_t is_label;           /* [private] mandatory access label */
        ipc_entry_num_t is_low_mod;     /* lowest modified entry during growth */
        ipc_entry_num_t is_high_mod;    /* highest modified entry during growth */
        struct bool_gen bool_gen;       /* state for boolean RNG */
@@ -225,8 +226,19 @@ extern kern_return_t ipc_space_create_special(
 /* Create a new IPC space */
 extern kern_return_t ipc_space_create(
        ipc_table_size_t        initial,
+       ipc_label_t             label,
        ipc_space_t             *spacep);
 
+/* Change the label on an existing space */
+extern kern_return_t ipc_space_label(
+       ipc_space_t space,
+       ipc_label_t label);
+
+/* Add a label to an existing space */
+extern kern_return_t ipc_space_add_label(
+       ipc_space_t space,
+       ipc_label_t label);
+
 /* Mark a space as dead and cleans up the entries*/
 extern void ipc_space_terminate(
        ipc_space_t     space);
index eaf5a3798d2aefcf8bd076bdf3ca95665bbebd45..f5fde0daec3c64dcf6f1a20d811001caa7662939 100644 (file)
@@ -62,6 +62,14 @@ typedef struct ipc_pset *ipc_pset_t;
 typedef struct ipc_kmsg *ipc_kmsg_t;
 typedef uint8_t sync_qos_count_t;
 
+typedef uint64_t ipc_label_t;
+#define IPC_LABEL_NONE     ((ipc_label_t)0x0)
+#define IPC_LABEL_DEXT     ((ipc_label_t)0x1)
+#define IPC_LABEL_PLATFORM ((ipc_label_t)0x2)
+#define IPC_LABEL_SPECIAL  ((ipc_label_t)0x3)
+
+typedef struct ipc_kobject_label *ipc_kobject_label_t;
+
 #define IE_NULL ((ipc_entry_t) 0)
 
 #define ITS_NULL        ((ipc_table_size_t) 0)
index eeb226a87cd6cb13df49df38dd94f9b94a0e7bf9..bc739faa350656af0fb1e5bbffd2e7317ca4702f 100644 (file)
@@ -377,7 +377,12 @@ unsafe_convert_port_to_voucher(
        ipc_port_t      port)
 {
        if (IP_VALID(port)) {
-               uintptr_t voucher = (uintptr_t) port->ip_kobject;
+               /* vouchers never labeled (they get transformed before use) */
+               if (ip_is_kolabeled(port)) {
+                       return (uintptr_t)IV_NULL;
+               }
+
+               uintptr_t voucher = (uintptr_t)port->ip_kobject;
 
                /*
                 * No need to lock because we have a reference on the
@@ -407,7 +412,7 @@ convert_port_to_voucher(
 {
        if (IP_VALID(port)) {
                zone_require(port, ipc_object_zones[IOT_PORT]);
-               ipc_voucher_t voucher = (ipc_voucher_t) port->ip_kobject;
+               ipc_voucher_t voucher = (ipc_voucher_t) ip_get_kobject(port);
 
                /*
                 * No need to lock because we have a reference on the
@@ -487,13 +492,14 @@ ipc_voucher_notify(mach_msg_header_t *msg)
 {
        mach_no_senders_notification_t *notification = (void *)msg;
        ipc_port_t port = notification->not_header.msgh_remote_port;
+       ipc_voucher_t voucher = (ipc_voucher_t)ip_get_kobject(port);
 
        require_ip_active(port);
        assert(IKOT_VOUCHER == ip_kotype(port));
 
        /* consume the reference donated by convert_voucher_to_port */
-       zone_require((ipc_voucher_t)port->ip_kobject, ipc_voucher_zone);
-       ipc_voucher_release((ipc_voucher_t)port->ip_kobject);
+       zone_require(voucher, ipc_voucher_zone);
+       ipc_voucher_release(voucher);
 }
 
 /*
@@ -671,7 +677,7 @@ convert_port_to_voucher_attr_control(
 {
        if (IP_VALID(port)) {
                zone_require(port, ipc_object_zones[IOT_PORT]);
-               ipc_voucher_attr_control_t ivac = (ipc_voucher_attr_control_t) port->ip_kobject;
+               ipc_voucher_attr_control_t ivac = (ipc_voucher_attr_control_t) ip_get_kobject(port);
 
                /*
                 * No need to lock because we have a reference on the
@@ -702,12 +708,15 @@ ipc_voucher_attr_control_notify(mach_msg_header_t *msg)
 {
        mach_no_senders_notification_t *notification = (void *)msg;
        ipc_port_t port = notification->not_header.msgh_remote_port;
+       ipc_voucher_attr_control_t ivac;
 
        require_ip_active(port);
        assert(IKOT_VOUCHER_ATTR_CONTROL == ip_kotype(port));
 
        /* release the reference donated by convert_voucher_attr_control_to_port */
-       ivac_release((ipc_voucher_attr_control_t)port->ip_kobject);
+       ivac = (ipc_voucher_attr_control_t)ip_get_kobject(port);
+       zone_require(ivac, ipc_voucher_attr_control_zone);
+       ivac_release(ivac);
 }
 
 /*
@@ -2638,7 +2647,7 @@ ipc_get_pthpriority_from_kmsg_voucher(
                return KERN_FAILURE;
        }
 
-       pthread_priority_voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject;
+       pthread_priority_voucher = (ipc_voucher_t)ip_get_kobject(kmsg->ikm_voucher);
        kr = mach_voucher_extract_attr_recipe(pthread_priority_voucher,
            MACH_VOUCHER_ATTR_KEY_PTHPRIORITY,
            content_data,
@@ -2683,7 +2692,7 @@ ipc_voucher_send_preprocessing(ipc_kmsg_t kmsg)
        }
 
        /* setup recipe for preprocessing of all the attributes. */
-       pre_processed_voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject;
+       pre_processed_voucher = (ipc_voucher_t)ip_get_kobject(kmsg->ikm_voucher);
 
        kr = ipc_voucher_prepare_processing_recipe(pre_processed_voucher,
            (mach_voucher_attr_raw_recipe_array_t)recipes,
@@ -2732,7 +2741,7 @@ ipc_voucher_receive_postprocessing(
        }
 
        /* setup recipe for auto redeem of all the attributes. */
-       sent_voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject;
+       sent_voucher = (ipc_voucher_t)ip_get_kobject(kmsg->ikm_voucher);
 
        kr = ipc_voucher_prepare_processing_recipe(sent_voucher,
            (mach_voucher_attr_raw_recipe_array_t)recipes,
index cf1d90c0b281d69285d8f1d4045528ffd7561062..baa3b96d2e129b80e4d7a409cf8e5dcf34a69ac5 100644 (file)
@@ -86,6 +86,7 @@
 #include <ipc/ipc_right.h>
 
 #include <security/mac_mach_internal.h>
+#include <device/device_types.h>
 #endif
 
 /*
@@ -452,21 +453,23 @@ mach_port_dnrequest_info(
 
 #if !MACH_IPC_DEBUG
 kern_return_t
-mach_port_kobject(
+mach_port_kobject_description(
        __unused ipc_space_t            space,
        __unused mach_port_name_t       name,
        __unused natural_t              *typep,
-       __unused mach_vm_address_t      *addrp)
+       __unused mach_vm_address_t      *addrp,
+       __unused kobject_description_t  desc)
 {
        return KERN_FAILURE;
 }
 #else
 kern_return_t
-mach_port_kobject(
+mach_port_kobject_description(
        ipc_space_t                     space,
        mach_port_name_t                name,
        natural_t                       *typep,
-       mach_vm_address_t               *addrp)
+       mach_vm_address_t               *addrp,
+       kobject_description_t           desc)
 {
        ipc_entry_t entry;
        ipc_port_t port;
@@ -500,19 +503,53 @@ mach_port_kobject(
        }
 
        *typep = (unsigned int) ip_kotype(port);
-       kaddr = (mach_vm_address_t)port->ip_kobject;
+       kaddr = (mach_vm_address_t)ip_get_kobject(port);
        *addrp = 0;
 #if (DEVELOPMENT || DEBUG)
        if (kaddr && ip_is_kobject(port)) {
                *addrp = VM_KERNEL_UNSLIDE_OR_PERM(kaddr);
        }
 #endif
+
+       io_object_t obj = NULL;
+       natural_t   kotype = ip_kotype(port);
+       if (desc) {
+               *desc = '\0';
+               switch (kotype) {
+               case IKOT_IOKIT_OBJECT:
+               case IKOT_IOKIT_CONNECT:
+               case IKOT_IOKIT_IDENT:
+               case IKOT_UEXT_OBJECT:
+                       obj = (io_object_t) kaddr;
+                       iokit_add_reference(obj, IKOT_IOKIT_OBJECT);
+                       break;
+
+               default:
+                       break;
+               }
+       }
+
        ip_unlock(port);
 
+       if (obj) {
+               iokit_port_object_description(obj, desc);
+               iokit_remove_reference(obj);
+       }
+
        return KERN_SUCCESS;
 }
 #endif /* MACH_IPC_DEBUG */
 
+kern_return_t
+mach_port_kobject(
+       ipc_space_t                     space,
+       mach_port_name_t                name,
+       natural_t                       *typep,
+       mach_vm_address_t               *addrp)
+{
+       return mach_port_kobject_description(space, name, typep, addrp, NULL);
+}
+
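
A hedged user-space usage sketch, assuming the MIG-generated stub mirrors the in-kernel signature and that kobject_description_t is the fixed-size character buffer from device_types.h; on RELEASE kernels the kernel address comes back as zero and the description may be empty:

#include <mach/mach.h>
#include <stdio.h>

static void
describe_port(mach_port_name_t name)
{
        natural_t               kotype = 0;
        mach_vm_address_t       kaddr = 0;
        kobject_description_t   desc;   /* fixed-size char buffer */

        kern_return_t kr = mach_port_kobject_description(mach_task_self(),
            name, &kotype, &kaddr, desc);
        if (kr == KERN_SUCCESS) {
                printf("kotype=%u kaddr=0x%llx desc=%s\n",
                    kotype, (unsigned long long)kaddr, desc);
        }
}
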
 /*
  *     Routine:        mach_port_kernel_object [Legacy kernel call]
  *     Purpose:
index 48c4b6014d3289d737da1bf2a19c611174959c3d..8bd54f11d7e741ac79e506c6f13a5f165cc18c50 100644 (file)
@@ -126,7 +126,7 @@ convert_port_to_arcade_register(
                /* No need to lock port because of how refs managed */
                if (ip_kotype(port) == IKOT_ARCADE_REG) {
                        assert(ip_active(port));
-                       arcade_reg = (arcade_register_t)port->ip_kobject;
+                       arcade_reg = (arcade_register_t)ip_get_kobject(port);
                        assert(arcade_reg == &arcade_register_global);
                        assert(arcade_reg->ar_port == port);
                }
index eb51597a266d78fb5262de5030ef0ee0f381e70b..a3e20dcc17533d3631579a08eec718584eabb3fa 100644 (file)
@@ -90,7 +90,7 @@ audit_session_porttoaia(ipc_port_t port)
                ip_lock(port);
                if (IKOT_AU_SESSIONPORT == ip_kotype(port)) {
                        require_ip_active(port);
-                       aia_p = (struct auditinfo_addr *)port->ip_kobject;
+                       aia_p = (struct auditinfo_addr *)ip_get_kobject(port);
                }
                ip_unlock(port);
        }
@@ -119,7 +119,7 @@ audit_session_nosenders(mach_msg_header_t *msg)
 
        require_ip_active(port);
        assert(IKOT_AU_SESSIONPORT == ip_kotype(port));
-       port_aia_p = (struct auditinfo_addr *)port->ip_kobject;
+       port_aia_p = (struct auditinfo_addr *)ip_get_kobject(port);
        assert(NULL != port_aia_p);
 
        audit_session_aiaunref(port_aia_p);
index 59667c828222f44c7f6a472a00d8dce8849a4817..3de320d9f65aa0d08ea45b5b9fe12c908846c752 100644 (file)
@@ -242,17 +242,17 @@ backtrace_interrupted(uintptr_t *bt, unsigned int max_frames,
            was_truncated_out) + 1;
 }
 
-int
+unsigned int
 backtrace_user(uintptr_t *bt, unsigned int max_frames,
-    unsigned int *frames_out, bool *user_64_out, bool *was_truncated_out)
+    int *error_out, bool *user_64_out, bool *was_truncated_out)
 {
        return backtrace_thread_user(current_thread(), bt, max_frames,
-           frames_out, user_64_out, was_truncated_out);
+           error_out, user_64_out, was_truncated_out);
 }
 
-int
+unsigned int
 backtrace_thread_user(void *thread, uintptr_t *bt, unsigned int max_frames,
-    unsigned int *frames_out, bool *user_64_out, bool *was_truncated_out)
+    int *error_out, bool *user_64_out, bool *was_truncated_out)
 {
        bool user_64;
        uintptr_t pc = 0, fp = 0, next_fp = 0;
@@ -263,7 +263,6 @@ backtrace_thread_user(void *thread, uintptr_t *bt, unsigned int max_frames,
 
        assert(bt != NULL);
        assert(max_frames > 0);
-       assert(frames_out != NULL);
 
 #if defined(__x86_64__)
 
@@ -405,8 +404,10 @@ out:
        if (user_64_out) {
                *user_64_out = user_64;
        }
+       if (error_out) {
+               *error_out = err;
+       }
 
-       *frames_out = frame_index;
-       return err;
+       return frame_index;
 #undef INVALID_USER_FP
 }
index 8b56b26df075a4334f0a06f63f2d11fe184d491f..4123482e1988abf9a8f644b3e5d299f71bd58839 100644 (file)
@@ -108,17 +108,20 @@ unsigned int backtrace_interrupted(uintptr_t *bt, unsigned int btlen,
  * thread, nor can it be called from interrupt context or with interrupts
  * disabled.
  *
- * @param btwritten On success, the number of return addresses written is stored
- * here.
+ * @param error The precise error code that occurred is stored here, or 0 if no
+ * error occurred.
  *
  * @param user64 On success, true is stored here if user space was running in
  * 64-bit mode, and false is stored otherwise.
  *
- * @return Returns 0 on success and an errno value on error.
+ * @param was_truncated true is stored here if the full stack could not be written
+ * to bt.
+ *
+ * @return Returns the number of frames written to bt.
  *
  * @seealso backtrace
  */
-int backtrace_user(uintptr_t *bt, unsigned int btlen, unsigned int *btwritten,
+unsigned int backtrace_user(uintptr_t *bt, unsigned int btlen, int *error,
     bool *user64, bool *was_truncated);
 
 /*
@@ -134,8 +137,8 @@ int backtrace_user(uintptr_t *bt, unsigned int btlen, unsigned int *btwritten,
  *
  * @see backtrace_user
  */
-int backtrace_thread_user(void *thread, uintptr_t *bt, unsigned int btlen,
-    unsigned int *btwritten, bool *user64, bool *was_truncated);
+unsigned int backtrace_thread_user(void *thread, uintptr_t *bt,
+    unsigned int btlen, int *error, bool *user64, bool *was_truncated);
 
 __END_DECLS
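
A sketch of a caller updated for the new convention, in which the frame count
becomes the return value and the errno-style error moves to an out-parameter:

    uintptr_t frames[64];
    bool user64 = false, truncated = false;
    int error = 0;

    unsigned int n = backtrace_user(frames, 64, &error, &user64, &truncated);
    if (error == 0) {
            /* frames[0..n-1] hold the user return addresses */
    }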
 
index 25fb8477e48223737aaa8af4eadabd089df90db9..a28e09f22ecf6c1db04994e04e7662786c4940e2 100644 (file)
@@ -49,6 +49,7 @@ typedef enum thread_snapshot_wait_flags {
        kThreadWaitWorkloopSyncWait     = 0x10,
        kThreadWaitOnProcess            = 0x11,
        kThreadWaitSleepWithInheritor   = 0x12,
+       kThreadWaitCompressor           = 0x14,
 } __attribute__((packed)) block_hint_t;
 
 _Static_assert(sizeof(block_hint_t) <= sizeof(short),
index 4ec2af237bcfe33cb05ed315abb492d131adc406..8d9453f5f3d8ccc6bd0b194ff6c2c5a6432d80f0 100644 (file)
@@ -149,6 +149,24 @@ circle_dequeue_tail(circle_queue_t cq)
        return elt;
 }
 
+static inline void
+circle_queue_rotate_head_forward(circle_queue_t cq)
+{
+       queue_entry_t first = circle_queue_first(cq);
+       if (first != NULL) {
+               cq->head = first->next;
+       }
+}
+
+static inline void
+circle_queue_rotate_head_backward(circle_queue_t cq)
+{
+       queue_entry_t last = circle_queue_last(cq);
+       if (last != NULL) {
+               cq->head = last;
+       }
+}
+
 /*
  *     Macro:          cqe_element
  *     Function:
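
The two rotation helpers only move the queue's notion of its head; nothing is
unlinked or relinked. For a queue whose elements read A, B, C from the head:

    circle_queue_rotate_head_forward(cq);   /* head = B: reads B, C, A */
    circle_queue_rotate_head_backward(cq);  /* head = last (A): A, B, C again */

sched_clutch_bucket_runq_rotate() later in this change uses the forward
rotation to round-robin clutch buckets of equal priority.
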
index e885cee9b8e526cce3aa9c005df98e0403b0667a..101678d7bcc275751d832ddd410c6b639ec7e3e6 100644 (file)
@@ -1619,7 +1619,9 @@ clock_interval_to_deadline(
 
        clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
 
-       *result = mach_absolute_time() + abstime;
+       if (os_add_overflow(mach_absolute_time(), abstime, result)) {
+               *result = UINT64_MAX;
+       }
 }
 
 void
@@ -1627,7 +1629,9 @@ clock_absolutetime_interval_to_deadline(
        uint64_t                        abstime,
        uint64_t                        *result)
 {
-       *result = mach_absolute_time() + abstime;
+       if (os_add_overflow(mach_absolute_time(), abstime, result)) {
+               *result = UINT64_MAX;
+       }
 }
 
 void
@@ -1635,7 +1639,9 @@ clock_continuoustime_interval_to_deadline(
        uint64_t                        conttime,
        uint64_t                        *result)
 {
-       *result = mach_continuous_time() + conttime;
+       if (os_add_overflow(mach_continuous_time(), conttime, result)) {
+               *result = UINT64_MAX;
+       }
 }
 
 void
@@ -1653,14 +1659,23 @@ clock_deadline_for_periodic_event(
 {
        assert(interval != 0);
 
-       *deadline += interval;
+       // *deadline += interval;
+       if (os_add_overflow(*deadline, interval, deadline)) {
+               *deadline = UINT64_MAX;
+       }
 
        if (*deadline <= abstime) {
-               *deadline = abstime + interval;
-               abstime = mach_absolute_time();
+               // *deadline = abstime + interval;
+               if (os_add_overflow(abstime, interval, deadline)) {
+                       *deadline = UINT64_MAX;
+               }
 
+               abstime = mach_absolute_time();
                if (*deadline <= abstime) {
-                       *deadline = abstime + interval;
+                       // *deadline = abstime + interval;
+                       if (os_add_overflow(abstime, interval, deadline)) {
+                               *deadline = UINT64_MAX;
+                       }
                }
        }
 }
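
os_add_overflow() wraps the compiler's checked-arithmetic builtin, so each of
these sites now saturates the deadline at UINT64_MAX instead of wrapping past
the current time. A standalone illustration of the pattern:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t
    deadline_add_clamped(uint64_t now, uint64_t interval)
    {
            uint64_t deadline;
            if (__builtin_add_overflow(now, interval, &deadline)) {
                    deadline = UINT64_MAX;  /* saturate rather than wrap */
            }
            return deadline;
    }

    int
    main(void)
    {
            /* prints 18446744073709551615 (UINT64_MAX), not 4 */
            printf("%llu\n",
                (unsigned long long)deadline_add_clamped(UINT64_MAX - 5, 10));
            return 0;
    }
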
index dfb2703b4013ccb8a9747102c0eae8b97cebd94e..36175a0a27edfec624189bef4e14f38ea8eb8f59 100644 (file)
@@ -135,7 +135,7 @@ host_notify_port_destroy(
 
        ip_lock(port);
        if (ip_kotype(port) == IKOT_HOST_NOTIFY) {
-               entry = (host_notify_t)port->ip_kobject;
+               entry = (host_notify_t)ip_get_kobject(port);
                assert(entry != NULL);
                ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
                ip_unlock(port);
@@ -187,7 +187,7 @@ host_notify_all(
 
                        ip_lock(port);
                        assert(ip_kotype(port) == IKOT_HOST_NOTIFY);
-                       assert(port->ip_kobject == (ipc_kobject_t)entry);
+                       assert(ip_get_kobject(port) == (ipc_kobject_t)entry);
                        ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
                        ip_unlock(port);
 
index 800c7b857e390eccbdb61f66a7b9b23deb97e068..58c5a73877157a23a123bca9fef702b8bdeffbd5 100644 (file)
@@ -105,7 +105,7 @@ convert_port_to_clock(
                if (ip_active(port) &&
                    ((ip_kotype(port) == IKOT_CLOCK) ||
                    (ip_kotype(port) == IKOT_CLOCK_CTRL))) {
-                       clock = (clock_t) port->ip_kobject;
+                       clock = (clock_t)ip_get_kobject(port);
                }
                ip_unlock(port);
        }
@@ -131,7 +131,7 @@ convert_port_to_clock_ctrl(
                ip_lock(port);
                if (ip_active(port) &&
                    (ip_kotype(port) == IKOT_CLOCK_CTRL)) {
-                       clock = (clock_t) port->ip_kobject;
+                       clock = (clock_t) ip_get_kobject(port);
                }
                ip_unlock(port);
        }
@@ -195,7 +195,7 @@ port_name_to_clock(
                return clock;
        }
        if (ip_kotype(port) == IKOT_CLOCK) {
-               clock = (clock_t) port->ip_kobject;
+               clock = (clock_t) ip_get_kobject(port);
        }
        ip_unlock(port);
        return clock;
index 2b1b29008a0e1b13db182fff8f1efd5a7812f721..7cf4b903b285bc4e794fe7317a0b024a77e72dca 100644 (file)
@@ -281,7 +281,7 @@ convert_port_to_host(
        if (IP_VALID(port)) {
                if (ip_kotype(port) == IKOT_HOST ||
                    ip_kotype(port) == IKOT_HOST_PRIV) {
-                       host = (host_t) port->ip_kobject;
+                       host = (host_t) ip_get_kobject(port);
                        require_ip_active(port);
                }
        }
@@ -307,7 +307,7 @@ convert_port_to_host_priv(
                ip_lock(port);
                if (ip_active(port) &&
                    (ip_kotype(port) == IKOT_HOST_PRIV)) {
-                       host = (host_t) port->ip_kobject;
+                       host = (host_t) ip_get_kobject(port);
                }
                ip_unlock(port);
        }
@@ -335,7 +335,7 @@ convert_port_to_processor(
                ip_lock(port);
                if (ip_active(port) &&
                    (ip_kotype(port) == IKOT_PROCESSOR)) {
-                       processor = (processor_t) port->ip_kobject;
+                       processor = (processor_t) ip_get_kobject(port);
                }
                ip_unlock(port);
        }
@@ -404,7 +404,7 @@ ref_pset_port_locked(ipc_port_t port, boolean_t matchn, processor_set_t *ppset)
        if (ip_active(port) &&
            ((ip_kotype(port) == IKOT_PSET) ||
            (matchn && (ip_kotype(port) == IKOT_PSET_NAME)))) {
-               pset = (processor_set_t) port->ip_kobject;
+               pset = (processor_set_t) ip_get_kobject(port);
        }
 
        *ppset = pset;
@@ -519,7 +519,7 @@ convert_port_to_host_security(
                ip_lock(port);
                if (ip_active(port) &&
                    (ip_kotype(port) == IKOT_HOST_SECURITY)) {
-                       host = (host_t) port->ip_kobject;
+                       host = (host_t) ip_get_kobject(port);
                }
                ip_unlock(port);
        }
index d29c63124fb87d0aba09e7850dd1f45995a573c7..c942f314131e26dbcadbe35abb3336295e9b57e5 100644 (file)
 #include <kern/sync_sema.h>
 #include <kern/counters.h>
 #include <kern/work_interval.h>
+#include <kern/suid_cred.h>
 
 #include <vm/vm_protos.h>
 
@@ -182,7 +183,7 @@ static mig_hash_t mig_buckets[MAX_MIG_ENTRIES];
 static int mig_table_max_displ;
 static mach_msg_size_t mig_reply_size = sizeof(mig_reply_error_t);
 
-
+static zone_t ipc_kobject_label_zone;
 
 const struct mig_subsystem *mig_e[] = {
        (const struct mig_subsystem *)&mach_vm_subsystem,
@@ -223,7 +224,7 @@ const struct mig_subsystem *mig_e[] = {
 #endif
 };
 
-void
+static void
 mig_init(void)
 {
        unsigned int i, n = sizeof(mig_e) / sizeof(const struct mig_subsystem *);
@@ -267,6 +268,24 @@ mig_init(void)
        printf("mig_table_max_displ = %d\n", mig_table_max_displ);
 }
 
+/*
+ *     Routine:        ipc_kobject_init
+ *     Purpose:
+ *             Initialize the kobject subsystem.
+ */
+void
+ipc_kobject_init(void)
+{
+       int label_max = CONFIG_TASK_MAX + CONFIG_THREAD_MAX + 1000 /* UEXT estimate */;
+
+       mig_init();
+
+       ipc_kobject_label_zone =
+           zinit(sizeof(struct ipc_kobject_label),
+           label_max * sizeof(struct ipc_kobject_label),
+           sizeof(struct ipc_kobject_label),
+           "ipc kobject labels");
+}
 
 /*
  *     Routine:        ipc_kobject_server
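
zinit() takes (element size, zone maximum in bytes, allocation increment in
bytes, name), so the call above caps the label zone at about CONFIG_TASK_MAX +
CONFIG_THREAD_MAX + 1000 labels. The same shape with a hypothetical cap:

    /* sketch only: a zone of at most 1024 labels, grown one element at a time */
    zone_t example_zone = zinit(sizeof(struct ipc_kobject_label),
        1024 * sizeof(struct ipc_kobject_label),
        sizeof(struct ipc_kobject_label),
        "example kobject labels");
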
@@ -604,13 +623,49 @@ ipc_kobject_set_atomically(
        port->ip_spares[2] = (port->ip_object.io_bits & IO_BITS_KOTYPE);
 #endif  /* MACH_ASSERT */
        port->ip_object.io_bits = (port->ip_object.io_bits & ~IO_BITS_KOTYPE) | type;
-       port->ip_kobject = kobject;
+       if (ip_is_kolabeled(port)) {
+               ipc_kobject_label_t labelp = port->ip_kolabel;
+               labelp->ikol_kobject = kobject;
+       } else {
+               port->ip_kobject = kobject;
+       }
        if (type != IKOT_NONE) {
                /* Once set, this bit can never be unset */
                port->ip_object.io_bits |= IO_BITS_KOBJECT;
        }
 }
 
+/*
+ *     Routine:        ipc_kobject_init_port
+ *     Purpose:
+ *             Initialize a kobject port with the given types and options.
+ *
+ *             This function never fails.
+ */
+static inline void
+ipc_kobject_init_port(
+       ipc_port_t port,
+       ipc_kobject_t kobject,
+       ipc_kobject_type_t type,
+       ipc_kobject_alloc_options_t options)
+{
+       ipc_kobject_set_atomically(port, kobject, type);
+
+       if (options & IPC_KOBJECT_ALLOC_MAKE_SEND) {
+               ipc_port_make_send_locked(port);
+       }
+       if (options & IPC_KOBJECT_ALLOC_NSREQUEST) {
+               ipc_port_make_sonce_locked(port);
+               port->ip_nsrequest = port;
+       }
+       if (options & IPC_KOBJECT_ALLOC_NO_GRANT) {
+               port->ip_no_grant = 1;
+       }
+       if (options & IPC_KOBJECT_ALLOC_IMMOVABLE_SEND) {
+               port->ip_immovable_send = 1;
+       }
+}
+
 /*
  *     Routine:        ipc_kobject_alloc_port
  *     Purpose:
@@ -627,53 +682,52 @@ ipc_kobject_alloc_port(
        ipc_kobject_type_t      type,
        ipc_kobject_alloc_options_t     options)
 {
-       ipc_port_init_flags_t flags;
-       ipc_space_t space;
-       ipc_port_t port;
+       ipc_port_t port = ipc_port_alloc_kernel();
 
-       if (options & IPC_KOBJECT_ALLOC_IN_TRANSIT) {
-               /* kobject port intended to be copied out to user-space */
-               flags = IPC_PORT_INIT_MESSAGE_QUEUE;
-               space = IS_NULL;
-       } else {
-               /* true kernel-bound kobject port */
-               flags = IPC_PORT_INIT_NONE;
-               space = ipc_space_kernel;
-       }
-       port = ipc_port_alloc_special(space, flags);
        if (port == IP_NULL) {
                panic("ipc_kobject_alloc_port(): failed to allocate port");
        }
 
-       ipc_kobject_set_atomically(port, kobject, type);
+       ipc_kobject_init_port(port, kobject, type, options);
+       return port;
+}
 
-       if (options & IPC_KOBJECT_ALLOC_MAKE_SEND) {
-               ipc_port_make_send_locked(port);
-       }
+/*
+ *     Routine:        ipc_kobject_alloc_labeled_port
+ *     Purpose:
+ *             Allocate a kobject port and associated mandatory access label
+ *             in the kernel space of the specified type.
+ *
+ *             This function never fails.
+ *
+ *     Conditions:
+ *             No locks held (memory is allocated)
+ */
 
-       if (options & IPC_KOBJECT_ALLOC_IN_TRANSIT) {
-               /* reset the port like it has been copied in circularity checked */
-               if (options & IPC_KOBJECT_ALLOC_NSREQUEST) {
-                       panic("ipc_kobject_alloc_port(): invalid option for user-space port");
-               }
-               port->ip_mscount = 0;
-               assert(port->ip_tempowner == 0);
-               assert(port->ip_receiver == IS_NULL);
-               port->ip_receiver = IS_NULL;
-               port->ip_receiver_name = MACH_PORT_NULL;
-       } else {
-               if (options & IPC_KOBJECT_ALLOC_NSREQUEST) {
-                       ipc_port_make_sonce_locked(port);
-                       port->ip_nsrequest = port;
-               }
-       }
-       if (options & IPC_KOBJECT_ALLOC_IMMOVABLE_SEND) {
-               port->ip_immovable_send = 1;
+ipc_port_t
+ipc_kobject_alloc_labeled_port(
+       ipc_kobject_t           kobject,
+       ipc_kobject_type_t      type,
+       ipc_label_t             label,
+       ipc_kobject_alloc_options_t     options)
+{
+       ipc_port_t port;
+       ipc_kobject_label_t labelp;
+
+       port = ipc_port_alloc_kernel();
+       if (port == IP_NULL) {
+               panic("ipc_kobject_alloc_labeled_port(): failed to allocate port");
        }
-       if (options & IPC_KOBJECT_ALLOC_NO_GRANT) {
-               port->ip_no_grant = 1;
+
+       labelp = (ipc_kobject_label_t)zalloc(ipc_kobject_label_zone);
+       if (labelp == NULL) {
+               panic("ipc_kobject_alloc_labeled_port(): failed to allocate label");
        }
+       labelp->ikol_label = label;
+       port->ip_kolabel = labelp;
+       port->ip_object.io_bits |= IO_BITS_KOLABEL;
 
+       ipc_kobject_init_port(port, kobject, type, options);
        return port;
 }
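
A sketch of a call site for the labeled allocator, assuming a label constant
such as IPC_LABEL_DEXT is defined alongside the IPC_LABEL_NONE used elsewhere
in this change:

    ipc_port_t
    example_uext_alloc_port(ipc_kobject_t object)
    {
            return ipc_kobject_alloc_labeled_port(object, IKOT_UEXT_OBJECT,
                IPC_LABEL_DEXT,   /* assumed label bit */
                IPC_KOBJECT_ALLOC_MAKE_SEND | IPC_KOBJECT_ALLOC_NSREQUEST);
    }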
 
@@ -744,16 +798,92 @@ ipc_kobject_make_send_lazy_alloc_port(
        return rc;
 }
 
+/*
+ *     Routine:        ipc_kobject_make_send_lazy_alloc_labeled_port
+ *     Purpose:
+ *             Make a send right for a labeled kobject port.
+ *
+ *             A location owning this port is passed in port_store.
+ *             If no port exists, a port is made lazily.
+ *
+ *             A send right is made for the port, and if this is the first one
+ *             (possibly not for the first time), then the no-more-senders
+ *             notification is rearmed.
+ *
+ *             When a notification is armed, the kobject must donate
+ *             one of its references to the port. It is expected that
+ *             the no-more-senders notification will consume this reference.
+ *
+ *     Returns:
+ *             TRUE if a notification was armed
+ *             FALSE otherwise
+ *
+ *     Conditions:
+ *             Nothing is locked, memory can be allocated.
+ *             The caller must be able to donate a kobject reference to the port.
+ */
+boolean_t
+ipc_kobject_make_send_lazy_alloc_labeled_port(
+       ipc_port_t              *port_store,
+       ipc_kobject_t           kobject,
+       ipc_kobject_type_t      type,
+       ipc_label_t             label)
+{
+       ipc_port_t port, previous;
+       boolean_t rc = FALSE;
+
+       port = os_atomic_load(port_store, dependency);
+
+       if (!IP_VALID(port)) {
+               port = ipc_kobject_alloc_labeled_port(kobject, type, label,
+                   IPC_KOBJECT_ALLOC_MAKE_SEND | IPC_KOBJECT_ALLOC_NSREQUEST);
+               if (os_atomic_cmpxchgv(port_store, IP_NULL, port, &previous, release)) {
+                       return TRUE;
+               }
+
+               // undo what ipc_kobject_alloc_labeled_port() did above
+               port->ip_nsrequest = IP_NULL;
+               port->ip_mscount = 0;
+               port->ip_sorights = 0;
+               port->ip_srights = 0;
+               ip_release(port);
+               ip_release(port);
+               zfree(ipc_kobject_label_zone, port->ip_kolabel);
+               port->ip_object.io_bits &= ~IO_BITS_KOLABEL;
+               port->ip_kolabel = NULL;
+               ipc_port_dealloc_kernel(port);
+
+               port = previous;
+               assert(ip_is_kolabeled(port));
+       }
+
+       ip_lock(port);
+       ipc_port_make_send_locked(port);
+       if (port->ip_srights == 1) {
+               ipc_port_make_sonce_locked(port);
+               assert(port->ip_nsrequest == IP_NULL);
+               port->ip_nsrequest = port;
+               rc = TRUE;
+       }
+       ip_unlock(port);
+
+       return rc;
+}
+
+
 /*
  *     Routine:        ipc_kobject_destroy
  *     Purpose:
  *             Release any kernel object resources associated
  *             with the port, which is being destroyed.
  *
- *             This should only be needed when resources are
- *             associated with a user's port.  In the normal case,
- *             when the kernel is the receiver, the code calling
- *             ipc_port_dealloc_kernel should clean up the resources.
+ *             This path to free object resources should only be
+ *             needed when resources are associated with a user's port.
+ *             In the normal case, when the kernel is the receiver,
+ *             the code calling ipc_port_dealloc_kernel should clean
+ *             up the object resources.
+ *
+ *             Cleans up any kobject label that might be present.
  *     Conditions:
  *             The port is not locked, but it is dead.
  */
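
A sketch of the intended call pattern for the lazy variant, with hypothetical
names. The owning object starts with an IP_NULL port pointer; racing callers
each allocate a port, and the loser tears its copy back down (the undo block
above):

    struct example_object {
            ipc_port_t eo_port;     /* starts IP_NULL, published atomically */
    };

    static void
    example_object_make_send(struct example_object *eo)
    {
            if (ipc_kobject_make_send_lazy_alloc_labeled_port(&eo->eo_port,
                (ipc_kobject_t)eo, IKOT_UEXT_OBJECT, IPC_LABEL_DEXT)) {
                    /* notification armed: donate a reference to the port */
                    example_object_reference(eo);   /* hypothetical helper */
            }
    }
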
@@ -775,11 +905,56 @@ ipc_kobject_destroy(
                host_notify_port_destroy(port);
                break;
 
+       case IKOT_SUID_CRED:
+               suid_cred_destroy(port);
+               break;
+
        default:
                break;
        }
+
+       if (ip_is_kolabeled(port)) {
+               ipc_kobject_label_t labelp = port->ip_kolabel;
+
+               assert(labelp != NULL);
+               assert(ip_is_kobject(port));
+               port->ip_kolabel = NULL;
+               port->ip_object.io_bits &= ~IO_BITS_KOLABEL;
+               zfree(ipc_kobject_label_zone, labelp);
+       }
 }
 
+/*
+ *     Routine:        ipc_kobject_label_check
+ *     Purpose:
+ *             Check to see if the space is allowed to possess a
+ *             right for the given port. In order to qualify, the
+ *             space label must contain all the privileges listed
+ *             in the port/kobject label.
+ *
+ *     Conditions:
+ *             Space is write locked and active.
+ *             Port is locked and active.
+ */
+boolean_t
+ipc_kobject_label_check(
+       ipc_space_t                   space,
+       ipc_port_t                    port,
+       __unused mach_msg_type_name_t msgt_name)
+{
+       ipc_kobject_label_t labelp;
+
+       assert(is_active(space));
+       assert(ip_active(port));
+
+       /* Unlabeled ports/kobjects are always allowed */
+       if (!ip_is_kolabeled(port)) {
+               return TRUE;
+       }
+
+       labelp = port->ip_kolabel;
+       return (labelp->ikol_label & space->is_label) == labelp->ikol_label;
+}
 
 boolean_t
 ipc_kobject_notify(
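
The subset test in isolation: a space may hold a right to a labeled port only
if the space's label contains every privilege bit in the port's label. A
standalone illustration (label width assumed):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool
    label_check(uint64_t space_label, uint64_t port_label)
    {
            return (port_label & space_label) == port_label;
    }

    int
    main(void)
    {
            assert(label_check(0x7, 0x5));   /* space has both required bits */
            assert(!label_check(0x1, 0x5));  /* space lacks the 0x4 privilege */
            return 0;
    }
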
@@ -860,7 +1035,12 @@ ipc_kobject_notify(
                case IKOT_WORK_INTERVAL:
                        work_interval_port_notify(request_header);
                        return TRUE;
+
+               case IKOT_SUID_CRED:
+                       suid_cred_notify(request_header);
+                       return TRUE;
                }
+
                break;
 
        case MACH_NOTIFY_PORT_DELETED:
index 24913d602da931bbd751ce4e4457e8d1599dcf68..03014d1dca0fad60f787e69173e70762b81d7d93 100644 (file)
@@ -133,19 +133,24 @@ typedef natural_t       ipc_kobject_type_t;
 #define IKOT_UEXT_OBJECT                41
 #define IKOT_ARCADE_REG                 42
 
+#define IKOT_SUID_CRED                  48
+
 /*
  * Add new entries here and adjust IKOT_UNKNOWN.
  * Please keep ipc/ipc_object.c:ikot_print_array up to date.
  */
-#define IKOT_UNKNOWN                    43      /* magic catchall       */
+#define IKOT_UNKNOWN                    49      /* magic catchall       */
 #define IKOT_MAX_TYPE   (IKOT_UNKNOWN+1)        /* # of IKOT_ types    */
 
 #ifdef MACH_KERNEL_PRIVATE
 
-/*
- *     Define types of kernel objects that use page lists instead
- *     of entry lists for copyin of out of line memory.
- */
+struct ipc_kobject_label {
+       ipc_label_t   ikol_label;       /* [private] mandatory access label */
+       ipc_kobject_t ikol_kobject;     /* actual kobject address */
+};
+
+/* initialization of kobject subsystem */
+extern void ipc_kobject_init(void);
 
 /* Dispatch a kernel server function */
 extern ipc_kmsg_t ipc_kobject_server(
@@ -174,8 +179,8 @@ __options_decl(ipc_kobject_alloc_options_t, uint32_t, {
        IPC_KOBJECT_ALLOC_NO_GRANT  = 0x00000004,
        /* Make all the send rights immovable */
        IPC_KOBJECT_ALLOC_IMMOVABLE_SEND = 0x00000008,
-       /* Make the port in-transit from the get-go */
-       IPC_KOBJECT_ALLOC_IN_TRANSIT = 0x00000010,
+       /* Add a label structure to the port */
+       IPC_KOBJECT_ALLOC_LABEL = 0x00000010,
 });
 
 /* Allocates a kobject port, never fails */
@@ -184,12 +189,44 @@ extern ipc_port_t ipc_kobject_alloc_port(
        ipc_kobject_type_t          type,
        ipc_kobject_alloc_options_t options);
 
+/* Allocates a labeled kobject port, never fails */
+extern ipc_port_t ipc_kobject_alloc_labeled_port(
+       ipc_kobject_t               kobject,
+       ipc_kobject_type_t          type,
+       ipc_label_t                 label,
+       ipc_kobject_alloc_options_t options);
+
 /* Makes a send right, lazily allocating a kobject port, arming for no-senders, never fails */
 extern boolean_t ipc_kobject_make_send_lazy_alloc_port(
        ipc_port_t                 *port_store,
        ipc_kobject_t               kobject,
        ipc_kobject_type_t          type) __result_use_check;
 
+/* Makes a send right, lazily allocating a labeled kobject port, arming for no-senders, never fails */
+extern boolean_t ipc_kobject_make_send_lazy_alloc_labeled_port(
+       ipc_port_t                 *port_store,
+       ipc_kobject_t               kobject,
+       ipc_kobject_type_t          type,
+       ipc_label_t                 label) __result_use_check;
+
+/* Get the kobject address associated with a port */
+static inline ipc_kobject_t
+ipc_kobject_get(ipc_port_t port)
+{
+       if (ip_is_kobject(port)) {
+               if (ip_is_kolabeled(port)) {
+                       return port->ip_kolabel->ikol_kobject;
+               }
+               return port->ip_kobject;
+       }
+       return 0;
+}
+
+/* Check if a kobject can be copied out to a given space */
+extern boolean_t ipc_kobject_label_check(
+       ipc_space_t space,
+       ipc_port_t port,
+       mach_msg_type_name_t msgt_name);
 
 /* Release any kernel object resources associated with a port */
 extern void ipc_kobject_destroy(
index 722384a00bf235c0e359bc26abbea8e727d1e4ea..6896e3793bf934969f94070948cce0d6bc702b48 100644 (file)
@@ -203,12 +203,27 @@ mach_msg_send_from_kernel_with_options(
        mach_msg_size_t         send_size,
        mach_msg_option_t       option,
        mach_msg_timeout_t      timeout_val)
+{
+       return kernel_mach_msg_send(msg, send_size, option, timeout_val, NULL);
+}
+
+mach_msg_return_t
+kernel_mach_msg_send(
+       mach_msg_header_t       *msg,
+       mach_msg_size_t         send_size,
+       mach_msg_option_t       option,
+       mach_msg_timeout_t      timeout_val,
+       boolean_t               *message_moved)
 {
        ipc_kmsg_t kmsg;
        mach_msg_return_t mr;
 
        KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_START);
 
+       if (message_moved) {
+               *message_moved = FALSE;
+       }
+
        mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg);
        if (mr != MACH_MSG_SUCCESS) {
                KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_END, mr);
@@ -222,6 +237,10 @@ mach_msg_send_from_kernel_with_options(
                return mr;
        }
 
+       if (message_moved) {
+               *message_moved = TRUE;
+       }
+
        /*
         * Until we are sure of its effects, we are disabling
         * importance donation from the kernel-side of user
@@ -313,9 +332,6 @@ mach_msg_send_from_kernel_with_options_legacy(
  *             MACH_RCV_PORT_DIED      The reply port was deallocated.
  */
 
-mach_msg_return_t mach_msg_rpc_from_kernel_body(mach_msg_header_t *msg,
-    mach_msg_size_t send_size, mach_msg_size_t rcv_size, boolean_t legacy);
-
 #if IKM_SUPPORT_LEGACY
 
 #undef mach_msg_rpc_from_kernel
@@ -331,9 +347,8 @@ mach_msg_rpc_from_kernel(
        mach_msg_size_t         send_size,
        mach_msg_size_t         rcv_size)
 {
-       return mach_msg_rpc_from_kernel_body(msg, send_size, rcv_size, TRUE);
+       return kernel_mach_msg_rpc(msg, send_size, rcv_size, TRUE, NULL);
 }
-
 #endif /* IKM_SUPPORT_LEGACY */
 
 mach_msg_return_t
@@ -342,18 +357,19 @@ mach_msg_rpc_from_kernel_proper(
        mach_msg_size_t         send_size,
        mach_msg_size_t         rcv_size)
 {
-       return mach_msg_rpc_from_kernel_body(msg, send_size, rcv_size, FALSE);
+       return kernel_mach_msg_rpc(msg, send_size, rcv_size, FALSE, NULL);
 }
 
 mach_msg_return_t
-mach_msg_rpc_from_kernel_body(
+kernel_mach_msg_rpc(
        mach_msg_header_t       *msg,
        mach_msg_size_t         send_size,
        mach_msg_size_t         rcv_size,
 #if !IKM_SUPPORT_LEGACY
        __unused
 #endif
-       boolean_t           legacy)
+       boolean_t           legacy,
+       boolean_t           *message_moved)
 {
        thread_t self = current_thread();
        ipc_port_t reply;
@@ -365,6 +381,10 @@ mach_msg_rpc_from_kernel_body(
 
        KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_START);
 
+       if (message_moved) {
+               *message_moved = FALSE;
+       }
+
        mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg);
        if (mr != MACH_MSG_SUCCESS) {
                KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_END, mr);
@@ -401,6 +421,10 @@ mach_msg_rpc_from_kernel_body(
                return mr;
        }
 
+       if (message_moved) {
+               *message_moved = TRUE;
+       }
+
        /*
         * respect the thread's SEND_IMPORTANCE option to force importance
         * donation from the kernel-side of user threads
@@ -1045,7 +1069,7 @@ convert_port_to_mig_object(
         * query it to get a reference to the desired interface.
         */
        ppv = NULL;
-       mig_object = (mig_object_t)port->ip_kobject;
+       mig_object = (mig_object_t) ip_get_kobject(port);
        mig_object->pVtbl->QueryInterface((IMIGObject *)mig_object, iid, &ppv);
        ip_unlock(port);
        return (mig_object_t)ppv;
@@ -1068,7 +1092,7 @@ mig_object_no_senders(
        assert(IKOT_MIG == ip_kotype(port));
 
        /* consume the reference donated by convert_mig_object_to_port */
-       mig_object_deallocate((mig_object_t)port->ip_kobject);
+       mig_object_deallocate((mig_object_t) ip_get_kobject(port));
 }
 
 /*
index a4fad67e18dd2356c0d0a98b70488f97358dd4ac..48abc259163605ac87dd23462fa6a87ac2bd8c37 100644 (file)
@@ -155,6 +155,15 @@ mach_msg_rpc_from_kernel_proper(
 
 #define mach_msg_rpc_from_kernel mach_msg_rpc_from_kernel_proper
 
+#ifdef XNU_KERNEL_PRIVATE
+mach_msg_return_t kernel_mach_msg_rpc(
+       mach_msg_header_t               *msg,
+       mach_msg_size_t                     send_size,
+       mach_msg_size_t                     rcv_size,
+       boolean_t                           legacy,
+       boolean_t                           *message_moved);
+#endif /* XNU_KERNEL_PRIVATE */
+
 extern void
 mach_msg_destroy_from_kernel_proper(
        mach_msg_header_t       *msg);
@@ -168,6 +177,13 @@ extern mach_msg_return_t mach_msg_send_from_kernel_with_options_legacy(
        mach_msg_option_t       option,
        mach_msg_timeout_t      timeout_val);
 
+extern mach_msg_return_t kernel_mach_msg_send(
+       mach_msg_header_t       *msg,
+       mach_msg_size_t         send_size,
+       mach_msg_option_t       option,
+       mach_msg_timeout_t      timeout_val,
+       boolean_t               *message_moved);
+
 extern mach_msg_return_t mach_msg_send_from_kernel_with_options(
        mach_msg_header_t       *msg,
        mach_msg_size_t         send_size,
@@ -188,9 +204,6 @@ __END_DECLS
 
 extern void mach_msg_receive_continue(void);
 
-/* Initialize kernel server dispatch table */
-extern void             mig_init(void);
-
 /*
  * Kernel implementation of the MIG object base class
  *
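
A sketch of what the new message_moved out-parameter enables. Once the message
has been copied into a kmsg, its rights and descriptors belong to the kmsg, so
failure cleanup depends on how far the send got (the message struct here is
hypothetical):

    struct {
            mach_msg_header_t header;
            /* ... body ... */
    } msg;
    boolean_t moved = FALSE;

    mach_msg_return_t mr = kernel_mach_msg_send(&msg.header, sizeof(msg),
        MACH_SEND_TIMEOUT, MACH_MSG_TIMEOUT_NONE, &moved);
    if (mr != MACH_MSG_SUCCESS && !moved) {
            /* copyin never happened: the caller still owns msg's contents */
            mach_msg_destroy_from_kernel_proper(&msg.header);
    }
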
index 16c3c5a51b560b62f62d4a504a338001e180db1a..aaec28a5d4bd71b835a908f84ff61d0461085519 100644 (file)
@@ -82,7 +82,7 @@ fileport_port_to_fileglob(ipc_port_t port)
 
        ip_lock(port);
        if (ip_active(port) && IKOT_FILEPORT == ip_kotype(port)) {
-               fg = (void *)port->ip_kobject;
+               fg = (void *) ip_get_kobject(port);
        }
        ip_unlock(port);
 
@@ -112,7 +112,7 @@ fileport_notify(mach_msg_header_t *msg)
 
        ip_lock(port);
 
-       fg = (struct fileglob *)port->ip_kobject;
+       fg = (struct fileglob *) ip_get_kobject(port);
 
        if (!ip_active(port)) {
                panic("Inactive port passed to fileport_notify()\n");
index 7f65888d55716088ecc58deff1108b776a8edc81..cd1dd1afdcf0e2e3082bbcbe3631088b38b207eb 100644 (file)
@@ -108,7 +108,7 @@ convert_port_to_semaphore(ipc_port_t port)
                 */
                if (ip_kotype(port) == IKOT_SEMAPHORE) {
                        require_ip_active(port);
-                       semaphore = (semaphore_t) port->ip_kobject;
+                       semaphore = (semaphore_t) ip_get_kobject(port);
                        semaphore_reference(semaphore);
                        return semaphore;
                }
@@ -169,7 +169,7 @@ semaphore_notify(mach_msg_header_t *msg)
        require_ip_active(port);
        assert(IKOT_SEMAPHORE == ip_kotype(port));
 
-       semaphore_dereference((semaphore_t)port->ip_kobject);
+       semaphore_dereference((semaphore_t) ip_get_kobject(port));
 }
 
 lock_set_t
index 7fa53a8aaf95656ecb4315ca94b88ce2587852fe..44c93bae2049454ed9e04b87a9327668efc27e4f 100644 (file)
@@ -136,7 +136,7 @@ ipc_task_init(
        int i;
 
 
-       kr = ipc_space_create(&ipc_table_entries[0], &space);
+       kr = ipc_space_create(&ipc_table_entries[0], IPC_LABEL_NONE, &space);
        if (kr != KERN_SUCCESS) {
                panic("ipc_task_init");
        }
@@ -1516,7 +1516,7 @@ convert_port_to_locked_task(ipc_port_t port)
                        ip_unlock(port);
                        return TASK_NULL;
                }
-               task = (task_t) port->ip_kobject;
+               task = (task_t) ip_get_kobject(port);
                assert(task != TASK_NULL);
 
                if (task_conversion_eval(ct, task)) {
@@ -1562,7 +1562,7 @@ convert_port_to_locked_task_inspect(ipc_port_t port)
                        ip_unlock(port);
                        return TASK_INSPECT_NULL;
                }
-               task = (task_inspect_t)port->ip_kobject;
+               task = (task_inspect_t) ip_get_kobject(port);
                assert(task != TASK_INSPECT_NULL);
                /*
                 * Normal lock ordering puts task_lock() before ip_lock().
@@ -1592,7 +1592,7 @@ convert_port_to_task_locked(
 
        if (ip_kotype(port) == IKOT_TASK) {
                task_t ct = current_task();
-               task = (task_t)port->ip_kobject;
+               task = (task_t) ip_get_kobject(port);
                assert(task != TASK_NULL);
 
                if (task_conversion_eval(ct, task)) {
@@ -1674,7 +1674,7 @@ convert_port_to_task_name(
                if (ip_active(port) &&
                    (ip_kotype(port) == IKOT_TASK ||
                    ip_kotype(port) == IKOT_TASK_NAME)) {
-                       task = (task_name_t)port->ip_kobject;
+                       task = (task_name_t) ip_get_kobject(port);
                        assert(task != TASK_NAME_NULL);
 
                        task_reference_internal(task);
@@ -1696,7 +1696,7 @@ convert_port_to_task_inspect_locked(
        require_ip_active(port);
 
        if (ip_kotype(port) == IKOT_TASK) {
-               task = (task_inspect_t)port->ip_kobject;
+               task = (task_inspect_t) ip_get_kobject(port);
                assert(task != TASK_INSPECT_NULL);
 
                task_reference_internal(task);
@@ -1751,7 +1751,7 @@ convert_port_to_task_suspension_token(
 
                if (ip_active(port) &&
                    ip_kotype(port) == IKOT_TASK_RESUME) {
-                       task = (task_suspension_token_t)port->ip_kobject;
+                       task = (task_suspension_token_t) ip_get_kobject(port);
                        assert(task != TASK_NULL);
 
                        task_reference_internal(task);
@@ -1885,7 +1885,7 @@ convert_port_to_thread_locked(
        require_ip_active(port);
 
        if (ip_kotype(port) == IKOT_THREAD) {
-               thread = (thread_t)port->ip_kobject;
+               thread = (thread_t) ip_get_kobject(port);
                assert(thread != THREAD_NULL);
 
                if (options & PORT_TO_THREAD_NOT_CURRENT_THREAD) {
@@ -1948,7 +1948,7 @@ convert_port_to_thread_inspect(
 
                if (ip_active(port) &&
                    ip_kotype(port) == IKOT_THREAD) {
-                       thread = (thread_inspect_t)port->ip_kobject;
+                       thread = (thread_inspect_t) ip_get_kobject(port);
                        assert(thread != THREAD_INSPECT_NULL);
                        thread_reference_internal((thread_t)thread);
                }
index 26176bc3615cb853ab69bfc51c3801efa25fe2d7..31a5ec30f1c99e3dac20e5839a65d490652e357c 100644 (file)
@@ -859,6 +859,7 @@ void
                /* if size was too large for a zone, then use kmem_free */
 
                vm_map_t alloc_map = kernel_map;
+               size = round_page(size);
 
                if ((((vm_offset_t) data) >= kalloc_map_min) && (((vm_offset_t) data) <= kalloc_map_max)) {
                        alloc_map = kalloc_map;
index 7ff3981a7c10b59e76a7491684e2bdd7ff6a555a..cdb62018c3b9fd9fafe74bd2ac9bb002a18f6716 100644 (file)
@@ -62,6 +62,7 @@
 #include <vm/vm_pageout.h>
 #include <vm/vm_fault.h>
 #include <vm/vm_shared_region.h>
+#include <vm/vm_compressor.h>
 #include <libkern/OSKextLibPrivate.h>
 
 #if defined(__x86_64__)
@@ -2752,6 +2753,9 @@ stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_t *waitinfo)
        case kThreadWaitSleepWithInheritor:
                kdp_sleep_with_inheritor_find_owner(thread->waitq, thread->wait_event, waitinfo);
                break;
+       case kThreadWaitCompressor:
+               kdp_compressor_busy_find_owner(thread->wait_event, waitinfo);
+               break;
        default:
                waitinfo->owner = 0;
                waitinfo->context = 0;
index e7780c2e32ad49e8d5b2a5f6af05c1599696db83..730d876b1264417b12730981b613f49ddb771209 100644 (file)
@@ -45,6 +45,7 @@
 #include <kern/lock_group.h>
 #include <kern/mk_timer.h>
 #include <kern/thread_call.h>
+#include <ipc/ipc_kmsg.h>
 
 static zone_t           mk_timer_zone;
 
@@ -62,16 +63,22 @@ mach_port_name_t
 mk_timer_create_trap(
        __unused struct mk_timer_create_trap_args *args)
 {
-       mk_timer_t                      timer;
-       ipc_space_t                     myspace = current_space();
-       mach_port_name_t        name = MACH_PORT_NULL;
-       ipc_port_t                      port;
-       kern_return_t           result;
-
+       mk_timer_t            timer;
+       ipc_space_t           myspace = current_space();
+       mach_port_name_t      name = MACH_PORT_NULL;
+       ipc_port_init_flags_t init_flags;
+       ipc_port_t            port;
+       kern_return_t         result;
+
+       /* Allocate and initialize local state of a timer object */
        timer = (mk_timer_t)zalloc(mk_timer_zone);
        if (timer == NULL) {
                return MACH_PORT_NULL;
        }
+       simple_lock_init(&timer->lock, 0);
+       thread_call_setup(&timer->call_entry, mk_timer_expire, timer);
+       timer->is_armed = timer->is_dead = FALSE;
+       timer->active = 0;
 
        /* Pre-allocate a kmsg for the timer messages */
        ipc_kmsg_t kmsg;
@@ -81,32 +88,24 @@ mk_timer_create_trap(
                return MACH_PORT_NULL;
        }
 
-       /* Allocate an in-transit kobject port with a send right */
-       ipc_kobject_alloc_options_t options;
-       options = (IPC_KOBJECT_ALLOC_IN_TRANSIT | IPC_KOBJECT_ALLOC_MAKE_SEND);
-       port = ipc_kobject_alloc_port((ipc_kobject_t)timer, IKOT_TIMER, options);
-       assert(port != IP_NULL);
+       init_flags = IPC_PORT_INIT_MESSAGE_QUEUE;
+       result = ipc_port_alloc(myspace, init_flags, &name, &port);
+       if (result != KERN_SUCCESS) {
+               zfree(mk_timer_zone, timer);
+               ipc_kmsg_free(kmsg);
+               return MACH_PORT_NULL;
+       }
 
-       /* Associate the kmsg */
+       /* Associate the pre-allocated kmsg with the port */
        ipc_kmsg_set_prealloc(kmsg, port);
 
-       /* Initialize the timer object and bind port to it */
-       simple_lock_init(&timer->lock, 0);
-       thread_call_setup(&timer->call_entry, mk_timer_expire, timer);
-       timer->is_armed = timer->is_dead = FALSE;
-       timer->active = 0;
-       timer->port = port;
+       /* port locked, receive right at user-space */
+       ipc_kobject_set_atomically(port, (ipc_kobject_t)timer, IKOT_TIMER);
 
-       /* Copyout the receive right for the timer port to user-space */
-       current_thread()->ith_knote = ITH_KNOTE_NULL;
-       result = ipc_object_copyout(myspace, ip_to_object(port),
-           MACH_MSG_TYPE_MOVE_RECEIVE,
-           NULL, NULL, &name);
-       if (result != KERN_SUCCESS) {
-               ipc_object_destroy(ip_to_object(port), MACH_MSG_TYPE_MOVE_RECEIVE);
-               /* should trigger mk_timer_port_destroy() call */
-               return MACH_PORT_NULL;
-       }
+       /* make a (naked) send right for the timer to keep */
+       timer->port = ipc_port_make_send_locked(port);
+
+       ip_unlock(port);
 
        return name;
 }
@@ -119,7 +118,7 @@ mk_timer_port_destroy(
 
        ip_lock(port);
        if (ip_kotype(port) == IKOT_TIMER) {
-               timer = (mk_timer_t)port->ip_kobject;
+               timer = (mk_timer_t) ip_get_kobject(port);
                assert(timer != NULL);
                ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
                simple_lock(&timer->lock, LCK_GRP_NULL);
@@ -274,7 +273,7 @@ mk_timer_arm_trap_internal(mach_port_name_t name, uint64_t expire_time, uint64_t
        }
 
        if (ip_kotype(port) == IKOT_TIMER) {
-               timer = (mk_timer_t)port->ip_kobject;
+               timer = (mk_timer_t) ip_get_kobject(port);
                assert(timer != NULL);
 
                simple_lock(&timer->lock, LCK_GRP_NULL);
@@ -358,7 +357,7 @@ mk_timer_cancel_trap(
        }
 
        if (ip_kotype(port) == IKOT_TIMER) {
-               timer = (mk_timer_t)port->ip_kobject;
+               timer = (mk_timer_t) ip_get_kobject(port);
                assert(timer != NULL);
                simple_lock(&timer->lock, LCK_GRP_NULL);
                assert(timer->port == port);
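
The user-space contract is unchanged by this rework; the receive right still
comes back as a port name, only the kernel-side setup differs. A sketch using
the libsyscall wrappers (signatures per mach/mk_timer.h, quoted from memory):

    uint64_t delay_abs = 1000000;   /* some delay in mach absolute-time units */
    mach_port_name_t timer = mk_timer_create();
    if (timer != MACH_PORT_NULL) {
            mk_timer_arm(timer, mach_absolute_time() + delay_abs);
            /* ... receive the expiry message on `timer` via mach_msg() ... */
            mk_timer_destroy(timer);
    }
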
index d8a808f60166631f31fefd01b30d935be027b894..172efd30313bb66a31ca10124b1b39ae8d7d6e10 100644 (file)
@@ -67,12 +67,23 @@ static uint64_t sched_clutch_root_bucket_deadline_calculate(sched_clutch_root_bu
 static void sched_clutch_root_bucket_deadline_update(sched_clutch_root_bucket_t, sched_clutch_root_t, uint64_t);
 static int sched_clutch_root_bucket_pri_compare(sched_clutch_root_bucket_t, sched_clutch_root_bucket_t);
 
+/* Options for clutch bucket ordering in the runq */
+__options_decl(sched_clutch_bucket_options_t, uint32_t, {
+       SCHED_CLUTCH_BUCKET_OPTIONS_NONE        = 0x0,
+       /* Round robin clutch bucket on thread removal */
+       SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR  = 0x1,
+       /* Insert clutch bucket at head (for thread preemption) */
+       SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ       = 0x2,
+       /* Insert clutch bucket at tail (default) */
+       SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ       = 0x4,
+});
+
 /* Clutch bucket level hierarchy management */
-static void sched_clutch_bucket_hierarchy_insert(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t);
-static void sched_clutch_bucket_hierarchy_remove(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t);
-static boolean_t sched_clutch_bucket_runnable(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
-static boolean_t sched_clutch_bucket_update(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
-static void sched_clutch_bucket_empty(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
+static void sched_clutch_bucket_hierarchy_insert(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t, sched_clutch_bucket_options_t);
+static void sched_clutch_bucket_hierarchy_remove(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t, sched_clutch_bucket_options_t);
+static boolean_t sched_clutch_bucket_runnable(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t);
+static boolean_t sched_clutch_bucket_update(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t);
+static void sched_clutch_bucket_empty(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t);
 
 static void sched_clutch_bucket_cpu_usage_update(sched_clutch_bucket_t, uint64_t);
 static void sched_clutch_bucket_cpu_blocked_update(sched_clutch_bucket_t, uint64_t);
@@ -87,7 +98,7 @@ static void sched_clutch_bucket_timeshare_update(sched_clutch_bucket_t);
 static boolean_t sched_thread_sched_pri_promoted(thread_t);
 /* Clutch membership management */
 static boolean_t sched_clutch_thread_insert(sched_clutch_root_t, thread_t, integer_t);
-static void sched_clutch_thread_remove(sched_clutch_root_t, thread_t, uint64_t);
+static void sched_clutch_thread_remove(sched_clutch_root_t, thread_t, uint64_t, sched_clutch_bucket_options_t);
 static thread_t sched_clutch_thread_highest(sched_clutch_root_t);
 
 /* Clutch properties updates */
@@ -317,6 +328,133 @@ sched_clutch_root_init(
        }
 }
 
+/*
+ * Clutch Bucket Runqueues
+ *
+ * The clutch buckets are maintained in a runq at the root bucket level. The
+ * runq organization allows clutch buckets to be ordered based on various
+ * factors such as:
+ *
+ * - Clutch buckets are round robin'ed at the same priority level when a
+ *   thread is selected from a clutch bucket. This prevents a clutch bucket
+ *   from starving out other clutch buckets at the same priority.
+ *
+ * - Clutch buckets are inserted at the head when they become runnable due to
+ *   thread preemption. This allows threads that were preempted to maintain
+ *   their order in the queue.
+ *
+ */
+
+/*
+ * sched_clutch_bucket_runq_init()
+ *
+ * Initialize a clutch bucket runq.
+ */
+static void
+sched_clutch_bucket_runq_init(
+       sched_clutch_bucket_runq_t clutch_buckets_rq)
+{
+       clutch_buckets_rq->scbrq_highq = NOPRI;
+       for (uint8_t i = 0; i < BITMAP_LEN(NRQS); i++) {
+               clutch_buckets_rq->scbrq_bitmap[i] = 0;
+       }
+       clutch_buckets_rq->scbrq_count = 0;
+       for (int i = 0; i < NRQS; i++) {
+               circle_queue_init(&clutch_buckets_rq->scbrq_queues[i]);
+       }
+}
+
+/*
+ * sched_clutch_bucket_runq_empty()
+ *
+ * Returns whether a clutch bucket runq is empty.
+ */
+static boolean_t
+sched_clutch_bucket_runq_empty(
+       sched_clutch_bucket_runq_t clutch_buckets_rq)
+{
+       return clutch_buckets_rq->scbrq_count == 0;
+}
+
+/*
+ * sched_clutch_bucket_runq_peek()
+ *
+ * Returns the highest priority clutch bucket in the runq.
+ */
+static sched_clutch_bucket_t
+sched_clutch_bucket_runq_peek(
+       sched_clutch_bucket_runq_t clutch_buckets_rq)
+{
+       if (clutch_buckets_rq->scbrq_count > 0) {
+               circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_buckets_rq->scbrq_highq];
+               return cqe_queue_first(queue, struct sched_clutch_bucket, scb_runqlink);
+       } else {
+               return NULL;
+       }
+}
+
+/*
+ * sched_clutch_bucket_runq_enqueue()
+ *
+ * Enqueue a clutch bucket into the runq based on the options passed in.
+ */
+static void
+sched_clutch_bucket_runq_enqueue(
+       sched_clutch_bucket_runq_t clutch_buckets_rq,
+       sched_clutch_bucket_t clutch_bucket,
+       sched_clutch_bucket_options_t options)
+{
+       circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority];
+       if (circle_queue_empty(queue)) {
+               circle_enqueue_tail(queue, &clutch_bucket->scb_runqlink);
+               bitmap_set(clutch_buckets_rq->scbrq_bitmap, clutch_bucket->scb_priority);
+               if (clutch_bucket->scb_priority > clutch_buckets_rq->scbrq_highq) {
+                       clutch_buckets_rq->scbrq_highq = clutch_bucket->scb_priority;
+               }
+       } else {
+               if (options & SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ) {
+                       circle_enqueue_head(queue, &clutch_bucket->scb_runqlink);
+               } else {
+                       /*
+                        * Default behavior (handles SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ &
+                        * SCHED_CLUTCH_BUCKET_OPTIONS_NONE)
+                        */
+                       circle_enqueue_tail(queue, &clutch_bucket->scb_runqlink);
+               }
+       }
+       clutch_buckets_rq->scbrq_count++;
+}
+
+/*
+ * sched_clutch_bucket_runq_remove()
+ *
+ * Remove a clutch bucket from the runq.
+ */
+static void
+sched_clutch_bucket_runq_remove(
+       sched_clutch_bucket_runq_t clutch_buckets_rq,
+       sched_clutch_bucket_t clutch_bucket)
+{
+       circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority];
+       circle_dequeue(queue, &clutch_bucket->scb_runqlink);
+       assert(clutch_buckets_rq->scbrq_count > 0);
+       clutch_buckets_rq->scbrq_count--;
+       if (circle_queue_empty(queue)) {
+               bitmap_clear(clutch_buckets_rq->scbrq_bitmap, clutch_bucket->scb_priority);
+               clutch_buckets_rq->scbrq_highq = bitmap_first(clutch_buckets_rq->scbrq_bitmap, NRQS);
+       }
+}
+
+static void
+sched_clutch_bucket_runq_rotate(
+       sched_clutch_bucket_runq_t clutch_buckets_rq,
+       sched_clutch_bucket_t clutch_bucket)
+{
+       circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority];
+       assert(clutch_bucket == cqe_queue_first(queue, struct sched_clutch_bucket, scb_runqlink));
+       circle_queue_rotate_head_forward(queue);
+}
+
 /*
  * sched_clutch_root_bucket_init()
  *
@@ -328,7 +466,7 @@ sched_clutch_root_bucket_init(
        sched_bucket_t bucket)
 {
        root_bucket->scrb_bucket = bucket;
-       priority_queue_init(&root_bucket->scrb_clutch_buckets, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP);
+       sched_clutch_bucket_runq_init(&root_bucket->scrb_clutch_buckets);
        priority_queue_entry_init(&root_bucket->scrb_pqlink);
        root_bucket->scrb_deadline = SCHED_CLUTCH_INVALID_TIME_64;
        root_bucket->scrb_warped_deadline = 0;
@@ -738,7 +876,6 @@ sched_clutch_bucket_init(
 
        clutch_bucket->scb_interactivity_ts = 0;
        clutch_bucket->scb_blocked_ts = SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID;
-       priority_queue_entry_init(&clutch_bucket->scb_pqlink);
        clutch_bucket->scb_clutch = clutch;
        clutch_bucket->scb_root = NULL;
        priority_queue_init(&clutch_bucket->scb_clutchpri_prioq, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP);
@@ -818,7 +955,8 @@ sched_clutch_bucket_hierarchy_insert(
        sched_clutch_root_t root_clutch,
        sched_clutch_bucket_t clutch_bucket,
        sched_bucket_t bucket,
-       uint64_t timestamp)
+       uint64_t timestamp,
+       sched_clutch_bucket_options_t options)
 {
        sched_clutch_hierarchy_locked_assert(root_clutch);
        if (bucket > TH_BUCKET_FIXPRI) {
@@ -835,12 +973,12 @@ sched_clutch_bucket_hierarchy_insert(
        sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];
 
        /* If this is the first clutch bucket in the root bucket, insert the root bucket into the root priority queue */
-       if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
+       if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
                sched_clutch_root_bucket_runnable(root_bucket, root_clutch, timestamp);
        }
 
-       /* Insert the clutch bucket into the root bucket priority queue */
-       priority_queue_insert(&root_bucket->scrb_clutch_buckets, &clutch_bucket->scb_pqlink, clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
+       /* Insert the clutch bucket into the root bucket run queue with order based on options */
+       sched_clutch_bucket_runq_enqueue(&root_bucket->scrb_clutch_buckets, clutch_bucket, options);
        os_atomic_store(&clutch_bucket->scb_root, root_clutch, relaxed);
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
            thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, clutch_bucket->scb_priority, 0);
@@ -856,7 +994,8 @@ sched_clutch_bucket_hierarchy_remove(
        sched_clutch_root_t root_clutch,
        sched_clutch_bucket_t clutch_bucket,
        sched_bucket_t bucket,
-       uint64_t timestamp)
+       uint64_t timestamp,
+       __unused sched_clutch_bucket_options_t options)
 {
        sched_clutch_hierarchy_locked_assert(root_clutch);
        if (bucket > TH_BUCKET_FIXPRI) {
@@ -873,14 +1012,14 @@ sched_clutch_bucket_hierarchy_remove(
        sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];
 
        /* Remove the clutch bucket from the root bucket priority queue */
-       priority_queue_remove(&root_bucket->scrb_clutch_buckets, &clutch_bucket->scb_pqlink, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
+       sched_clutch_bucket_runq_remove(&root_bucket->scrb_clutch_buckets, clutch_bucket);
        os_atomic_store(&clutch_bucket->scb_root, NULL, relaxed);
        clutch_bucket->scb_blocked_ts = timestamp;
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
            thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0);
 
        /* If the root bucket priority queue is now empty, remove it from the root priority queue */
-       if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
+       if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
                sched_clutch_root_bucket_empty(root_bucket, root_clutch, timestamp);
        }
 }
@@ -1030,10 +1169,10 @@ static sched_clutch_bucket_t
 sched_clutch_root_bucket_highest_clutch_bucket(
        sched_clutch_root_bucket_t root_bucket)
 {
-       if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
+       if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
                return NULL;
        }
-       return priority_queue_max(&root_bucket->scrb_clutch_buckets, struct sched_clutch_bucket, scb_pqlink);
+       return sched_clutch_bucket_runq_peek(&root_bucket->scrb_clutch_buckets);
 }
 
 /*
@@ -1047,12 +1186,13 @@ static boolean_t
 sched_clutch_bucket_runnable(
        sched_clutch_bucket_t clutch_bucket,
        sched_clutch_root_t root_clutch,
-       uint64_t timestamp)
+       uint64_t timestamp,
+       sched_clutch_bucket_options_t options)
 {
        sched_clutch_hierarchy_locked_assert(root_clutch);
        sched_clutch_bucket_cpu_blocked_update(clutch_bucket, timestamp);
        clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
-       sched_clutch_bucket_hierarchy_insert(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp);
+       sched_clutch_bucket_hierarchy_insert(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp, options);
        /* Update the timesharing properties of this clutch_bucket; also done every sched_tick */
        sched_clutch_bucket_timeshare_update(clutch_bucket);
        int16_t root_old_pri = root_clutch->scr_priority;
@@ -1063,32 +1203,35 @@ sched_clutch_bucket_runnable(
 /*
  * sched_clutch_bucket_update()
  *
- * Update the clutch_bucket's position in the hierarchy based on whether
- * the newly runnable thread changes its priority. Also update the root
- * priority accordingly.
+ * Update the clutch_bucket's position in the hierarchy. This routine is
+ * called when a thread is inserted into or removed from a runnable clutch
+ * bucket. The options control where the clutch bucket is placed in the
+ * clutch bucket runq.
  */
 static boolean_t
 sched_clutch_bucket_update(
        sched_clutch_bucket_t clutch_bucket,
        sched_clutch_root_t root_clutch,
-       uint64_t timestamp)
+       uint64_t timestamp,
+       sched_clutch_bucket_options_t options)
 {
        sched_clutch_hierarchy_locked_assert(root_clutch);
        uint64_t new_pri = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
+       sched_clutch_bucket_runq_t bucket_runq = &root_clutch->scr_buckets[clutch_bucket->scb_bucket].scrb_clutch_buckets;
        if (new_pri == clutch_bucket->scb_priority) {
+               /*
+                * If SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR is specified, move the clutch bucket
+                * to the end of the runq. Typically used when a thread is selected for execution
+                * from a clutch bucket.
+                */
+               if (options & SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR) {
+                       sched_clutch_bucket_runq_rotate(bucket_runq, clutch_bucket);
+               }
                return false;
        }
-       struct priority_queue *bucket_prioq = &root_clutch->scr_buckets[clutch_bucket->scb_bucket].scrb_clutch_buckets;
-
-       if (new_pri < clutch_bucket->scb_priority) {
-               clutch_bucket->scb_priority = new_pri;
-               priority_queue_entry_decrease(bucket_prioq, &clutch_bucket->scb_pqlink,
-                   clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
-       } else {
-               clutch_bucket->scb_priority = new_pri;
-               priority_queue_entry_increase(bucket_prioq, &clutch_bucket->scb_pqlink,
-                   clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
-       }
+       sched_clutch_bucket_runq_remove(bucket_runq, clutch_bucket);
+       clutch_bucket->scb_priority = new_pri;
+       sched_clutch_bucket_runq_enqueue(bucket_runq, clutch_bucket, options);
 
        int16_t root_old_pri = root_clutch->scr_priority;
        sched_clutch_root_pri_update(root_clutch);
@@ -1106,10 +1249,11 @@ static void
 sched_clutch_bucket_empty(
        sched_clutch_bucket_t clutch_bucket,
        sched_clutch_root_t root_clutch,
-       uint64_t timestamp)
+       uint64_t timestamp,
+       sched_clutch_bucket_options_t options)
 {
        sched_clutch_hierarchy_locked_assert(root_clutch);
-       sched_clutch_bucket_hierarchy_remove(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp);
+       sched_clutch_bucket_hierarchy_remove(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp, options);
        clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
        sched_clutch_root_pri_update(root_clutch);
 }
@@ -1407,17 +1551,16 @@ sched_clutch_thread_insert(
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE,
            thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_RUNNABLE, 0);
 
-       /* Enqueue the clutch into the hierarchy (if needed) and update properties */
+       /* Enqueue the clutch into the hierarchy (if needed) and update properties; pick the insertion order based on thread options */
+       sched_clutch_bucket_options_t scb_options = (options & SCHED_HEADQ) ? SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ : SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ;
        if (clutch_bucket->scb_thr_count == 0) {
                sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count);
                sched_clutch_thr_count_inc(&root_clutch->scr_thr_count);
-               /* Insert the newly runnable clutch bucket into the hierarchy */
-               result = sched_clutch_bucket_runnable(clutch_bucket, root_clutch, current_timestamp);
+               result = sched_clutch_bucket_runnable(clutch_bucket, root_clutch, current_timestamp, scb_options);
        } else {
                sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count);
                sched_clutch_thr_count_inc(&root_clutch->scr_thr_count);
-               /* Update the position of the clutch bucket in the hierarchy */
-               result = sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp);
+               result = sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp, scb_options);
        }
        return result;
 }
@@ -1433,7 +1576,8 @@ static void
 sched_clutch_thread_remove(
        sched_clutch_root_t root_clutch,
        thread_t thread,
-       uint64_t current_timestamp)
+       uint64_t current_timestamp,
+       sched_clutch_bucket_options_t options)
 {
        sched_clutch_hierarchy_locked_assert(root_clutch);
        sched_clutch_t clutch = sched_clutch_for_thread(thread);
@@ -1460,9 +1604,9 @@ sched_clutch_thread_remove(
 
        /* Remove the clutch from hierarchy (if needed) and update properties */
        if (clutch_bucket->scb_thr_count == 0) {
-               sched_clutch_bucket_empty(clutch_bucket, root_clutch, current_timestamp);
+               sched_clutch_bucket_empty(clutch_bucket, root_clutch, current_timestamp, options);
        } else {
-               sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp);
+               sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp, options);
        }
 }
 
@@ -1498,8 +1642,8 @@ sched_clutch_thread_highest(
        thread_t thread = run_queue_peek(&clutch_bucket->scb_runq);
        assert(thread != NULL);
 
-       /* Remove and return the thread from the hierarchy */
-       sched_clutch_thread_remove(root_clutch, thread, current_timestamp);
+       /* Remove and return the thread from the hierarchy; also round-robin the clutch bucket if its priority remains unchanged */
+       sched_clutch_thread_remove(root_clutch, thread, current_timestamp, SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR);
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_SELECT) | DBG_FUNC_NONE,
            thread_tid(thread), thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, 0, 0);
        return thread;
@@ -1978,7 +2122,7 @@ sched_clutch_processor_queue_remove(
                 */
                if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
                        sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
-                       sched_clutch_thread_remove(pset_clutch_root, thread, mach_absolute_time());
+                       sched_clutch_thread_remove(pset_clutch_root, thread, mach_absolute_time(), SCHED_CLUTCH_BUCKET_OPTIONS_NONE);
                } else {
                        rq = sched_clutch_thread_bound_runq(processor, thread);
                        run_queue_remove(rq, thread);
@@ -2722,7 +2866,7 @@ sched_clutch_bucket_threads_drain(sched_clutch_bucket_t clutch_bucket, sched_clu
        uint64_t current_timestamp = mach_approximate_time();
        while (thread_count > 0) {
                thread = run_queue_peek(&clutch_bucket->scb_runq);
-               sched_clutch_thread_remove(root_clutch, thread, current_timestamp);
+               sched_clutch_thread_remove(root_clutch, thread, current_timestamp, SCHED_CLUTCH_BUCKET_OPTIONS_NONE);
                enqueue_tail(clutch_threads, &thread->runq_links);
                thread_count--;
        }
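
The body of the new enqueue primitive is not part of this hunk. As a hedged
sketch of the semantics the insertion options imply (HEADQ for preempted
clutch buckets, TAILQ as the default, SAMEPRI_RR rotation when a thread is
selected), assuming the circle-queue and bitmap helpers from osfmk/kern; the
shipping sched_clutch_bucket_runq_enqueue() may differ in detail:

    /*
     * Illustrative sketch only, not the shipping implementation.
     */
    static void
    clutch_bucket_runq_enqueue_sketch(
            sched_clutch_bucket_runq_t runq,
            sched_clutch_bucket_t clutch_bucket,
            sched_clutch_bucket_options_t options)
    {
            circle_queue_t queue = &runq->scbrq_queues[clutch_bucket->scb_priority];
            if (options & SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ) {
                    /* Preempted clutch buckets rejoin at the head of their priority level */
                    circle_enqueue_head(queue, &clutch_bucket->scb_runqlink);
            } else {
                    /* TAILQ inserts and SAMEPRI_RR re-enqueues go to the tail */
                    circle_enqueue_tail(queue, &clutch_bucket->scb_runqlink);
            }
            bitmap_set(runq->scbrq_bitmap, clutch_bucket->scb_priority);
            if (clutch_bucket->scb_priority > runq->scbrq_highq) {
                    runq->scbrq_highq = clutch_bucket->scb_priority;
            }
            runq->scbrq_count++;
    }

Under this model, sched_clutch_bucket_runq_rotate() reduces to a dequeue from
the head followed by an enqueue at the tail of the same priority level.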
index eef5bee4d667078d4638c65d8a4ca9bfde622407..5600eb3785503f894d3d98e86f5675c9862ce323 100644 (file)
  */
 #define SCHED_CLUTCH_THREAD_ELIGIBLE(thread)    ((thread->bound_processor) == PROCESSOR_NULL)
 
+/*
+ * Clutch Bucket Runqueue Structure.
+ */
+struct sched_clutch_bucket_runq {
+       int                     scbrq_highq;
+       bitmap_t                scbrq_bitmap[BITMAP_LEN(NRQS_MAX)];
+       int                     scbrq_count;
+       circle_queue_head_t     scbrq_queues[NRQS_MAX];
+};
+typedef struct sched_clutch_bucket_runq *sched_clutch_bucket_runq_t;
+
 /*
  *
  * Clutch hierarchy locking protocol
@@ -84,7 +95,7 @@ struct sched_clutch_root_bucket {
        /* (I) sched bucket represented by this root bucket */
        uint8_t                         scrb_bucket;
        /* (P) priority queue for all clutch buckets in this sched bucket */
-       struct priority_queue           scrb_clutch_buckets;
+       struct sched_clutch_bucket_runq scrb_clutch_buckets;
        /* (P) priority queue entry to use for enqueueing root bucket into root prioq */
        struct priority_queue_entry     scrb_pqlink;
        /* (P) ageout deadline for this root bucket */
@@ -226,8 +237,8 @@ struct sched_clutch_bucket {
        /* (A) CPU usage information for the clutch bucket */
        sched_clutch_bucket_cpu_data_t  scb_cpu_data;
 
-       /* (P) linkage for clutch_bucket in root_bucket priority queue */
-       struct priority_queue_entry     scb_pqlink;
+       /* (P) linkage for clutch_bucket in root_bucket runqueue */
+       queue_chain_t                   scb_runqlink;
        /* (I) clutch to which this clutch bucket belongs */
        struct sched_clutch             *scb_clutch;
        /* (A) pointer to the root of the hierarchy this bucket is in */
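
For reference, the structure above enables a constant-time highest-priority
lookup. A minimal sketch, assuming a cqe_queue_first()-style accessor from
osfmk/kern/circle_queue.h (the real sched_clutch_bucket_runq_peek() in
sched_clutch.c may differ):

    static sched_clutch_bucket_t
    clutch_bucket_runq_peek_sketch(sched_clutch_bucket_runq_t runq)
    {
            if (runq->scbrq_count == 0) {
                    return NULL;
            }
            /* scbrq_highq caches the highest set bit in scbrq_bitmap */
            circle_queue_t queue = &runq->scbrq_queues[runq->scbrq_highq];
            return cqe_queue_first(queue, struct sched_clutch_bucket, scb_runqlink);
    }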
index 64da1a58f2a3034c499973fb38ea41b72d8b7e05..c54237a2ae6f556156e72c6e069435ae25ef2115 100644 (file)
@@ -65,7 +65,7 @@ The second level is the “thread group” level which decides which thread grou
 
 **Implementation**
 
-The thread group level implements a variation of the FreeBSD ULE scheduler to decide which clutch bucket should be selected next for execution. Each clutch bucket with runnable threads is represented as an entry in a priority queue which is ordered by clutch bucket priorities. The clutch bucket selection algorithm simply selects the clutch bucket with the highest priority in the priority queue. The priority calculation for the clutch buckets is based on the following factors:
+The thread group level implements a variation of the FreeBSD ULE scheduler to decide which clutch bucket should be selected next for execution. Each clutch bucket with runnable threads is represented as an entry in a runqueue which is ordered by clutch bucket priorities. The clutch bucket selection algorithm simply selects the clutch bucket with the highest priority in the clutch bucket runqueue. The priority calculation for the clutch buckets is based on the following factors:
 
 * **Highest runnable thread in the clutch bucket**: The clutch bucket maintains a priority queue which contains threads ordered by their promoted or base priority (whichever property made the thread eligible to be part of that clutch bucket). It uses the highest of these threads to calculate the base priority of the clutch bucket. The use of both base and sched priority allows the scheduler to honor priority differences specified from userspace via SPIs, priority boosts due to priority inheritance mechanisms like turnstiles and other priority affecting mechanisms outside the core scheduler.
 * **Interactivity score**: The scheduler calculates an interactivity score based on the ratio of voluntary blocking time and CPU usage time for the clutch bucket as a whole. This score allows the scheduler to prefer highly interactive thread groups over batch processing compute intensive thread groups.
@@ -77,6 +77,8 @@ The interactivity score based algorithm is well suited for this level due to the
 * Since the priority calculation is fairly cheap, the scheduler is able to maintain up-to-date information about all thread groups which leads to more optimal decisions.
 * Thread groups provide a convenient abstraction for groups of threads working together for a user workload. Basing scheduling decisions on this abstraction allows the system to make interesting choices such as preferring Apps over daemons which is typically better for system responsiveness.
 
+The clutch bucket runqueue data structure allows clutch buckets to be inserted at the head of the queue when threads from that clutch bucket are preempted. The runqueue also rotates a clutch bucket to the end of its priority level when a thread is selected for execution from that bucket. This lets the system round-robin efficiently among clutch buckets at the same priority, especially on highly contended systems with few CPUs.
+
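+As a rough illustration of the interactivity heuristic described above (the actual constants, windows, and decay behavior are internal to sched_clutch.c and will differ), a ULE-style score derived from the blocking/CPU-usage ratio can be sketched as:
+
+    /*
+     * Hypothetical sketch: blocked_time and cpu_used_time stand in for the
+     * clutch bucket's accumulators; higher scores mean more interactive.
+     */
+    static uint8_t
+    interactivity_score_sketch(uint64_t blocked_time, uint64_t cpu_used_time)
+    {
+            const uint8_t max_score = 8;    /* illustrative scale only */
+            if (blocked_time >= cpu_used_time) {
+                    /* Mostly voluntarily blocked: treat as fully interactive */
+                    return max_score;
+            }
+            /* Score decays as CPU usage dominates voluntary blocking */
+            return (uint8_t)((max_score * blocked_time) / (cpu_used_time + 1));
+    }
+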
 ### Thread Level
 
 At the lowest level the scheduler decides which thread within a clutch bucket should be selected next for execution. Each runnable thread in the clutch bucket is represented as an entry in a runqueue organized by the schedpri of the threads. The thread selection algorithm simply selects the highest-priority thread in the runqueue. The schedpri calculation is based on the traditional Mach scheduling algorithm, which uses load and CPU usage to decay a thread's priority. The thread decay model is better suited to this level than to the global scheduler because the load calculation only accounts for threads in the same clutch bucket. Since all threads in a clutch bucket belong to the same thread group and scheduling bucket, this algorithm provides quick CPU access for latency-sensitive threads within the clutch bucket without impacting unrelated threads in the system.
index 04e9db25eb20b5a300410fd2dbd14aa1f164b07a..36a97ec7d28a242bd7638bc914fae9a037434914 100644 (file)
@@ -352,6 +352,9 @@ kernel_bootstrap(void)
        kernel_bootstrap_log("machine_init");
        machine_init();
 
+       kernel_bootstrap_log("thread_machine_init_template");
+       thread_machine_init_template();
+
        kernel_bootstrap_log("clock_init");
        clock_init();
 
diff --git a/osfmk/kern/suid_cred.c b/osfmk/kern/suid_cred.c
new file mode 100644 (file)
index 0000000..b876d73
--- /dev/null
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ *
+ * An SUID credential is a port type which allows a process to create a new
+ * process with a specific user ID. It provides an alternative to the more
+ * traditional SUID file permission bit.
+ *
+ * To create a new SUID credential the process must be running as root and must
+ * have a special entitlement. When created, the credential is associated with a
+ * specific vnode and UID, so the unprivileged owner of the credential may only
+ * create a new process from the file associated with that vnode, and the
+ * resulting effective UID will be the UID stored in the credential.
+ */
+
+#include <kern/ipc_kobject.h>
+#include <kern/queue.h>
+#include <kern/suid_cred.h>
+
+#include <mach/mach_types.h>
+#include <mach/task.h>
+
+#include <IOKit/IOBSD.h>
+
+/* Declarations necessary to call vnode_lookup()/vnode_put(). */
+struct vnode;
+struct vfs_context;
+extern int vnode_lookup(const char *, int, struct vnode **,
+    struct vfs_context *);
+extern struct vfs_context * vfs_context_current(void);
+extern int vnode_put(struct vnode *);
+
+/* Declarations necessary to call kauth_cred_issuser(). */
+struct ucred;
+extern int kauth_cred_issuser(struct ucred *);
+extern struct ucred *kauth_cred_get(void);
+
+static struct zone *suid_cred_zone = NULL;
+
+/* Data associated with the suid cred port. Consumed during posix_spawn(). */
+struct suid_cred {
+       ipc_port_t port;
+       struct vnode *vnode;
+       uint32_t uid;
+};
+
+/* Allocates a new suid credential. The vnode reference will be owned by the
+ * newly created suid_cred_t. */
+static suid_cred_t
+suid_cred_alloc(struct vnode *vnode, uint32_t uid)
+{
+       suid_cred_t sc = SUID_CRED_NULL;
+
+       assert(vnode != NULL);
+
+       sc = zalloc(suid_cred_zone);
+       if (sc != NULL) {
+               // Lazily allocated in convert_suid_cred_to_port().
+               sc->port = IP_NULL;
+               sc->vnode = vnode;
+               sc->uid = uid;
+       }
+
+       return sc;
+}
+
+static void
+suid_cred_free(suid_cred_t sc)
+{
+       assert(sc != NULL);
+       assert(sc->vnode != NULL);
+
+       vnode_put(sc->vnode);
+
+       sc->uid = UINT32_MAX;
+       sc->vnode = NULL;
+       sc->port = IP_NULL;
+
+       zfree(suid_cred_zone, sc);
+}
+
+void
+suid_cred_destroy(ipc_port_t port)
+{
+       suid_cred_t sc = NULL;
+
+       ip_lock(port);
+       assert(ip_kotype(port) == IKOT_SUID_CRED);
+       sc = (suid_cred_t)port->ip_kobject;
+       ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
+       ip_unlock(port);
+
+       assert(sc->port == port);
+
+       suid_cred_free(sc);
+}
+
+void
+suid_cred_notify(mach_msg_header_t *msg)
+{
+       assert(msg->msgh_id == MACH_NOTIFY_NO_SENDERS);
+
+       mach_no_senders_notification_t *not = (mach_no_senders_notification_t *)msg;
+       ipc_port_t port = not->not_header.msgh_remote_port;
+
+       if (IP_VALID(port)) {
+               ipc_port_dealloc_kernel(port);
+       }
+}
+
+ipc_port_t
+convert_suid_cred_to_port(suid_cred_t sc)
+{
+       if (sc == NULL) {
+               return IP_NULL;
+       }
+
+       if (!ipc_kobject_make_send_lazy_alloc_port(&sc->port,
+           (ipc_kobject_t) sc, IKOT_SUID_CRED)) {
+               suid_cred_free(sc);
+               return IP_NULL;
+       }
+
+       return sc->port;
+}
+
+/*
+ * Verify the suid cred port. The cached vnode should match the passed vnode.
+ * The uid to be used to spawn the new process is returned in 'uid'.
+ */
+int
+suid_cred_verify(ipc_port_t port, struct vnode *vnode, uint32_t *uid)
+{
+       suid_cred_t sc = NULL;
+       int ret = -1;
+
+       if (!IP_VALID(port)) {
+               return -1;
+       }
+
+       ip_lock(port);
+
+       if (ip_kotype(port) != IKOT_SUID_CRED) {
+               ip_unlock(port);
+               return -1;
+       }
+
+       if (!ip_active(port)) {
+               ip_unlock(port);
+               return -1;
+       }
+
+       sc = (suid_cred_t)port->ip_kobject;
+
+       if (vnode != sc->vnode) {
+               ip_unlock(port);
+               return -1;
+       }
+
+       *uid = sc->uid;
+       ret = 0;
+
+       ipc_port_destroy(port);
+       return ret;
+}
+
+void
+suid_cred_init(void)
+{
+       const size_t sc_size = sizeof(struct suid_cred);
+       suid_cred_zone = zinit(sc_size, 1024 * sc_size, 0, "suid_cred");
+}
+
+kern_return_t
+task_create_suid_cred(
+       task_t task,
+       suid_cred_path_t path,
+       suid_cred_uid_t uid,
+       suid_cred_t *sc_p)
+{
+       suid_cred_t sc = NULL;
+       struct vnode *vnode;
+       int  err = -1;
+
+       if (task == TASK_NULL || task != current_task()) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       // Task must have entitlement.
+       if (!IOTaskHasEntitlement(task, "com.apple.private.suid_cred")) {
+               return KERN_NO_ACCESS;
+       }
+
+       // Thread must be root owned.
+       if (!kauth_cred_issuser(kauth_cred_get())) {
+               return KERN_NO_ACCESS;
+       }
+
+       // Find the vnode for the path.
+       err = vnode_lookup(path, 0, &vnode, vfs_context_current());
+       if (err != 0) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       sc = suid_cred_alloc(vnode, uid);
+       if (sc == NULL) {
+               (void) vnode_put(vnode);
+               return KERN_RESOURCE_SHORTAGE;
+       }
+
+       *sc_p = sc;
+
+       return KERN_SUCCESS;
+}
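
From userspace, the flow implied by this file and by the task_create_suid_cred()
MIG routine added later in this diff is roughly as follows. This is a hedged
sketch; the spawn-attribute plumbing that eventually consumes the port during
posix_spawn() is outside this diff and elided:

    #include <mach/mach.h>
    #include <mach/task.h>

    /*
     * Hypothetical launcher-side helper. The caller must be running as root
     * and hold the com.apple.private.suid_cred entitlement; the path and UID
     * are illustrative only.
     */
    static mach_port_t
    make_suid_cred_sketch(void)
    {
            mach_port_t cred = MACH_PORT_NULL;
            kern_return_t kr = task_create_suid_cred(mach_task_self(),
                "/usr/local/bin/helper",    /* hypothetical target binary */
                501,                        /* effective UID for the spawned process */
                &cred);
            return (kr == KERN_SUCCESS) ? cred : MACH_PORT_NULL;
    }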
diff --git a/osfmk/kern/suid_cred.h b/osfmk/kern/suid_cred.h
new file mode 100644 (file)
index 0000000..ff05762
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _KERN_SUID_CRED_H
+#define _KERN_SUID_CRED_H
+
+#if XNU_KERNEL_PRIVATE
+
+#include <kern/kern_types.h>
+#include <mach/mach_types.h>
+
+struct vnode;
+
+extern ipc_port_t convert_suid_cred_to_port(suid_cred_t);
+
+extern void suid_cred_init(void);
+
+extern void suid_cred_notify(mach_msg_header_t *msg);
+
+extern int suid_cred_verify(ipc_port_t port, struct vnode *vnode, uint32_t *uid);
+
+extern void suid_cred_destroy(ipc_port_t port);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#endif /* _KERN_SUID_CRED_H */
index a2a147207e6c971318b19f9b538af6f5c40c9d68..75812e5c3b0d08488808692c7c4b3a43b9111a41 100644 (file)
@@ -55,7 +55,7 @@ sysdiagnose_notify_user(uint32_t keycode)
 
        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SYSDIAGNOSE, SYSDIAGNOSE_NOTIFY_USER) | DBG_FUNC_START, 0, 0, 0, 0, 0);
 
-       kr = send_sysdiagnose_notification(user_port, keycode);
+       kr = send_sysdiagnose_notification_with_audit_token(user_port, keycode);
        ipc_port_release_send(user_port);
        return kr;
 }
index fd98be48127fb6c98816554df59cfe0fed05d08e..98d7250c059e5006633dbba875d5e7a90daf816c 100644 (file)
@@ -2282,7 +2282,7 @@ task_port_notify(mach_msg_header_t *msg)
 
        require_ip_active(port);
        assert(IKOT_TASK == ip_kotype(port));
-       task = (task_t) port->ip_kobject;
+       task = (task_t) ip_get_kobject(port);
 
        assert(task_is_a_corpse(task));
 
@@ -2682,18 +2682,7 @@ task_terminate_internal(
        pmap_set_process(task->map->pmap, pid, procname);
 #endif /* MACH_ASSERT */
 
-       vm_map_remove(task->map,
-           task->map->min_offset,
-           task->map->max_offset,
-           /*
-            * Final cleanup:
-            * + no unnesting
-            * + remove immutable mappings
-            * + allow gaps in range
-            */
-           (VM_MAP_REMOVE_NO_UNNESTING |
-           VM_MAP_REMOVE_IMMUTABLE |
-           VM_MAP_REMOVE_GAPS_OK));
+       vm_map_terminate(task->map);
 
        /* release our shared region */
        vm_shared_region_set(task, NULL);
@@ -4267,6 +4256,7 @@ task_freeze(
        task_unlock(task);
 
        if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
+           (kr == KERN_SUCCESS) &&
            (eval_only == FALSE)) {
                vm_wake_compactor_swapper();
                /*
@@ -6735,6 +6725,7 @@ task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
        int64_t io_delta = 0;
        int64_t * global_counter_to_update;
        boolean_t needs_telemetry = FALSE;
+       boolean_t is_external_device = FALSE;
        int ledger_to_update = 0;
        struct task_writes_counters * writes_counters_to_update;
 
@@ -6751,32 +6742,42 @@ task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
                global_counter_to_update = &global_logical_writes_count;
                ledger_to_update = task_ledgers.logical_writes;
                writes_counters_to_update = &task->task_writes_counters_internal;
+               is_external_device = FALSE;
        } else {
                global_counter_to_update = &global_logical_writes_to_external_count;
                ledger_to_update = task_ledgers.logical_writes_to_external;
                writes_counters_to_update = &task->task_writes_counters_external;
+               is_external_device = TRUE;
        }
 
        switch (flags) {
        case TASK_WRITE_IMMEDIATE:
                OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
                ledger_credit(task->ledger, ledger_to_update, io_size);
-               coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+               if (!is_external_device) {
+                       coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+               }
                break;
        case TASK_WRITE_DEFERRED:
                OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
                ledger_credit(task->ledger, ledger_to_update, io_size);
-               coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+               if (!is_external_device) {
+                       coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+               }
                break;
        case TASK_WRITE_INVALIDATED:
                OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
                ledger_debit(task->ledger, ledger_to_update, io_size);
-               coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
+               if (!is_external_device) {
+                       coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
+               }
                break;
        case TASK_WRITE_METADATA:
                OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
                ledger_credit(task->ledger, ledger_to_update, io_size);
-               coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+               if (!is_external_device) {
+                       coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+               }
                break;
        }
 
@@ -6784,7 +6785,7 @@ task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
        if (io_telemetry_limit != 0) {
                /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
                needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
-               if (needs_telemetry) {
+               if (needs_telemetry && !is_external_device) {
                        act_set_io_telemetry_ast(current_thread());
                }
        }
index b723f0b7c4c7eb46c51e38e9760a05430333a628..29595b72184debf3aacdfb1e101e9c976e054c54 100644 (file)
@@ -486,9 +486,10 @@ telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags, struct micro
 
        uintptr_t frames[128];
        bool user64_regs = false;
-       int backtrace_error = backtrace_user(frames,
-           sizeof(frames) / sizeof(frames[0]), &btcount, &user64_regs, NULL);
-       if (backtrace_error) {
+       int bterror = 0;
+       btcount = backtrace_user(frames,
+           sizeof(frames) / sizeof(frames[0]), &bterror, &user64_regs, NULL);
+       if (bterror != 0) {
                return;
        }
        bool user64_va = task_has_64Bit_addr(task);
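
Both this hunk and the kperf change later in the diff reflect a revised
backtrace convention: backtrace_user() and backtrace_thread_user() now return
the number of captured frames and report failure through an out-parameter
instead of the return value. A minimal sketch of the new calling pattern
(the wrapper name is illustrative):

    static unsigned int
    capture_user_backtrace_sketch(uintptr_t *frames, unsigned int max_frames)
    {
            bool user64 = false;
            int error = 0;
            /* Returns the number of frames written; error is set on failure */
            unsigned int nframes = backtrace_user(frames, max_frames,
                &error, &user64, NULL);
            if (error != 0) {
                    return 0;    /* mirror telemetry_take_sample() and bail */
            }
            return nframes;
    }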
index e26f4dced01dce5ccd0295f5203003bc489967f5..fde3ef327598d1b3c6fae7e07ebf3ff3e7ef3294 100644 (file)
@@ -246,7 +246,7 @@ void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(t
 
 os_refgrp_decl(static, thread_refgrp, "thread", NULL);
 
-void
+thread_t
 thread_bootstrap(void)
 {
        /*
@@ -418,7 +418,13 @@ thread_bootstrap(void)
        /* fiddle with init thread to skip asserts in set_sched_pri */
        init_thread.sched_pri = MAXPRI_KERNEL;
 
-       machine_set_current_thread(&init_thread);
+       return &init_thread;
+}
+
+void
+thread_machine_init_template(void)
+{
+       machine_thread_template_init(&thread_template);
 }
 
 extern boolean_t allow_qos_policy_set;
index 673259a16dc2c7fd633a30be45f15cbe3d8ade95..f5b7cf612b191fd1174487d119ccaa660befdf62 100644 (file)
@@ -675,7 +675,9 @@ struct thread {
 #define assert_thread_magic(thread) do { (void)(thread); } while (0)
 #endif
 
-extern void                     thread_bootstrap(void);
+extern thread_t                 thread_bootstrap(void);
+
+extern void                     thread_machine_init_template(void);
 
 extern void                     thread_init(void);
 
@@ -861,7 +863,9 @@ extern kern_return_t    machine_thread_dup(
        thread_t                target,
        boolean_t               is_corpse);
 
-extern void                             machine_thread_init(void);
+extern void             machine_thread_init(void);
+
+extern void             machine_thread_template_init(thread_t thr_template);
 
 extern kern_return_t    machine_thread_create(
        thread_t                thread,
index 5986b975dda25ec882a1f3d5318d42c886d70c05..ed14fe3086482753a0bd6df6f8b839ae4457a5bc 100644 (file)
@@ -129,7 +129,7 @@ work_interval_port_convert_locked(ipc_port_t port)
                return NULL;
        }
 
-       work_interval = (struct work_interval *)port->ip_kobject;
+       work_interval = (struct work_interval *) ip_get_kobject(port);
 
        wi_retain(work_interval);
 
@@ -228,7 +228,7 @@ work_interval_port_notify(mach_msg_header_t *msg)
                    port, port->ip_srights);
        }
 
-       work_interval = (struct work_interval *)port->ip_kobject;
+       work_interval = (struct work_interval *) ip_get_kobject(port);
 
        if (work_interval == NULL) {
                panic("work_interval_port_notify(): missing kobject: %p", port);
index 4a38dd7c581523f94daa101d14397c91246ddaab..b42389c91164f43ebe8b4602f15eff014b5508a9 100644 (file)
@@ -335,24 +335,30 @@ kperf_ucallstack_sample(struct kp_ucallstack *cs, struct kperf_context *context)
 
        bool user64 = false;
        bool trunc = false;
-       int err = backtrace_thread_user(thread, cs->kpuc_frames,
-           cs->kpuc_nframes - 1, &cs->kpuc_nframes, &user64, &trunc);
-       cs->kpuc_flags = CALLSTACK_KERNEL_WORDS;
-       if (user64) {
-               cs->kpuc_flags |= CALLSTACK_64BIT;
-       }
-       if (trunc) {
-               cs->kpuc_flags |= CALLSTACK_TRUNCATED;
-       }
+       int error = 0;
+       /*
+        * Leave space for the fixup information.
+        */
+       unsigned int maxnframes = cs->kpuc_nframes - 1;
+       unsigned int nframes = backtrace_thread_user(thread, cs->kpuc_frames,
+           maxnframes, &error, &user64, &trunc);
+       cs->kpuc_nframes = MIN(maxnframes, nframes);
 
-       if (!err || err == EFAULT) {
+       /*
+        * Ignore EFAULT to get as much of the stack as possible.  It will be
+        * marked as truncated, below.
+        */
+       if (error == 0 || error == EFAULT) {
                callstack_fixup_user(cs, thread);
                cs->kpuc_flags |= CALLSTACK_VALID;
        } else {
                cs->kpuc_nframes = 0;
-               BUF_INFO(PERF_CS_ERROR, ERR_GETSTACK, err);
+               BUF_INFO(PERF_CS_ERROR, ERR_GETSTACK, error);
        }
 
+       cs->kpuc_flags |= CALLSTACK_KERNEL_WORDS | (user64 ? CALLSTACK_64BIT : 0) |
+           (trunc ? CALLSTACK_TRUNCATED : 0);
+
        BUF_INFO(PERF_CS_USAMPLE | DBG_FUNC_END, (uintptr_t)thread_tid(thread),
            cs->kpuc_flags, cs->kpuc_nframes);
 }
index b998ba05605c101f627e745fe12c9c7748855c61..c9cc8992cf26c33f5fd9038981785f84e5270723 100644 (file)
@@ -222,7 +222,6 @@ _STRUCT_XMM_REG
 };
 #endif /* !__DARWIN_UNIX03 */
 
-#if !defined(RC_HIDE_XNU_J137)
 /* defn of 256 bit YMM regs */
 
 #if __DARWIN_UNIX03
@@ -268,7 +267,6 @@ _STRUCT_OPMASK_REG
        char            opmask_reg[8];
 };
 #endif /* !__DARWIN_UNIX03 */
-#endif /* not RC_HIDE_XNU_J137 */
 
 /* 
  * Floating point state.
@@ -362,7 +360,6 @@ _STRUCT_X86_AVX_STATE32
        _STRUCT_XMM_REG         __fpu_ymmh7;            /* YMMH 7  */
 };
 
-#if !defined(RC_HIDE_XNU_J137)
 #define        _STRUCT_X86_AVX512_STATE32      struct __darwin_i386_avx512_state
 _STRUCT_X86_AVX512_STATE32
 {
@@ -424,7 +421,6 @@ _STRUCT_X86_AVX512_STATE32
        _STRUCT_YMM_REG         __fpu_zmmh6;            /* ZMMH 6  */
        _STRUCT_YMM_REG         __fpu_zmmh7;            /* ZMMH 7  */
 };
-#endif /* not RC_HIDE_XNU_J137 */
 
 #else /* !__DARWIN_UNIX03 */
 #define        _STRUCT_X86_FLOAT_STATE32       struct i386_float_state
@@ -510,7 +506,6 @@ _STRUCT_X86_AVX_STATE32
        _STRUCT_XMM_REG         fpu_ymmh7;              /* YMMH 7  */
 };
 
-#if !defined(RC_HIDE_XNU_J137)
 #define        _STRUCT_X86_AVX512_STATE32      struct i386_avx512_state
 _STRUCT_X86_AVX512_STATE32
 {
@@ -572,7 +567,6 @@ _STRUCT_X86_AVX512_STATE32
        _STRUCT_YMM_REG         fpu_zmmh6;              /* ZMMH 6  */
        _STRUCT_YMM_REG         fpu_zmmh7;              /* ZMMH 7  */
 };
-#endif /* not RC_HIDE_XNU_J137 */
 
 #endif /* !__DARWIN_UNIX03 */
 
@@ -835,7 +829,6 @@ _STRUCT_X86_AVX_STATE64
        _STRUCT_XMM_REG         __fpu_ymmh15;           /* YMMH 15  */
 };
 
-#if !defined(RC_HIDE_XNU_J137)
 #define        _STRUCT_X86_AVX512_STATE64      struct __darwin_x86_avx512_state64
 _STRUCT_X86_AVX512_STATE64
 {
@@ -943,7 +936,6 @@ _STRUCT_X86_AVX512_STATE64
        _STRUCT_ZMM_REG         __fpu_zmm30;            /* ZMM 30  */
        _STRUCT_ZMM_REG         __fpu_zmm31;            /* ZMM 31  */
 };
-#endif /* not RC_HIDE_XNU_J137 */
 
 #else /* !__DARWIN_UNIX03 */
 #define        _STRUCT_X86_FLOAT_STATE64       struct x86_float_state64
@@ -1065,7 +1057,6 @@ _STRUCT_X86_AVX_STATE64
        _STRUCT_XMM_REG         fpu_ymmh15;             /* YMMH 15  */
 };
 
-#if !defined(RC_HIDE_XNU_J137)
 #define        _STRUCT_X86_AVX512_STATE64      struct x86_avx512_state64
 _STRUCT_X86_AVX512_STATE64
 {
@@ -1173,7 +1164,6 @@ _STRUCT_X86_AVX512_STATE64
        _STRUCT_ZMM_REG         fpu_zmm30;              /* ZMM 30  */
        _STRUCT_ZMM_REG         fpu_zmm31;              /* ZMM 31  */
 };
-#endif /* not RC_HIDE_XNU_J137 */
 
 #endif /* !__DARWIN_UNIX03 */
 
index 3d2c69302bd58af8e3872718bb97bddde489b171..c704e42c6ded596907350d18e9ff1869d615f75d 100644 (file)
@@ -130,9 +130,7 @@ struct x86_avx512_thread_state {
 typedef union {
        struct x86_fx_thread_state      fx;
        struct x86_avx_thread_state     avx;
-#if !defined(RC_HIDE_XNU_J137)
        struct x86_avx512_thread_state  avx512;
-#endif
 } x86_ext_thread_state_t;
 
 #define EVEX_PREFIX     0x62            /* AVX512's EVEX vector operation prefix */
index 69d1a03d96a69d478691e535b13528a60bda5bda..759489dcf7bc4f492e13a931e2c3fc26488ca07a 100644 (file)
 #define _MACH_I386_THREAD_STATE_H_
 
 /* Size of maximum exported thread state in words */
-#if !defined(RC_HIDE_XNU_J137)
 #define I386_THREAD_STATE_MAX   (614)    /* Size of biggest state possible */
-#else
-#define I386_THREAD_STATE_MAX   (224)    /* Size of biggest state possible */
-#endif /* !defined(RC_HIDE_XNU_J137) */
 
 #if defined (__i386__) || defined(__x86_64__)
 #define THREAD_STATE_MAX        I386_THREAD_STATE_MAX
index ea3328933de73c099e427beef7a7de4b9b0de3f5..d62095ad7ee576ad6229aab99d26f033559250d8 100644 (file)
@@ -76,6 +76,8 @@ subsystem
 #include <mach/mach_types.defs>
 #include <mach_debug/mach_debug_types.defs>
 
+type kobject_description_t = c_string[*:512];
+
 /*
  *     Returns the set of port and port set names
  *     to which the target task has access, along with
@@ -658,4 +660,17 @@ routine mach_port_swap_guard(
                new_guard       : uint64_t);
 #endif
 
+/*
+ *     Return the type and address of the kernel object
+ *     that the given send/receive right represents.
+ *      This call is only valid on MACH_IPC_DEBUG kernels.
+ *      Otherwise, KERN_FAILURE is returned.
+ */
+routine        mach_port_kobject_description(
+               task            : ipc_space_inspect_t;
+               name            : mach_port_name_t;
+       out     object_type     : natural_t;
+       out     object_addr     : mach_vm_address_t;
+       out     description     : kobject_description_t);
+
 /* vim: set ft=c : */
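
A hedged userspace sketch of the new routine follows; it is only functional on
MACH_IPC_DEBUG kernels, as the comment above notes, and kobject_description_t
is the 512-byte buffer defined in mach_debug_types.h later in this diff:

    #include <stdio.h>
    #include <mach/mach.h>
    #include <mach_debug/mach_debug_types.h>

    static void
    describe_port_sketch(mach_port_name_t name)
    {
            natural_t otype = 0;
            mach_vm_address_t oaddr = 0;
            kobject_description_t desc = { 0 };
            kern_return_t kr = mach_port_kobject_description(mach_task_self(),
                name, &otype, &oaddr, desc);
            if (kr == KERN_SUCCESS) {
                    printf("kobject type %u at 0x%llx: %s\n", otype,
                        (unsigned long long)oaddr, desc);
            }
            /* KERN_FAILURE is expected on non-MACH_IPC_DEBUG kernels */
    }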
index d2e9fb0b4ecae222c9a9610a6e91450ab83be731..09613b3a88d390c654a6ad30e032b4e02199ba21 100644 (file)
@@ -175,6 +175,15 @@ type thread_act_consume_ref_t = mach_port_move_send_t
 #endif /* KERNEL_SERVER */
                ;
 
+type suid_cred_path_t = c_string[*:1024];
+type suid_cred_uid_t = uint32_t;
+type suid_cred_t = mach_port_t
+#if    KERNEL_SERVER
+               outtran: mach_port_t convert_suid_cred_to_port(suid_cred_t)
+#endif /* KERNEL_SERVER */
+               ;
+
+
                /* thread_state_t: This inline array can hold
                 * a machine-dependent amount of data, defined in
                 * mach/machine/???? (currently THREAD_STATE_MAX,
@@ -642,6 +651,7 @@ simport <kern/arcade.h>;    /* for arcade_register conversions */
 
 simport <kern/ipc_mig.h>;      /* pick up kernel-specific MIG things */
 
+simport <kern/suid_cred.h>;
 #endif /* KERNEL_SERVER */
 
 import <mach/mig.h>;
index 5430caaebb343b85e534f952609318724875eb76..e46370cc81d06dcdbe92c08f98fc815b312abb69 100644 (file)
@@ -133,8 +133,8 @@ typedef struct ledger                   *ledger_t;
 typedef struct alarm                    *alarm_t;
 typedef struct clock                    *clock_serv_t;
 typedef struct clock                    *clock_ctrl_t;
-
 typedef struct arcade_register          *arcade_register_t;
+typedef struct suid_cred               *suid_cred_t;
 
 /*
  * OBSOLETE: lock_set interfaces are obsolete.
@@ -155,8 +155,8 @@ struct semaphore;
 struct ledger;
 struct alarm;
 struct clock;
-
 struct arcade_register;
+struct suid_cred;
 
 __END_DECLS
 
@@ -190,8 +190,9 @@ typedef mach_port_t             ledger_t;
 typedef mach_port_t             alarm_t;
 typedef mach_port_t             clock_serv_t;
 typedef mach_port_t             clock_ctrl_t;
-
 typedef mach_port_t             arcade_register_t;
+typedef mach_port_t             suid_cred_t;
+
 #endif  /* KERNEL */
 
 /*
@@ -260,6 +261,8 @@ typedef exception_handler_t     exception_port_t;
 typedef exception_handler_array_t exception_port_arrary_t;
 typedef char vfs_path_t[4096];
 typedef char nspace_path_t[1024]; /* 1024 == PATH_MAX */
+typedef char suid_cred_path_t[1024];
+typedef uint32_t suid_cred_uid_t;
 
 #ifdef KERNEL
 #define TASK_NULL               ((task_t) NULL)
@@ -284,6 +287,7 @@ typedef char nspace_path_t[1024]; /* 1024 == PATH_MAX */
 #define CLOCK_NULL              ((clock_t) NULL)
 #define UND_SERVER_NULL         ((UNDServerRef) NULL)
 #define ARCADE_REG_NULL         ((arcade_register_t) NULL)
+#define SUID_CRED_NULL         ((suid_cred_t) NULL)
 #else
 #define TASK_NULL               ((task_t) 0)
 #define TASK_NAME_NULL          ((task_name_t) 0)
@@ -307,6 +311,7 @@ typedef char nspace_path_t[1024]; /* 1024 == PATH_MAX */
 #define CLOCK_NULL              ((clock_t) 0)
 #define UND_SERVER_NULL         ((UNDServerRef) 0)
 #define ARCADE_REG_NULL         ((arcade_register_t) 0)
+#define SUID_CRED_NULL         ((suid_cred_t) 0)
 #endif
 
 /* DEPRECATED */
index af048e2b020b963cfcd0145ddff3080ed0b33a37..c74c29f746d4957cbf97c6266f9863e917d2720c 100644 (file)
@@ -47,4 +47,9 @@ simpleroutine sysdiagnose_notification(
               sysdiagnose_port    : mach_port_t;
               flags               : uint32_t);
 
+simpleroutine sysdiagnose_notification_with_audit_token(
+              sysdiagnose_port    : mach_port_t;
+              flags               : uint32_t;
+              ServerAuditToken atoken : audit_token_t);
+
 /* vim: set ft=c : */
index 378fe2039e949bdfa4415b600edce5b13aaf4511..8723a5255d784bc4755bc4b877747b92738952bc 100644 (file)
@@ -512,5 +512,11 @@ routine task_set_exc_guard_behavior(
                task     : task_t;
                behavior : task_exc_guard_behavior_t);
                
+routine task_create_suid_cred(
+               task            : task_t;
+               path            : suid_cred_path_t;
+               uid             : suid_cred_uid_t;
+       out     delegation      : suid_cred_t);
+
 /* vim: set ft=c : */
 
index 4ba2440df9d75e07c0b1a1e5800d2ae5ed64633c..8781b108e7f4e733853bf20a546476f28aea5be3 100644 (file)
@@ -89,5 +89,7 @@ struct mach_core_fileheader {
        struct mach_core_details files[MACH_CORE_FILEHEADER_MAXFILES];
 };
 
+#define KOBJECT_DESCRIPTION_LENGTH      512
+typedef char kobject_description_t[KOBJECT_DESCRIPTION_LENGTH];
 
 #endif  /* _MACH_DEBUG_MACH_DEBUG_TYPES_H_ */
index db1574d06032d37300a1c4047971b5d9c66ae24b..207ee0fbc4c69cb8af1c4c7092a518424628118a 100644 (file)
@@ -1401,7 +1401,7 @@ memory_object_iopl_request(
        if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
                vm_named_entry_t        named_entry;
 
-               named_entry = (vm_named_entry_t)port->ip_kobject;
+               named_entry = (vm_named_entry_t) ip_get_kobject(port);
                /* a few checks to make sure user is obeying rules */
                if (*upl_size == 0) {
                        if (offset >= named_entry->size) {
@@ -2340,7 +2340,7 @@ convert_port_to_upl(
                ip_unlock(port);
                return (upl_t)NULL;
        }
-       upl = (upl_t) port->ip_kobject;
+       upl = (upl_t) ip_get_kobject(port);
        ip_unlock(port);
        upl_lock(upl);
        upl->ref_count += 1;
index c79a03e57b471e7190b20946d361eeba8749229c..071a66d4e3adc3729a1091b5ca3e2af71da49a07 100644 (file)
 
 extern boolean_t vm_darkwake_mode;
 
+#if DEVELOPMENT || DEBUG
+int do_cseg_wedge_thread(void);
+int do_cseg_unwedge_thread(void);
+static event_t debug_cseg_wait_event = NULL;
+#endif /* DEVELOPMENT || DEBUG */
+
 #if POPCOUNT_THE_COMPRESSED_DATA
 boolean_t popcount_c_segs = TRUE;
 
@@ -678,7 +684,29 @@ vm_compressor_init(void)
                compressor_pool_size = ((kernel_map->max_offset - kernel_map->min_offset) - kernel_map->size) - VM_RESERVE_SIZE;
        }
        compressor_pool_multiplier = 1;
+
+#elif defined(__arm64__) && defined(XNU_TARGET_OS_WATCH)
+
+       /*
+        * On M9 watches the compressor can grow large and cause churn
+        * in the working set, resulting in audio drops. Capping the
+        * compressor size favors reclaiming unused memory sitting in
+        * the idle band via jetsam.
+        */
+
+#define COMPRESSOR_CAP_PERCENTAGE        30ULL
+
+       if (compressor_pool_max_size > max_mem) {
+               compressor_pool_max_size = max_mem;
+       }
+
+       if (vm_compression_limit == 0) {
+               compressor_pool_size = (max_mem * COMPRESSOR_CAP_PERCENTAGE) / 100ULL;
+       }
+       compressor_pool_multiplier = 1;
+
 #else
+
        if (compressor_pool_max_size > max_mem) {
                compressor_pool_max_size = max_mem;
        }
@@ -1074,11 +1102,48 @@ c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, bo
        return c_seg_freed;
 }
 
+void
+kdp_compressor_busy_find_owner(event64_t wait_event, thread_waitinfo_t *waitinfo)
+{
+       c_segment_t c_seg = (c_segment_t) wait_event;
+
+       waitinfo->owner = thread_tid(c_seg->c_busy_for_thread);
+       waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(c_seg);
+}
+
+#if DEVELOPMENT || DEBUG
+int
+do_cseg_wedge_thread(void)
+{
+       struct c_segment c_seg;
+       c_seg.c_busy_for_thread = current_thread();
+
+       debug_cseg_wait_event = (event_t) &c_seg;
+
+       thread_set_pending_block_hint(current_thread(), kThreadWaitCompressor);
+       assert_wait((event_t) (&c_seg), THREAD_INTERRUPTIBLE);
+
+       thread_block(THREAD_CONTINUE_NULL);
+
+       return 0;
+}
+
+int
+do_cseg_unwedge_thread(void)
+{
+       thread_wakeup(debug_cseg_wait_event);
+       debug_cseg_wait_event = NULL;
+
+       return 0;
+}
+#endif /* DEVELOPMENT || DEBUG */
 
 void
 c_seg_wait_on_busy(c_segment_t c_seg)
 {
        c_seg->c_wanted = 1;
+
+       thread_set_pending_block_hint(current_thread(), kThreadWaitCompressor);
        assert_wait((event_t) (c_seg), THREAD_UNINT);
 
        lck_mtx_unlock_always(&c_seg->c_lock);
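
For scale, the new watch-only cap works out as follows (illustrative numbers;
max_mem is the device DRAM size):

    static uint64_t
    compressor_cap_example(void)
    {
            /* Hypothetical device with 1 GiB of DRAM */
            uint64_t max_mem_example = 1024ULL * 1024 * 1024;
            /* COMPRESSOR_CAP_PERCENTAGE == 30: ~307 MiB (322122547 bytes) */
            return (max_mem_example * 30ULL) / 100ULL;
    }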
index c95cc518ac9e668dc5364cf208818ebc120e1667..c3b7229525789660cce2be3cab591d95cca05e3c 100644 (file)
@@ -170,9 +170,7 @@ struct c_segment {
        unsigned int    cseg_swap_size;
 #endif /* CHECKSUM_THE_SWAP */
 
-#if MACH_ASSERT
        thread_t        c_busy_for_thread;
-#endif /* MACH_ASSERT */
 
        int             c_slot_var_array_len;
        struct  c_slot  *c_slot_var_array;
@@ -237,7 +235,7 @@ extern  vm_offset_t     c_buffers;
        assert((cseg)->c_busy);                         \
        (cseg)->c_busy = 0;                             \
        assert((cseg)->c_busy_for_thread != NULL);      \
-       assert((((cseg)->c_busy_for_thread = NULL), TRUE));     \
+       (cseg)->c_busy_for_thread = NULL;               \
        if ((cseg)->c_wanted) {                         \
                (cseg)->c_wanted = 0;                   \
                thread_wakeup((event_t) (cseg));        \
@@ -249,7 +247,7 @@ extern  vm_offset_t     c_buffers;
        assert((cseg)->c_busy == 0);                    \
        (cseg)->c_busy = 1;                             \
        assert((cseg)->c_busy_for_thread == NULL);      \
-       assert((((cseg)->c_busy_for_thread = current_thread()), TRUE)); \
+       (cseg)->c_busy_for_thread = current_thread();   \
        MACRO_END
 
 
@@ -373,6 +371,8 @@ extern uint32_t vm_compressor_catchup_threshold_divisor_overridden;
 
 extern uint64_t vm_compressor_compute_elapsed_msecs(clock_sec_t, clock_nsec_t, clock_sec_t, clock_nsec_t);
 
+extern void kdp_compressor_busy_find_owner(event64_t wait_event, thread_waitinfo_t *waitinfo);
+
 #define PAGE_REPLACEMENT_DISALLOWED(enable)     (enable == TRUE ? lck_rw_lock_shared(&c_master_lock) : lck_rw_done(&c_master_lock))
 #define PAGE_REPLACEMENT_ALLOWED(enable)        (enable == TRUE ? lck_rw_lock_exclusive(&c_master_lock) : lck_rw_done(&c_master_lock))
 
index e3956937b4c80a5960048cead8e03ea0041cd2f6..1622e5547705f21f9f76d0e86c36ad058751e2ae 100644 (file)
@@ -6682,7 +6682,7 @@ vm_record_rtfault(thread_t cthread, uint64_t fstart, vm_map_offset_t fault_vaddr
        uint64_t cupid = get_current_unique_pid();
 
        uintptr_t bpc = 0;
-       uint32_t bfrs = 0;
+       int btr = 0;
        bool u64 = false;
 
        /* Capture a single-frame backtrace; this extracts just the program
@@ -6690,7 +6690,7 @@ vm_record_rtfault(thread_t cthread, uint64_t fstart, vm_map_offset_t fault_vaddr
         * further user stack traversals, thus avoiding copyin()s and further
         * faults.
         */
-       int btr = backtrace_thread_user(cthread, &bpc, 1U, &bfrs, &u64, NULL);
+       unsigned int bfrs = backtrace_thread_user(cthread, &bpc, 1U, &btr, &u64, NULL);
 
        if ((btr == 0) && (bfrs > 0)) {
                cfpc = bpc;
index 6146c8e40417985bfcd06669603a5a66616df055..ab0d87614e7a3d13ae0e22ac4d2a2087a57754ed 100644 (file)
@@ -1197,6 +1197,7 @@ vm_map_create_options(
        result->map_disallow_data_exec = FALSE;
        result->is_nested_map = FALSE;
        result->map_disallow_new_exec = FALSE;
+       result->terminated = FALSE;
        result->highest_entry_end = 0;
        result->first_free = vm_map_to_entry(result);
        result->hint = vm_map_to_entry(result);
@@ -4023,7 +4024,7 @@ vm_map_enter_mem_object_helper(
        } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
                vm_named_entry_t        named_entry;
 
-               named_entry = (vm_named_entry_t) port->ip_kobject;
+               named_entry = (vm_named_entry_t) ip_get_kobject(port);
 
                if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
                    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
@@ -7612,7 +7613,7 @@ vm_map_delete(
        const vm_map_offset_t   FIND_GAP = 1;   /* a not page aligned value */
        const vm_map_offset_t   GAPS_OK = 2;    /* a different not page aligned value */
 
-       if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK)) {
+       if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
                gap_start = FIND_GAP;
        } else {
                gap_start = GAPS_OK;
@@ -8327,6 +8328,34 @@ vm_map_delete(
        return KERN_SUCCESS;
 }
 
+
+/*
+ *     vm_map_terminate:
+ *
+ *     Clean out a task's map.
+ */
+kern_return_t
+vm_map_terminate(
+       vm_map_t        map)
+{
+       vm_map_lock(map);
+       map->terminated = TRUE;
+       vm_map_unlock(map);
+
+       return vm_map_remove(map,
+                  map->min_offset,
+                  map->max_offset,
+                  /*
+                   * Final cleanup:
+                   * + no unnesting
+                   * + remove immutable mappings
+                   * + allow gaps in range
+                   */
+                  (VM_MAP_REMOVE_NO_UNNESTING |
+                  VM_MAP_REMOVE_IMMUTABLE |
+                  VM_MAP_REMOVE_GAPS_OK));
+}
+
 /*
  *     vm_map_remove:
  *
@@ -17809,7 +17838,7 @@ convert_port_entry_to_map(
                        if (ip_active(port) && (ip_kotype(port)
                            == IKOT_NAMED_ENTRY)) {
                                named_entry =
-                                   (vm_named_entry_t)port->ip_kobject;
+                                   (vm_named_entry_t) ip_get_kobject(port);
                                if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
                                        ip_unlock(port);
 
@@ -17867,7 +17896,7 @@ try_again:
                ip_lock(port);
                if (ip_active(port) &&
                    (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
-                       named_entry = (vm_named_entry_t)port->ip_kobject;
+                       named_entry = (vm_named_entry_t) ip_get_kobject(port);
                        if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
                                ip_unlock(port);
                                try_failed_count++;
@@ -18692,6 +18721,7 @@ again:
                }
        }
 
+       *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
        if (evaluation_phase) {
                unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
 
@@ -18724,7 +18754,6 @@ again:
                goto again;
        } else {
                kr = KERN_SUCCESS;
-               *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
        }
 
 done:
index 3360cdfb4f137fddfe45b5a787b2a6537e581b99..e49170c5d7ceae7d075c7fa9f64d2e1ea4931260 100644 (file)
@@ -516,7 +516,8 @@ struct _vm_map {
        /* boolean_t */ map_disallow_new_exec:1,         /* Disallow new executable code */
        /* boolean_t */ jit_entry_exists:1,
        /* boolean_t */ has_corpse_footprint:1,
-       /* reserved */ pad:20;
+       /* boolean_t */ terminated:1,
+       /* reserved */ pad:19;
        unsigned int            timestamp;      /* Version number */
 };
 
@@ -1348,6 +1349,9 @@ extern kern_return_t    vm_map_enter_mem_object_control(
        vm_prot_t               max_protection,
        vm_inherit_t            inheritance);
 
+extern kern_return_t    vm_map_terminate(
+       vm_map_t                map);
+
 #endif /* !XNU_KERNEL_PRIVATE */
 
 /* Deallocate a region */
index d8befa53c08641435f975281544e278337ffb967..341ce4754e0ea0c7122b69f4a90a73f0c5bb1c48 100644 (file)
@@ -328,7 +328,7 @@ vm_shared_region_vm_map(
        assert(shared_region->sr_ref_count > 1);
 
        sr_handle = shared_region->sr_mem_entry;
-       sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject;
+       sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
        sr_map = sr_mem_entry->backing.map;
        assert(sr_mem_entry->is_sub_map);
 
@@ -912,7 +912,7 @@ vm_shared_region_destroy(
        assert(!shared_region->sr_persists);
        assert(!shared_region->sr_slid);
 
-       mem_entry = (vm_named_entry_t) shared_region->sr_mem_entry->ip_kobject;
+       mem_entry = (vm_named_entry_t) ip_get_kobject(shared_region->sr_mem_entry);
        assert(mem_entry->is_sub_map);
        assert(!mem_entry->internal);
        assert(!mem_entry->is_copy);
@@ -1066,7 +1066,7 @@ vm_shared_region_undo_mappings(
 
                /* no need to lock because this data is never modified... */
                sr_handle = shared_region->sr_mem_entry;
-               sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject;
+               sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
                sr_map = sr_mem_entry->backing.map;
                sr_base_address = shared_region->sr_base_address;
        }
@@ -1208,7 +1208,7 @@ vm_shared_region_map_file(
 
        /* no need to lock because this data is never modified... */
        sr_handle = shared_region->sr_mem_entry;
-       sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject;
+       sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
        sr_map = sr_mem_entry->backing.map;
        sr_base_address = shared_region->sr_base_address;
 
@@ -1593,7 +1593,7 @@ vm_shared_region_trim_and_get(task_t task)
        }
 
        sr_handle = shared_region->sr_mem_entry;
-       sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject;
+       sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
        sr_map = sr_mem_entry->backing.map;
 
        /* Trim the pmap if possible. */
@@ -2749,14 +2749,14 @@ vm_commpage_text_init(void)
        /* create the 32 bit comm text page */
        unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restricting to 32bMAX-2PAGE */
        _vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
-       commpage_text32_entry = (vm_named_entry_t) commpage_text32_handle->ip_kobject;
+       commpage_text32_entry = (vm_named_entry_t) ip_get_kobject(commpage_text32_handle);
        commpage_text32_map = commpage_text32_entry->backing.map;
        commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
        /* XXX if (cpu_is_64bit_capable()) ? */
        /* create the 64-bit comm page */
        offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restricting sliding upto 2Mb range */
        _vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
-       commpage_text64_entry = (vm_named_entry_t) commpage_text64_handle->ip_kobject;
+       commpage_text64_entry = (vm_named_entry_t) ip_get_kobject(commpage_text64_handle);
        commpage_text64_map = commpage_text64_entry->backing.map;
        commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
 
@@ -2782,13 +2782,13 @@ vm_commpage_init(void)
 #if defined(__i386__) || defined(__x86_64__)
        /* create the 32-bit comm page */
        _vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
-       commpage32_entry = (vm_named_entry_t) commpage32_handle->ip_kobject;
+       commpage32_entry = (vm_named_entry_t) ip_get_kobject(commpage32_handle);
        commpage32_map = commpage32_entry->backing.map;
 
        /* XXX if (cpu_is_64bit_capable()) ? */
        /* create the 64-bit comm page */
        _vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
-       commpage64_entry = (vm_named_entry_t) commpage64_handle->ip_kobject;
+       commpage64_entry = (vm_named_entry_t) ip_get_kobject(commpage64_handle);
        commpage64_map = commpage64_entry->backing.map;
 
 #endif /* __i386__ || __x86_64__ */
index ab106cb5a0f5f7f4f8fcf8b0f0641cbd7da5ad49..027d0c99265c9e96584fcea33415a0f70c2537a0 100644 (file)
@@ -2372,7 +2372,7 @@ mach_make_memory_entry_internal(
 
        if (IP_VALID(parent_handle) &&
            ip_kotype(parent_handle) == IKOT_NAMED_ENTRY) {
-               parent_entry = (vm_named_entry_t) parent_handle->ip_kobject;
+               parent_entry = (vm_named_entry_t) ip_get_kobject(parent_handle);
        } else {
                parent_entry = NULL;
        }
@@ -3646,7 +3646,7 @@ memory_entry_purgeable_control_internal(
                return KERN_INVALID_ARGUMENT;
        }
 
-       mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+       mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
 
        named_entry_lock(mem_entry);
 
@@ -3709,7 +3709,7 @@ memory_entry_access_tracking_internal(
                return KERN_INVALID_ARGUMENT;
        }
 
-       mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+       mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
 
        named_entry_lock(mem_entry);
 
@@ -3788,7 +3788,7 @@ mach_memory_entry_ownership(
            ip_kotype(entry_port) != IKOT_NAMED_ENTRY) {
                return KERN_INVALID_ARGUMENT;
        }
-       mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+       mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
 
        named_entry_lock(mem_entry);
 
@@ -3842,7 +3842,7 @@ mach_memory_entry_get_page_counts(
                return KERN_INVALID_ARGUMENT;
        }
 
-       mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+       mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
 
        named_entry_lock(mem_entry);
 
@@ -3907,7 +3907,7 @@ mach_destroy_memory_entry(
 #if MACH_ASSERT
        assert(ip_kotype(port) == IKOT_NAMED_ENTRY);
 #endif /* MACH_ASSERT */
-       named_entry = (vm_named_entry_t)port->ip_kobject;
+       named_entry = (vm_named_entry_t) ip_get_kobject(port);
 
        named_entry_lock(named_entry);
        named_entry->ref_count -= 1;
@@ -3934,8 +3934,7 @@ mach_destroy_memory_entry(
                lck_mtx_unlock(&vm_named_entry_list_lock_data);
 #endif /* VM_NAMED_ENTRY_LIST */
 
-               kfree(port->ip_kobject,
-                   sizeof(struct vm_named_entry));
+               kfree(named_entry, sizeof(struct vm_named_entry));
        } else {
                named_entry_unlock(named_entry);
        }
@@ -3961,7 +3960,7 @@ mach_memory_entry_page_op(
                return KERN_INVALID_ARGUMENT;
        }
 
-       mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+       mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
 
        named_entry_lock(mem_entry);
 
@@ -4014,7 +4013,7 @@ mach_memory_entry_range_op(
                return KERN_INVALID_ARGUMENT;
        }
 
-       mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+       mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
 
        named_entry_lock(mem_entry);
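Every hunk above makes the same mechanical substitution: raw reads of the port's ip_kobject field become calls to the ip_get_kobject() accessor, and the kfree() in mach_destroy_memory_entry() reuses the already-fetched named_entry pointer rather than re-reading the field. As a rough sketch of the accessor this code now relies on (an assumption for illustration, not the definition from this commit; the real one lives in the IPC headers and may also deal with pointer signing):

static inline ipc_kobject_t
ip_get_kobject(ipc_port_t port)
{
        /* Sketch: a thin wrapper over the field the old code read directly. */
        return port->ip_kobject;
}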
 
index b182f653a3dc002af0ffd1aea03642794d6ef06d..1e9a3431333739fb2f81a95657d7f091e96e8e43 100644 (file)
@@ -37,6 +37,7 @@
 #include <sys/errno.h>
 #include <sys/monotonic.h>
 #include <x86_64/monotonic.h>
+#include <kern/kpc.h>
 
 /*
  * Sanity check the compiler.
@@ -164,7 +165,13 @@ static void
 enable_counters(void)
 {
        wrmsr64(FIXED_CTR_CTRL, FIXED_CTR_CTRL_INIT | FIXED_CTR_CTRL_ENABLE);
-       wrmsr64(GLOBAL_CTRL, GLOBAL_CTRL_FIXED_EN);
+
+       uint64_t global_en = GLOBAL_CTRL_FIXED_EN;
+       if (kpc_get_running() & KPC_CLASS_CONFIGURABLE_MASK) {
+               global_en |= kpc_get_configurable_pmc_mask(KPC_CLASS_CONFIGURABLE_MASK);
+       }
+
+       wrmsr64(GLOBAL_CTRL, global_en);
 }
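A note on the new logic, hedging on exact constants since they are not shown in this hunk: per the Intel SDM layout of IA32_PERF_GLOBAL_CTRL, the low bits enable the programmable PMCs and bits 32 and up enable the fixed counters. The old code wrote only the fixed-counter enable bits, silently stopping any configurable counters kpc had running; with three fixed and four configurable counters, the new write would be roughly 0x700000000 | 0xf instead of bare 0x700000000, letting monotonic and kpc count concurrently.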
 
 static void
index 6a99e38fd91ad95c31e8f22397f556875a9e6bbe..b4dad979c6dc13d996f5fbaa867dfa04616d21b7 100644 (file)
@@ -2088,6 +2088,15 @@ mac_iokit_check_hid_control(kauth_cred_t cred __unused)
        return 0;
 }
 
+int mac_mount_check_snapshot_mount(vfs_context_t ctx, struct vnode *rvp, struct vnode *vp, struct componentname *cnp,
+    const char *name, const char *vfc_name);
+int
+mac_mount_check_snapshot_mount(vfs_context_t ctx __unused, struct vnode *rvp __unused, struct vnode *vp __unused,
+    struct componentname *cnp __unused, const char *name __unused, const char *vfc_name __unused)
+{
+       return 0;
+}
+
 int mac_vnode_check_trigger_resolve(vfs_context_t ctx __unused, struct vnode *dvp __unused, struct componentname *cnp __unused);
 int
 mac_vnode_check_trigger_resolve(vfs_context_t ctx __unused, struct vnode *dvp __unused, struct componentname *cnp __unused)
index e8c27a348179e2109b2a56ca17cecd3a5e209a47..865dfaa7a5e7dc6c176e6fef5bbfa6a9a7e98ec7 100644 (file)
@@ -271,6 +271,9 @@ int     mac_mount_check_snapshot_create(vfs_context_t ctx, struct mount *mp,
     const char *name);
 int     mac_mount_check_snapshot_delete(vfs_context_t ctx, struct mount *mp,
     const char *name);
+int     mac_mount_check_snapshot_mount(vfs_context_t ctx, struct vnode *rvp,
+    struct vnode *vp, struct componentname *cnp, const char *name,
+    const char *vfc_name);
 int     mac_mount_check_snapshot_revert(vfs_context_t ctx, struct mount *mp,
     const char *name);
 int     mac_mount_check_remount(vfs_context_t ctx, struct mount *mp);
index 1b46adf7ae247bec28de12c203e8e1968badb069..3f9ddbd52cc50da57893325d6fd0d7c7b84042d5 100644 (file)
@@ -1855,6 +1855,32 @@ typedef int mpo_mount_check_snapshot_delete_t(
        struct mount *mp,
        const char *name
        );
+/**
+ *  @brief Access control check for fs_snapshot_mount
+ *  @param cred Subject credential
+ *  @param rvp Vnode of either the root directory of the
+ *  filesystem to mount snapshot of, or the device from
+ *  which to mount the snapshot.
+ *  @param vp Vnode that is to be the mount point
+ *  @param cnp Component name for vp
+ *  @param name Name of snapshot to mount
+ *  @param vfc_name Filesystem type name
+ *
+ *  Determine whether the subject identified by the credential can
+ *  mount the named snapshot from the filesystem at the given
+ *  directory.
+ *
+ *  @return Return 0 if access is granted, otherwise an appropriate value
+ *  for errno should be returned.
+ */
+typedef int mpo_mount_check_snapshot_mount_t(
+       kauth_cred_t cred,
+       struct vnode *rvp,
+       struct vnode *vp,
+       struct componentname *cnp,
+       const char *name,
+       const char *vfc_name
+       );
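As an illustration of how a policy module could use this new hook (hypothetical code, not part of this commit; the example_ name and the chosen prefix are invented), a policy might deny snapshot mounts with a reserved name prefix to non-root credentials and wire the function into the mpo_mount_check_snapshot_mount slot of its mac_policy_ops:

static int
example_mount_check_snapshot_mount(kauth_cred_t cred, struct vnode *rvp,
    struct vnode *vp, struct componentname *cnp, const char *name,
    const char *vfc_name)
{
        /* Deny non-root mounts of snapshots carrying a reserved prefix. */
        if (strncmp(name, "com.example.private.", 20) == 0 &&
            kauth_cred_getuid(cred) != 0) {
                return EPERM;
        }
        return 0;       /* 0 grants access, per the contract above */
}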
 /**
  *  @brief Access control check for fs_snapshot_revert
  *  @param cred Subject credential
@@ -6296,7 +6322,7 @@ typedef void mpo_reserved_hook_t(void);
  * Please note that this should be kept in sync with the check assumptions
  * policy in bsd/kern/policy_check.c (policy_ops struct).
  */
-#define MAC_POLICY_OPS_VERSION 59 /* inc when new reserved slots are taken */
+#define MAC_POLICY_OPS_VERSION 62 /* inc when new reserved slots are taken */
 struct mac_policy_ops {
        mpo_audit_check_postselect_t            *mpo_audit_check_postselect;
        mpo_audit_check_preselect_t             *mpo_audit_check_preselect;
@@ -6450,7 +6476,7 @@ struct mac_policy_ops {
 
        mpo_vnode_check_trigger_resolve_t       *mpo_vnode_check_trigger_resolve;
        mpo_mount_check_mount_late_t            *mpo_mount_check_mount_late;
-       mpo_reserved_hook_t                     *mpo_reserved1;
+       mpo_mount_check_snapshot_mount_t        *mpo_mount_check_snapshot_mount;
        mpo_reserved_hook_t                     *mpo_reserved2;
        mpo_skywalk_flow_check_connect_t        *mpo_skywalk_flow_check_connect;
        mpo_skywalk_flow_check_listen_t         *mpo_skywalk_flow_check_listen;
index 95afa830b40c0e0c9e5bc2bf33c95072de63294f..67452ded363e599b7fe5cd8bff19ff901f160c53 100644 (file)
  * KDBG_EVENTID(DBG_FSYSTEM, DBG_VFS, dcode) global event id, see bsd/sys/kdebug.h.
  * Note that dcode is multiplied by 4 and ORed as part of the construction. See bsd/kern/trace_codes
  * for list of system-wide {global event id, name} pairs. Currently DBG_VFS event ids are in range
- * [0x3130000, 0x313016C].
+ * [0x3130000, 0x3130170].
  */
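For example, the snapshot-mount check added below uses dcode 92, giving KDBG_EVENTID(DBG_FSYSTEM, DBG_VFS, 92) = 0x3130000 | (92 << 2) = 0x3130170, which is exactly why the upper bound of the range moves from 0x313016C to 0x3130170.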
 
 //#define VFS_TRACE_POLICY_OPS
@@ -2337,6 +2337,29 @@ mac_mount_check_snapshot_delete(vfs_context_t ctx, struct mount *mp,
        return error;
 }
 
+int
+mac_mount_check_snapshot_mount(vfs_context_t ctx, struct vnode *rvp, struct vnode *vp, struct componentname *cnp,
+    const char *name, const char *vfc_name)
+{
+       kauth_cred_t cred;
+       int error;
+
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
+       if (!mac_vnode_enforce) {
+               return 0;
+       }
+#endif
+       cred = vfs_context_ucred(ctx);
+       if (!mac_cred_check_enforce(cred)) {
+               return 0;
+       }
+       VFS_KERNEL_DEBUG_START1(92, vp);
+       MAC_CHECK(mount_check_snapshot_mount, cred, rvp, vp, cnp, name, vfc_name);
+       VFS_KERNEL_DEBUG_END1(92, vp);
+       return error;
+}
+
 int
 mac_mount_check_snapshot_revert(vfs_context_t ctx, struct mount *mp,
     const char *name)
index 610cecb15778556bdbf868eb7b65d20a20c88251..790dc11c114b05b4ecde7a1315a4736fc9170ed7 100644 (file)
@@ -77,7 +77,7 @@ install-immovable_send_client: immovable_send_client
 kdebug: INVALID_ARCHS = i386
 kdebug: OTHER_LDFLAGS = -framework ktrace -ldarwintest_utils -framework kperf
 
-EXCLUDED_SOURCES += drop_priv.c kperf_helpers.c xnu_quick_test_helpers.c memorystatus_assertion_helpers.c
+EXCLUDED_SOURCES += drop_priv.c kperf_helpers.c xnu_quick_test_helpers.c memorystatus_assertion_helpers.c bpflib.c in_cksum.c
 
 ifneq ($(PLATFORM),iPhoneOS)
 EXCLUDED_SOURCES += jumbo_va_spaces_28530648.c perf_compressor.c memorystatus_freeze_test.c
@@ -112,7 +112,8 @@ memorystatus_zone_test: OTHER_CFLAGS += -isystem $(SDKROOT)/System/Library/Frame
 memorystatus_zone_test: OTHER_LDFLAGS += -framework ktrace
 memorystatus_zone_test: OTHER_LDFLAGS += -ldarwintest_utils
 
-kpc: OTHER_LDFLAGS += -framework kperf
+kpc: OTHER_LDFLAGS += -framework kperf -framework ktrace
+kpc: INVALID_ARCHS = i386
 
 kperf: INVALID_ARCHS = i386
 kperf: OTHER_CFLAGS += kperf_helpers.c
@@ -278,6 +279,9 @@ socket_bind_35685803: CODE_SIGN_ENTITLEMENTS = network_entitlements.plist
 
 net_tuntests: CODE_SIGN_ENTITLEMENTS = network_entitlements.plist
 
+net_bridge: OTHER_CFLAGS += bpflib.c in_cksum.c
+net_bridge: OTHER_LDFLAGS += -ldarwintest_utils
+
 ifneq (osx,$(TARGET_NAME))
 EXCLUDED_SOURCES += no32exec_35914211.c no32exec_35914211_helper.c
 else  # target = osx
@@ -352,4 +356,18 @@ debug_control_port_for_pid: CODE_SIGN_ENTITLEMENTS = ./debug_control_port_for_pi
 
 prng: OTHER_LDFLAGS += -ldarwintest_utils
 
+OTHER_TEST_TARGETS += io_catalog_send_data
+
+io_catalog_send_data: INVALID_ARCHS = i386
+io_catalog_send_data: OTHER_CFLAGS += -DTEST_UNENTITLED -framework IOKit -framework CoreFoundation -framework Foundation
+io_catalog_send_data: iokit/io_catalog_send_data.m
+       $(CC) $(DT_CFLAGS) $(OTHER_CFLAGS) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $< -o $(SYMROOT)/$@
+
+task_create_suid_cred: CODE_SIGN_ENTITLEMENTS = ./task_create_suid_cred_entitlement.plist
+
+OTHER_TEST_TARGETS += task_create_suid_cred_unentitled
+task_create_suid_cred_unentitled: OTHER_CFLAGS += -DUNENTITLED
+task_create_suid_cred_unentitled: task_create_suid_cred.c
+       $(CC) $(DT_CFLAGS) $(OTHER_CFLAGS) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $< -o $(SYMROOT)/$@
+
 include $(DEVELOPER_DIR)/AppleInternal/Makefiles/darwintest/Makefile.targets
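(An inference from the Makefile itself, not stated in the diff: helper sources such as bpflib.c and in_cksum.c go into EXCLUDED_SOURCES so darwintest does not build them as standalone tests; they are instead compiled into net_bridge via its OTHER_CFLAGS line. Conversely, targets appended to OTHER_TEST_TARGETS, such as io_catalog_send_data and task_create_suid_cred_unentitled, are not covered by the default per-source rule and therefore carry their own explicit compile rules above.)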
diff --git a/tests/bpflib.c b/tests/bpflib.c
new file mode 100644 (file)
index 0000000..aa9b913
--- /dev/null
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <net/bpf.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <net/if.h>
+#include <stdbool.h>
+#define PRIVATE_EXTERN __private_extern__
+
+#include "bpflib.h"
+
+#ifdef TESTING
+#include "util.h"
+#endif /* TESTING */
+
+PRIVATE_EXTERN int
+bpf_set_timeout(int fd, struct timeval * tv_p)
+{
+       return ioctl(fd, BIOCSRTIMEOUT, tv_p);
+}
+
+PRIVATE_EXTERN int
+bpf_get_blen(int fd, int * blen)
+{
+       return ioctl(fd, BIOCGBLEN, blen);
+}
+
+PRIVATE_EXTERN int
+bpf_set_header_complete(int fd, u_int header_complete)
+{
+       return ioctl(fd, BIOCSHDRCMPLT, &header_complete);
+}
+
+PRIVATE_EXTERN int
+bpf_set_see_sent(int fd, u_int see_sent)
+{
+       return ioctl(fd, BIOCSSEESENT, &see_sent);
+}
+
+PRIVATE_EXTERN int
+bpf_dispose(int bpf_fd)
+{
+       if (bpf_fd >= 0) {
+               return close(bpf_fd);
+       }
+       return 0;
+}
+
+PRIVATE_EXTERN int
+bpf_new(void)
+{
+       char bpfdev[256];
+       int i;
+       int fd = -1;
+
+       for (i = 0; true; i++) {
+               snprintf(bpfdev, sizeof(bpfdev), "/dev/bpf%d", i);
+               fd = open(bpfdev, O_RDWR, 0);
+               if (fd >= 0) {
+#ifdef SO_TC_CTL
+                       int tc = SO_TC_CTL;
+                       (void) ioctl(fd, BIOCSETTC, &tc);
+#endif /* SO_TC_CTL */
+                       break;
+               }
+               if (errno != EBUSY) {
+                       break;
+               }
+       }
+       return fd;
+}
+
+PRIVATE_EXTERN int
+bpf_setif(int fd, const char * en_name)
+{
+       struct ifreq ifr;
+
+       strlcpy(ifr.ifr_name, en_name, sizeof(ifr.ifr_name));
+       return ioctl(fd, BIOCSETIF, &ifr);
+}
+
+PRIVATE_EXTERN int
+bpf_set_immediate(int fd, u_int value)
+{
+       return ioctl(fd, BIOCIMMEDIATE, &value);
+}
+
+PRIVATE_EXTERN int
+bpf_filter_receive_none(int fd)
+{
+       struct bpf_insn insns[] = {
+               BPF_STMT(BPF_RET + BPF_K, 0),
+       };
+       struct bpf_program prog;
+
+       prog.bf_len = sizeof(insns) / sizeof(struct bpf_insn);
+       prog.bf_insns = insns;
+       return ioctl(fd, BIOCSETF, &prog);
+}
+
+PRIVATE_EXTERN int
+bpf_arp_filter(int fd, int type_offset, int type, u_int pkt_size)
+{
+       struct bpf_insn insns[] = {
+               BPF_STMT(BPF_LD + BPF_H + BPF_ABS, type_offset),
+               BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, type, 0, 1),
+               BPF_STMT(BPF_RET + BPF_K, pkt_size),
+               BPF_STMT(BPF_RET + BPF_K, 0),
+       };
+       struct bpf_program prog;
+
+       prog.bf_len = sizeof(insns) / sizeof(struct bpf_insn);
+       prog.bf_insns = insns;
+       return ioctl(fd, BIOCSETF, &prog);
+}
+
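The four-instruction classic BPF program above reads: load the 16-bit half-word at type_offset, fall through to the next instruction if it equals type or skip one instruction otherwise, return pkt_size (accept, truncating the capture to pkt_size bytes) on a match, and return 0 (drop) for everything else.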
+#ifdef TESTING
+#include <net/if_arp.h>
+#include <net/ethernet.h>
+#include <netinet/if_ether.h>
+
+
+void
+bpf_read_continuously(int fd, u_int blen)
+{
+       int n;
+       char * rxbuf = malloc(blen);
+
+       printf("rx buf len is %d\n", blen);
+       while (1) {
+               n = read(fd, rxbuf, blen);
+               if (n < 0) {
+                       perror("bpf_read_continuously");
+                       return;
+               }
+               if (n == 0) {
+                       continue;
+               }
+               print_data(rxbuf, n);
+       }
+}
+
+int
+main(int argc, char * argv[])
+{
+       int fd = bpf_new();
+       char * en_name = "en0";
+       u_int bpf_blen = 0;
+
+       if (fd < 0) {
+               perror("no bpf devices");
+               exit(1);
+       }
+
+       if (argc > 1) {
+               en_name = argv[1];
+       }
+       (void)bpf_set_immediate(fd, 1);
+       if (bpf_arp_filter(fd, 12, ETHERTYPE_ARP,
+           sizeof(struct ether_arp) + sizeof(struct ether_header))
+           < 0) {
+               perror("bpf_arp_filter");
+       }
+       if (bpf_setif(fd, en_name) < 0) {
+               perror("bpf_attach");
+               exit(1);
+       }
+
+       if (bpf_get_blen(fd, &bpf_blen) < 0) {
+               perror("bpf_get_blen");
+               exit(1);
+       }
+       bpf_read_continuously(fd, bpf_blen);
+       exit(0);
+       return 0;
+}
+#endif /* TESTING */
diff --git a/tests/bpflib.h b/tests/bpflib.h
new file mode 100644 (file)
index 0000000..84e2174
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+
+#ifndef _S_BPFLIB_H
+#define _S_BPFLIB_H
+
+int bpf_get_blen(int fd, int * blen);
+int bpf_new(void);
+int bpf_dispose(int fd);
+int bpf_setif(int fd, const char * en_name);
+int bpf_set_immediate(int fd, u_int value);
+int bpf_filter_receive_none(int fd);
+int bpf_arp_filter(int fd, int type_offset, int type, u_int packet_size);
+int bpf_set_timeout(int fd, struct timeval * tv_p);
+int bpf_set_header_complete(int fd, u_int header_complete);
+int bpf_set_see_sent(int fd, u_int see_send);
+
+#endif /* _S_BPFLIB_H */
diff --git a/tests/fcntl.c b/tests/fcntl.c
new file mode 100644 (file)
index 0000000..877425f
--- /dev/null
@@ -0,0 +1,41 @@
+#include <darwintest.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <darwintest_utils.h>
+#include <mach/vm_page_size.h>
+
+/** Verify that F_ADDSIGS does not page fault off the end of the user blob
+ * 1. Find VA space for 3 pages
+ * 2. Unmap the last page
+ * 3. Start fs_blob_start at PAGE_SIZE + 1 bytes away from the end of the
+ * VA region (such that any read of more than PAGE_SIZE + 1 bytes will fault)
+ * 4. Call fcntl with the arguments and verify the output is not EFAULT
+ */
+T_DECL(fcntl_addsig, "Verify that fcntl(F_ADDSIGS) doesn't EFAULT", T_META_NAMESPACE("xnu.vfs")) {
+       void* blob_space = mmap(NULL, vm_page_size * 3, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+       T_ASSERT_NE(blob_space, MAP_FAILED, "Blob Region: %p [%zu]", blob_space, vm_page_size);
+
+       T_ASSERT_POSIX_SUCCESS(munmap((char*)blob_space + (vm_page_size * 2), vm_page_size), NULL);
+
+       size_t blob_size = vm_page_size + 1;
+       char* blob_start = ((char*)blob_space) + (vm_page_size * 2) - blob_size;
+       fsignatures_t args = { .fs_file_start = 0, .fs_blob_start = blob_start, .fs_blob_size = blob_size };
+
+       // Create test file to operate on
+       const char * tmp_dir = dt_tmpdir();
+       char tmp_file_name[PATH_MAX];
+       snprintf(tmp_file_name, sizeof(tmp_file_name), "%s/foo", tmp_dir);
+       FILE* tmp_file = fopen(tmp_file_name, "wx");
+       fprintf(tmp_file, "Just some random content");
+       fclose(tmp_file);
+
+       int fd = open(tmp_file_name, O_RDONLY);
+       T_ASSERT_POSIX_SUCCESS(fd, "tmp file: %s", tmp_file_name);
+
+       // This command will fail, but should not fail with EFAULT
+       int result = fcntl(fd, F_ADDSIGS, &args);
+       int error = errno;
+       T_QUIET; T_EXPECT_EQ(result, -1, NULL);
+       // EBADEXEC is expected, but not required for success of this test
+       T_EXPECT_NE(error, EFAULT, "fcntl: %d (%d:%s)", result, error, strerror(error));
+}
diff --git a/tests/in_cksum.c b/tests/in_cksum.c
new file mode 100644 (file)
index 0000000..2dc3f49
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 1988, 1992, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ *     @(#)in_cksum.c  8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include "in_cksum.h"
+
+typedef union {
+       char        c[2];
+       u_short     s;
+} short_union_t;
+
+typedef union {
+       u_short     s[2];
+       long        l;
+} long_union_t;
+
+static __inline__ void
+reduce(int * sum)
+{
+       long_union_t l_util;
+
+       l_util.l = *sum;
+       *sum = l_util.s[0] + l_util.s[1];
+       if (*sum > 65535) {
+               *sum -= 65535;
+       }
+       return;
+}
+
+
+#include <stdio.h>
+
+unsigned short
+in_cksum(void * pkt, int len)
+{
+       u_short * w;
+       int sum = 0;
+
+       w = (u_short *)pkt;
+       while ((len -= 32) >= 0) {
+               sum += w[0]; sum += w[1];
+               sum += w[2]; sum += w[3];
+               sum += w[4]; sum += w[5];
+               sum += w[6]; sum += w[7];
+               sum += w[8]; sum += w[9];
+               sum += w[10]; sum += w[11];
+               sum += w[12]; sum += w[13];
+               sum += w[14]; sum += w[15];
+               w += 16;
+       }
+       len += 32;
+       while ((len -= 8) >= 0) {
+               sum += w[0]; sum += w[1];
+               sum += w[2]; sum += w[3];
+               w += 4;
+       }
+       len += 8;
+       if (len) {
+               reduce(&sum);
+               while ((len -= 2) >= 0) {
+                       sum += *w++;
+               }
+       }
+       if (len == -1) { /* odd-length packet */
+               short_union_t s_util;
+
+               s_util.s = 0;
+               s_util.c[0] = *((char *)w);
+               s_util.c[1] = 0;
+               sum += s_util.s;
+       }
+       reduce(&sum);
+       return ~sum & 0xffff;
+}
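A short usage sketch, illustrative only and not part of the new file: computing an IPv4 header checksum with this routine. Per the RFC 1071 convention the checksum field must be zero while summing; the helper name below is invented.

#include <netinet/ip.h>
#include "in_cksum.h"

static unsigned short
example_ip_header_cksum(struct ip *iph)
{
        iph->ip_sum = 0;                              /* field must be zero while summing */
        return in_cksum(iph, (int)(iph->ip_hl << 2)); /* ip_hl counts 32-bit words */
}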
diff --git a/tests/in_cksum.h b/tests/in_cksum.h
new file mode 100644 (file)
index 0000000..d84b916
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef _S_IN_CKSUM_H
+#define _S_IN_CKSUM_H
+/*
+ * Copyright (c) 2000 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+extern unsigned short in_cksum(void * pkt, int len);
+
+#endif /* _S_IN_CKSUM_H */
diff --git a/tests/iokit/io_catalog_send_data.m b/tests/iokit/io_catalog_send_data.m
new file mode 100644 (file)
index 0000000..f7cf014
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * io_catalog_send_data.m
+ *
+ * A regression test to build an IORegistry entry with mismatching
+ * IOService and IOUserClientClass via IOCatalogueSendData, to verify
+ * whether the exploit risk still exists in IOCatalogueSendData.
+ *
+ */
+#include <darwintest.h>
+
+#include <Foundation/Foundation.h>
+#include <IOKit/IOCFSerialize.h>
+#include <IOKit/IOKitLib.h>
+
+#define kIOClassKey            @"IOClass"
+#define kIOProviderClassKey    @"IOProviderClass"
+#define kIOMatchCategoryKey    @"IOMatchCategory"
+#define kIOUserClientClassKey  @"IOUserClientClass"
+#define vIOProviderClassValue  @"IOResources"
+
+T_GLOBAL_META(T_META_NAMESPACE("xnu.iokit"),
+       T_META_RUN_CONCURRENTLY(true));
+
+kern_return_t
+build_ioregistry_by_catalog_send_data(const char *match_name,
+    const char *userclient_name, const char *service_name)
+{
+       kern_return_t kret;
+
+       NSArray *rootCatalogueArray = @[@{
+           kIOProviderClassKey: vIOProviderClassValue,
+           kIOClassKey: @(service_name),
+           kIOUserClientClassKey: @(userclient_name),
+           kIOMatchCategoryKey: @(match_name)
+       }];
+
+       CFDataRef cfData = IOCFSerialize((__bridge CFTypeRef)rootCatalogueArray,
+           kIOCFSerializeToBinary);
+
+       kret = IOCatalogueSendData(MACH_PORT_NULL, 1, CFDataGetBytePtr(cfData),
+           CFDataGetLength(cfData));
+
+       if (cfData) {
+               CFRelease(cfData);
+       }
+
+       return kret;
+}
+
+bool
+test_open_ioregistry(const char *match_name, const char *service_name,
+    bool exploit)
+{
+       kern_return_t kret;
+       bool ioreg_found = false;
+       CFStringRef cfstrMatchName = NULL;
+       io_connect_t conn = IO_OBJECT_NULL;
+       io_iterator_t iter = IO_OBJECT_NULL, obj = IO_OBJECT_NULL;
+       CFMutableDictionaryRef service_info = NULL, properties = NULL;
+
+       service_info = IOServiceMatching(service_name);
+       kret = IOServiceGetMatchingServices(kIOMasterPortDefault, service_info, &iter);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "IOServiceGetMatchingServices");
+       cfstrMatchName = CFStringCreateWithCString(kCFAllocatorDefault,
+           match_name, kCFStringEncodingUTF8);
+
+       while ((obj = IOIteratorNext(iter))) {
+               kret = IORegistryEntryCreateCFProperties(obj, &properties,
+                   kCFAllocatorDefault, kNilOptions);
+               if (kret != KERN_SUCCESS) {
+                       T_LOG("IORegistryEntryCreateCFProperties fails, 0x%08X",
+                           (uint32_t)kret);
+                       IOObjectRelease(obj);
+                       continue;
+               }
+
+               CFStringRef value = CFDictionaryGetValue(properties, CFSTR("IOMatchCategory"));
+               if (value && CFGetTypeID(value) == CFStringGetTypeID() &&
+                   CFEqual(value, cfstrMatchName)) {
+                       ioreg_found = true;
+               } else {
+                       IOObjectRelease(obj);
+                       continue;
+               }
+
+               if (!exploit) {
+                       goto bail;
+               }
+
+               T_LOG("try to exploit by opening io service, possibly panic?");
+               IOServiceOpen(obj, mach_task_self(), 0, &conn);
+               IOObjectRelease(obj);
+
+               break;
+       }
+
+bail:
+       if (cfstrMatchName) {
+               CFRelease(cfstrMatchName);
+       }
+
+       if (properties) {
+               CFRelease(properties);
+       }
+
+       if (iter != IO_OBJECT_NULL) {
+               IOObjectRelease(iter);
+       }
+
+       if (conn != IO_OBJECT_NULL) {
+               IOServiceClose(conn);
+       }
+
+       return ioreg_found;
+}
+
+T_DECL(io_catalog_send_data_test, "regression test to build an IORegistry entry"
+    " with mismatching IOService and IOUserClientClass by IOCatalogueSendData, "
+    "to verify whether the exploit risk still exists in IOCatalogueSendData for "
+    "a potential DoS - <rdar://problem/31558871>")
+{
+       kern_return_t kret;
+
+       kret = build_ioregistry_by_catalog_send_data("fooBar",
+           "IOSurfaceRootUserClient", "IOReportHub");
+#if (TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
+       /* this trick to build an entry by io_catalog_send_data should fail */
+       T_EXPECT_EQ(kret, kIOReturnNotPrivileged, "building an entry with"
+           " mismatched IOService and IOUserClientClass via IOCatalogueSendData "
+           "should fail with kIOReturnNotPrivileged");
+#else
+       T_EXPECT_EQ(kret, KERN_SUCCESS, "IOCatalogueSendData should return success with kextd");
+#endif
+       T_EXPECT_FALSE(test_open_ioregistry("fooBar", "IOReportHub", false),
+           "Mismatched entry built by IOCatalogueSendData should not be opened");
+}
index 62b87e68e9ae85be9a3335d670b66c2cd5bc9ed2..7e32363449102f69cb7016a8e3e71f768740a7b3 100644 (file)
-/* Copyright (c) 2018 Apple Inc.  All rights reserved. */
+// Copyright (c) 2018-2020 Apple Inc.  All rights reserved.
 
 #include <darwintest.h>
+#include <ktrace/config.h>
+#include <ktrace/session.h>
 #include <inttypes.h>
+#include <libproc.h>
+#include <pthread.h>
 #include <stdint.h>
+#include <sys/resource.h>
 #include <sys/sysctl.h>
 
 #include <kperf/kpc.h>
+#include <kperf/kperf.h>
+
+#include "ktrace_helpers.h"
+#include "kperf_helpers.h"
 
 T_GLOBAL_META(
        T_META_NAMESPACE("xnu.ktrace"),
        T_META_ASROOT(true),
        T_META_CHECK_LEAKS(false));
 
-T_DECL(fixed_thread_counters,
-    "test that fixed thread counters return monotonically increasing values")
+struct machine {
+       unsigned int ncpus;
+       unsigned int nfixed;
+       unsigned int nconfig;
+};
+
+static void
+skip_if_unsupported(void)
+{
+       int r;
+       int supported = 0;
+       size_t supported_size = sizeof(supported);
+
+       r = sysctlbyname("kern.monotonic.supported", &supported, &supported_size,
+           NULL, 0);
+       if (r < 0) {
+               T_WITH_ERRNO;
+               T_SKIP("could not find \"kern.monotonic.supported\" sysctl");
+       }
+
+       if (!supported) {
+               T_SKIP("PMCs are not supported on this platform");
+       }
+}
+
+static struct rusage_info_v4 pre_ru = {};
+
+static void
+start_kpc(void)
+{
+       T_SETUPBEGIN;
+
+       kpc_classmask_t classes = KPC_CLASS_FIXED_MASK |
+           KPC_CLASS_CONFIGURABLE_MASK;
+       int ret = kpc_set_counting(classes);
+       T_ASSERT_POSIX_SUCCESS(ret, "started counting");
+
+       ret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4, (rusage_info_t *)&pre_ru);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "got rusage information");
+
+       kpc_classmask_t classes_on = kpc_get_counting();
+       T_QUIET;
+       T_ASSERT_EQ(classes, classes_on, "classes counting is correct");
+
+       T_SETUPEND;
+}
+
+static void kpc_reset_atend(void);
+
+#if defined(__arm__) || defined(__arm64__)
+#define CYCLES_EVENT 0x02
+#else // defined(__arm__) || defined(__arm64__)
+#define CYCLES_EVENT (0x10000 | 0x20000 | 0x3c)
+#endif // !defined(__arm__) && !defined(__arm64__)
+
+static void
+prepare_kpc(struct machine *mch, bool config, bool reset)
 {
+       T_SETUPBEGIN;
+
+       if (!reset) {
+               T_ATEND(kpc_reset_atend);
+       }
+
+       size_t ncpus_sz = sizeof(mch->ncpus);
+       int ret = sysctlbyname("hw.logicalcpu_max", &mch->ncpus, &ncpus_sz,
+           NULL, 0);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(ret, "sysctlbyname(hw.logicalcpu_max)");
+       T_QUIET;
+       T_ASSERT_GT(mch->ncpus, 0, "must have some number of CPUs");
+
+       ret = kpc_force_all_ctrs_set(1);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_set(1)");
+
+       int forcing = 0;
+       ret = kpc_force_all_ctrs_get(&forcing);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_get");
+       T_QUIET; T_ASSERT_EQ(forcing, 1, "counters must be forced");
+
+       mch->nfixed = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
+       mch->nconfig = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
+
+       T_LOG("machine: ncpus = %d, nfixed = %d, nconfig = %d", mch->ncpus,
+           mch->nfixed, mch->nconfig);
+
+       if (config) {
+               uint32_t nconfigs = kpc_get_config_count(
+                   KPC_CLASS_CONFIGURABLE_MASK);
+               uint64_t *configs = calloc(nconfigs, sizeof(*configs));
+               T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");
+
+               for (unsigned int i = 0; i < nconfigs; i++) {
+                       configs[i] = reset ? 0 : CYCLES_EVENT;
+               }
+
+               ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, configs);
+               T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_config");
+       }
+
+       T_SETUPEND;
+}
+
+static void
+kpc_reset_atend(void)
+{
+       struct machine mch = {};
+       prepare_kpc(&mch, true, true);
+       uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
+       T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
+
+       int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
+       free(periods);
+}
+
+static void *
+spin(void *arg)
+{
+       while (*(volatile int *)arg == 0) {
+               ;
+       }
+
+       return NULL;
+}
+
+static pthread_t *
+start_threads(const struct machine *mch, void *(*func)(void *), void *arg)
+{
+       T_SETUPBEGIN;
+
+       pthread_t *threads = calloc((unsigned int)mch->ncpus,
+           sizeof(*threads));
+       T_QUIET; T_ASSERT_NOTNULL(threads, "allocated array of threads");
+       for (unsigned int i = 0; i < mch->ncpus; i++) {
+               int error = pthread_create(&threads[i], NULL, func, arg);
+               T_QUIET; T_ASSERT_POSIX_ZERO(error, "pthread_create");
+       }
+
+       T_SETUPEND;
+
+       return threads;
+}
+
+static void
+end_threads(const struct machine *mch, pthread_t *threads)
+{
+       for (unsigned int i = 0; i < mch->ncpus; i++) {
+               int error = pthread_join(threads[i], NULL);
+               T_QUIET; T_ASSERT_POSIX_ZERO(error, "joined thread %d", i);
+       }
+       free(threads);
+}
+
+struct tally {
+       uint64_t firstvalue;
+       uint64_t lastvalue;
+       uint64_t nchecks;
+       uint64_t nzero;
+       uint64_t nstuck;
+       uint64_t ndecrease;
+};
+
+static void
+check_counters(unsigned int ncpus, unsigned int nctrs, struct tally *tallies,
+               uint64_t *counts)
+{
+       for (unsigned int i = 0; i < ncpus; i++) {
+               for (unsigned int j = 0; j < nctrs; j++) {
+                       unsigned int ctr = i * nctrs + j;
+                       struct tally *tly = &tallies[ctr];
+                       uint64_t count = counts[ctr];
+
+                       if (counts[ctr] == 0) {
+                               tly->nzero++;
+                       }
+                       if (tly->lastvalue == count) {
+                               tly->nstuck++;
+                       }
+                       if (tly->lastvalue > count) {
+                               tly->ndecrease++;
+                       }
+                       tly->lastvalue = count;
+                       if (tly->nchecks == 0) {
+                               tly->firstvalue = count;
+                       }
+                       tly->nchecks++;
+               }
+       }
+}
 
+static void
+check_tally(const char *name, unsigned int ncpus, unsigned int nctrs,
+               struct tally *tallies)
+{
+       for (unsigned int i = 0; i < ncpus; i++) {
+               for (unsigned int j = 0; j < nctrs; j++) {
+                       unsigned int ctr = i * nctrs + j;
+                       struct tally *tly = &tallies[ctr];
+
+                       T_LOG("CPU %2u PMC %u: nchecks = %llu, last value = %llx, "
+                               "delta = %llu, nstuck = %llu", i, j,
+                           tly->nchecks, tly->lastvalue, tly->lastvalue - tly->firstvalue,
+                           tly->nstuck);
+                       T_QUIET; T_EXPECT_GT(tly->nchecks, 0ULL,
+                           "checked that CPU %d %s counter %d values", i, name, j);
+                       T_QUIET; T_EXPECT_EQ(tly->nzero, 0ULL,
+                           "CPU %d %s counter %d value was zero", i, name, j);
+                       T_QUIET; T_EXPECT_EQ(tly->nstuck, 0ULL,
+                           "CPU %d %s counter %d value was stuck", i, name, j);
+                       T_QUIET; T_EXPECT_EQ(tly->ndecrease, 0ULL,
+                           "CPU %d %s counter %d value decreased", i, name, j);
+               }
+       }
+}
+
+#define TESTDUR_NS (5 * NSEC_PER_SEC)
+
+T_DECL(kpc_cpu_direct_configurable,
+    "test that configurable counters return monotonically increasing values")
+{
+       skip_if_unsupported();
+
+       struct machine mch = {};
+       prepare_kpc(&mch, true, false);
+
+       int until = 0;
+       pthread_t *threads = start_threads(&mch, spin, &until);
+       start_kpc();
+
+       T_SETUPBEGIN;
+
+       uint64_t startns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
+       uint64_t *counts = kpc_counterbuf_alloc();
+       T_QUIET; T_ASSERT_NOTNULL(counts, "allocated space for counter values");
+       memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
+       struct tally *tly = calloc(mch.ncpus * mch.nconfig, sizeof(*tly));
+       T_QUIET; T_ASSERT_NOTNULL(tly, "allocated space for tallies");
+
+       T_SETUPEND;
+
+       int n = 0;
+       while (clock_gettime_nsec_np(CLOCK_MONOTONIC) - startns < TESTDUR_NS) {
+               int ret = kpc_get_cpu_counters(true,
+                   KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
+               T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_get_cpu_counters");
+
+               check_counters(mch.ncpus, mch.nconfig, tly, counts);
+
+               usleep(10000);
+               n++;
+               if (n % 100 == 0) {
+                       T_LOG("checked 100 times");
+               }
+       }
+
+       check_tally("config", mch.ncpus, mch.nconfig, tly);
+
+       until = 1;
+       end_threads(&mch, threads);
+}
+
+T_DECL(kpc_thread_direct_instrs_cycles,
+    "test that fixed thread counters return monotonically increasing values")
+{
        int err;
        uint32_t ctrs_cnt;
        uint64_t *ctrs_a;
        uint64_t *ctrs_b;
 
+       skip_if_unsupported();
+
        T_SETUPBEGIN;
 
        ctrs_cnt = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
@@ -68,10 +340,227 @@ T_DECL(fixed_thread_counters,
        free(ctrs_b);
 }
 
+#define PMI_TEST_DURATION_NS (15 * NSEC_PER_SEC)
+#define PERIODIC_CPU_COUNT_MS (250)
+#define NTIMESLICES (72)
+#define PMI_PERIOD (50ULL * 1000 * 1000)
+#define END_EVENT KDBG_EVENTID(0xfe, 0xfe, 0)
+
+struct cpu {
+       uint64_t prev_count, max_skid;
+       unsigned int timeslices[NTIMESLICES];
+};
+
+T_DECL(kpc_pmi_configurable,
+    "test that PMIs don't interfere with sampling counters in kperf")
+{
+       skip_if_unsupported();
+
+       start_controlling_ktrace();
+       struct machine mch = {};
+       prepare_kpc(&mch, true, false);
+
+       T_SETUPBEGIN;
+
+       uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
+       T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
+       periods[0] = PMI_PERIOD;
+
+       int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
+       free(periods);
+
+       int32_t *actions = calloc(mch.nconfig, sizeof(*actions));
+       actions[0] = 1;
+       ret = kpc_set_actionid(KPC_CLASS_CONFIGURABLE_MASK, actions);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_actionid");
+       free(actions);
+
+       (void)kperf_action_count_set(1);
+       ret = kperf_action_samplers_set(1, KPERF_SAMPLER_TINFO);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kperf_action_samplers_set");
+
+       ktrace_config_t ktconfig = ktrace_config_create_current();
+       T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(ktconfig, "create current config");
+       ret = ktrace_config_print_description(ktconfig, stdout);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, "print config description");
+
+       struct cpu *cpus = calloc(mch.ncpus, sizeof(*cpus));
+       T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(cpus, "allocate CPUs array");
+
+       __block unsigned int nsamples = 0;
+       __block uint64_t first_ns = 0;
+       __block uint64_t last_ns = 0;
+
+       ktrace_session_t sess = ktrace_session_create();
+       T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");
+
+       ktrace_events_single(sess, PERF_KPC_PMI, ^(struct trace_point *tp) {
+               if (tp->debugid & DBG_FUNC_END) {
+                       return;
+               }
+
+               uint64_t cur_ns = 0;
+               int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
+                   tp->timestamp, &cur_ns);
+               T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
+
+               uint64_t count = tp->arg2;
+               if (first_ns == 0) {
+                       first_ns = cur_ns;
+               }
+               struct cpu *cpu = &cpus[tp->cpuid];
+
+               if (cpu->prev_count != 0) {
+                       uint64_t delta = count - cpu->prev_count;
+                       T_QUIET; T_EXPECT_GT(delta, PMI_PERIOD,
+                           "counter delta should be greater than PMI period");
+                       uint64_t skid = delta - PMI_PERIOD;
+                       if (skid > cpu->max_skid) {
+                               cpu->max_skid = skid;
+                       }
+               }
+               cpu->prev_count = count;
+
+               double slice = (double)(cur_ns - first_ns) / PMI_TEST_DURATION_NS *
+                   NTIMESLICES;
+               if (slice < NTIMESLICES) {
+                       cpu->timeslices[(unsigned int)slice] += 1;
+               }
+
+               nsamples++;
+       });
+
+       ktrace_events_single(sess, END_EVENT, ^(struct trace_point *tp) {
+               int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
+                   tp->timestamp, &last_ns);
+               T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
+
+               ktrace_end(sess, 1);
+       });
+
+       uint64_t *counts = kpc_counterbuf_alloc();
+       T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(counts,
+                       "allocated counter values array");
+       memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
+       struct tally *tly = calloc(mch.ncpus * (mch.nconfig + mch.nfixed),
+                       sizeof(*tly));
+       T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(tly, "allocated tallies array");
+
+       dispatch_source_t cpu_count_timer = dispatch_source_create(
+                       DISPATCH_SOURCE_TYPE_TIMER, 0, 0, dispatch_get_main_queue());
+       dispatch_source_set_timer(cpu_count_timer, dispatch_time(DISPATCH_TIME_NOW,
+           PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC),
+           PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC, 0);
+       dispatch_source_set_cancel_handler(cpu_count_timer, ^{
+               dispatch_release(cpu_count_timer);
+       });
+
+       __block uint64_t first_check_ns = 0;
+       __block uint64_t last_check_ns = 0;
+
+       dispatch_source_set_event_handler(cpu_count_timer, ^{
+               int cret = kpc_get_cpu_counters(true,
+                   KPC_CLASS_FIXED_MASK | KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
+               T_QUIET; T_ASSERT_POSIX_SUCCESS(cret, "kpc_get_cpu_counters");
+
+               if (!first_check_ns) {
+                       first_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
+               } else {
+                       last_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
+               }
+               check_counters(mch.ncpus, mch.nfixed + mch.nconfig, tly, counts);
+       });
+
+       int stop = 0;
+       (void)start_threads(&mch, spin, &stop);
+
+       ktrace_set_completion_handler(sess, ^{
+               dispatch_cancel(cpu_count_timer);
+
+               check_tally("config", mch.ncpus, mch.nfixed + mch.nconfig, tly);
+
+               struct rusage_info_v4 post_ru = {};
+               int ruret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4,
+                               (rusage_info_t *)&post_ru);
+               T_QUIET; T_ASSERT_POSIX_SUCCESS(ruret, "got rusage information");
+
+               T_LOG("saw %llu cycles in process", post_ru.ri_cycles - pre_ru.ri_cycles);
+               uint64_t total = 0;
+
+               unsigned int nsamplecpus = 0;
+               char sample_slices[NTIMESLICES + 1];
+               sample_slices[NTIMESLICES] = '\0';
+               for (unsigned int i = 0; i < mch.ncpus; i++) {
+                       memset(sample_slices, '.', sizeof(sample_slices) - 1);
+
+                       struct cpu *cpu = &cpus[i];
+                       unsigned int nsampleslices = 0, ncpusamples = 0,
+                                       last_contiguous = 0;
+                       bool seen_empty = false;
+                       for (unsigned int j = 0; j < NTIMESLICES; j++) {
+                               unsigned int nslice = cpu->timeslices[j];
+                               nsamples += nslice;
+                               ncpusamples += nslice;
+                               if (nslice > 0) {
+                                       nsampleslices++;
+                                       sample_slices[j] = '*';
+                               } else {
+                                       seen_empty = true;
+                               }
+                               if (!seen_empty) {
+                                       last_contiguous = j;
+                               }
+                       }
+                       unsigned int ctr = i * (mch.nfixed + mch.nconfig) + mch.nfixed;
+                       uint64_t delta = tly[ctr].lastvalue - tly[ctr].firstvalue;
+                       T_LOG("%g GHz", (double)delta / (last_check_ns - first_check_ns));
+                       total += delta;
+                       T_LOG("CPU %2u: %4u/%u, %6u/%llu, max skid = %llu (%.1f%%), "
+                                       "last contiguous = %u", i,
+                                       nsampleslices, NTIMESLICES, ncpusamples, delta / PMI_PERIOD,
+                                       cpu->max_skid, (double)cpu->max_skid / PMI_PERIOD * 100,
+                                       last_contiguous);
+                       T_LOG("%s", sample_slices);
+                       if (nsampleslices > 0) {
+                               nsamplecpus++;
+                       }
+                       T_EXPECT_EQ(last_contiguous, NTIMESLICES - 1,
+                                       "CPU %2u: saw samples in each time slice", i);
+               }
+               T_LOG("kpc reported %llu total cycles", total);
+               T_LOG("saw %u sample events, across %u/%u cpus", nsamples, nsamplecpus,
+                               mch.ncpus);
+               T_END;
+       });
+
+       int dbglvl = 3;
+       ret = sysctlbyname("kperf.debug_level", NULL, NULL, &dbglvl,
+           sizeof(dbglvl));
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "set kperf debug level");
+       ret = kperf_sample_set(1);
+       T_ASSERT_POSIX_SUCCESS(ret, "kperf_sample_set");
+
+       start_kpc();
+
+       int error = ktrace_start(sess, dispatch_get_main_queue());
+       T_ASSERT_POSIX_ZERO(error, "started tracing");
+
+       dispatch_after(dispatch_time(DISPATCH_TIME_NOW, PMI_TEST_DURATION_NS),
+                       dispatch_get_main_queue(), ^{
+               T_LOG("ending tracing after timeout");
+               kdebug_trace(END_EVENT, 0, 0, 0, 0);
+       });
+
+       dispatch_activate(cpu_count_timer);
+
+       T_SETUPEND;
+
+       dispatch_main();
+}
+
 #if defined(__arm64__)
-/*
- * This policy only applies to arm64 devices.
- */
+// This policy only applies to arm64 devices.
 
 static int g_prev_disablewl = 0;
 
@@ -87,7 +576,7 @@ whitelist_atend(void)
 
 T_DECL(whitelist, "ensure kpc's whitelist is filled out")
 {
-       /* Start enforcing the whitelist. */
+       // Start enforcing the whitelist.
        int set = 0;
        size_t getsz = sizeof(g_prev_disablewl);
        int ret = sysctlbyname("kpc.disable_whitelist", &g_prev_disablewl, &getsz,
@@ -102,31 +591,29 @@ T_DECL(whitelist, "ensure kpc's whitelist is filled out")
        uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
        uint64_t *config = calloc(nconfigs, sizeof(*config));
 
-       /*
-        * Check that events in the whitelist are allowed.  CORE_CYCLE (0x2) is
-        * always present in the whitelist.
-        */
+       // Check that events in the whitelist are allowed.  CORE_CYCLE (0x2) is
+       // always present in the whitelist.
        config[0] = 0x02;
        ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config);
        T_ASSERT_POSIX_SUCCESS(ret, "configured kpc to count cycles");
 
-       /* Check that non-event bits are ignored by the whitelist. */
+       // Check that non-event bits are ignored by the whitelist.
        config[0] = 0x102;
        ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config);
        T_ASSERT_POSIX_SUCCESS(ret,
            "configured kpc to count cycles with non-event bits set");
 
-       /* Check that configurations of non-whitelisted events fail. */
+       // Check that configurations of non-whitelisted events fail.
        config[0] = 0xfe;
        ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config);
        T_ASSERT_POSIX_FAILURE(ret, EPERM,
            "shouldn't allow arbitrary events with whitelist enabled");
 
-       /* Clean up the configuration. */
+       // Clean up the configuration.
        config[0] = 0;
        (void)kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config);
 
        free(config);
 }
 
-#endif /* defined(__arm64__) */
+#endif // defined(__arm64__)
index 29ceeab7d2788f5d77bb8778b44027b4504a8323..c74b9671a6c4270d09540c70ef357ee50067fa30 100644 (file)
@@ -52,6 +52,7 @@ spinning_thread(void *semp)
 #define PERF_KPC_REG    KDBG_EVENTID(DBG_PERF, 6, 5)
 #define PERF_KPC_REG32  KDBG_EVENTID(DBG_PERF, 6, 7)
 #define PERF_INSTR_DATA KDBG_EVENTID(DBG_PERF, 1, 17)
+#define PERF_EVENT      KDBG_EVENTID(DBG_PERF, 0, 0)
 
 #define SCHED_HANDOFF KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, \
                MACH_STACK_HANDOFF)
index 466f3d9a7739bb9dc334409cc156982dc8d0d073..b31cc4dad9572033998760065053ef6db9d548d8 100644 (file)
@@ -5,4 +5,7 @@
 
 void configure_kperf_stacks_timer(pid_t pid, unsigned int period_ms);
 
+#define PERF_SAMPLE KDBG_EVENTID(DBG_PERF, 0, 0)
+#define PERF_KPC_PMI KDBG_EVENTID(DBG_PERF, 6, 0)
+
 #endif /* !defined(KPERF_HELPERS_H) */
diff --git a/tests/launchd_plists/com.apple.xnu.test.task_create_suid_cred.plist b/tests/launchd_plists/com.apple.xnu.test.task_create_suid_cred.plist
new file mode 100644 (file)
index 0000000..463d039
--- /dev/null
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+       <key>Label</key>
+       <string>com.apple.xnu.test.task_create_suid_cred</string>
+       <key>MachServices</key>
+       <dict>
+               <key>com.apple.xnu.test.task_create_suid_cred</key>
+               <true/>
+       </dict>
+       <key>ThrottleInterval</key>
+       <integer>1</integer>
+       <key>UserName</key>
+       <string>root</string>
+       <key>ProcessType</key>
+       <string>Adaptive</string>
+       <key>EnvironmentVariables</key>
+       <dict>
+               <key>MallocNanoZone</key>
+               <string>1</string>
+       </dict>
+</dict>
+</plist>
index c9399519aaccd50b047673a31b85dd1246cd0673..471312f805abb443a6ccb51be05e63d7a97258fa 100644 (file)
@@ -2,6 +2,7 @@
 #include <signal.h>
 #include <sys/sysctl.h>
 #include <sys/kern_memorystatus.h>
+#include <time.h>
 #include <mach-o/dyld.h>
 #include <mach/mach_vm.h>
 #include <mach/vm_page_size.h>  /* Needed for vm_region info */
@@ -36,7 +37,7 @@ T_GLOBAL_META(
        X(MEMORYSTATUS_CONTROL_FAILED) \
        X(IS_FREEZABLE_NOT_AS_EXPECTED) \
        X(MEMSTAT_PRIORITY_CHANGE_FAILED) \
-    X(INVALID_ALLOCATE_PAGES_ARGUMENTS) \
+       X(INVALID_ALLOCATE_PAGES_ARGUMENTS) \
        X(EXIT_CODE_MAX)
 
 #define EXIT_CODES_ENUM(VAR) VAR,
@@ -599,6 +600,7 @@ memorystatus_assertion_test_demote_frozen()
        /* these values will remain fixed during testing */
        int             active_limit_mb = 15;   /* arbitrary */
        int             inactive_limit_mb = 7;  /* arbitrary */
+       int             demote_value = 1;
        /* Launch the child process, and elevate its priority */
        int requestedpriority;
        dispatch_source_t ds_signal, ds_exit;
@@ -613,8 +615,8 @@ memorystatus_assertion_test_demote_frozen()
                /* Freeze the process, trigger aggressive demotion, and check that it hasn't been demoted. */
                freeze_process(child_pid);
                /* Aggressive demotion */
-               sysctl_ret = sysctlbyname("kern.memorystatus_demote_frozen_processes", NULL, NULL, NULL, 0);
-               T_ASSERT_POSIX_SUCCESS(sysctl_ret, "sysctl kern.memorystatus_demote_frozen_processes failed");
+               sysctl_ret = sysctlbyname("kern.memorystatus_demote_frozen_processes", NULL, NULL, &demote_value, sizeof(demote_value));
+               T_QUIET; T_ASSERT_POSIX_SUCCESS(sysctl_ret, "sysctl kern.memorystatus_demote_frozen_processes succeeded");
                /* Check */
                (void)check_properties(child_pid, requestedpriority, inactive_limit_mb, 0x0, ASSERTION_STATE_IS_SET, "Priority was set");
                T_LOG("Relinquishing our assertion.");
@@ -622,7 +624,7 @@ memorystatus_assertion_test_demote_frozen()
                relinquish_assertion_priority(child_pid, 0x0);
                (void)check_properties(child_pid, JETSAM_PRIORITY_AGING_BAND2, inactive_limit_mb, 0x0, ASSERTION_STATE_IS_RELINQUISHED, "Assertion was relinquished.");
                /* Kill the child */
-               T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "Unable to kill child process");
+               T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "Killed child process");
                T_END;
        });
 
@@ -650,3 +652,41 @@ memorystatus_assertion_test_demote_frozen()
 T_DECL(assertion_test_demote_frozen, "demoted frozen process goes to asserted priority.", T_META_ASROOT(true)) {
        memorystatus_assertion_test_demote_frozen();
 }
+
+T_DECL(budget_replenishment, "budget replenishes properly") {
+       size_t length;
+       int ret;
+       static unsigned int kTestIntervalSecs = 60 * 60 * 32; // 32 Hours
+       unsigned int memorystatus_freeze_daily_mb_max, memorystatus_freeze_daily_pages_max;
+       static unsigned int kFixedPointFactor = 100;
+       static unsigned int kNumSecondsInDay = 60 * 60 * 24;
+       unsigned int new_budget, expected_new_budget_pages;
+       size_t new_budget_ln;
+       unsigned int page_size = (unsigned int) get_vmpage_size();
+
+       /*
+        * Calculate a new budget as if the previous interval expired kTestIntervalSecs
+        * ago and we used up its entire budget.
+        */
+       length = sizeof(kTestIntervalSecs);
+       new_budget_ln = sizeof(new_budget);
+       ret = sysctlbyname("vm.memorystatus_freeze_calculate_new_budget", &new_budget, &new_budget_ln, &kTestIntervalSecs, length);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.memorystatus_freeze_calculate_new_budget");
+
+       // Grab the daily budget.
+       length = sizeof(memorystatus_freeze_daily_mb_max);
+       ret = sysctlbyname("kern.memorystatus_freeze_daily_mb_max", &memorystatus_freeze_daily_mb_max, &length, NULL, 0);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.memorystatus_freeze_daily_mb_max");
+
+       memorystatus_freeze_daily_pages_max = memorystatus_freeze_daily_mb_max * 1024 * 1024 / page_size;
+
+       /*
+        * We're kTestIntervalSecs past the start of a new interval, which means
+        * we are owed kTestIntervalSecs seconds' worth of budget on top of the
+        * new interval's daily budget.
+        */
+       expected_new_budget_pages = memorystatus_freeze_daily_pages_max;
+       expected_new_budget_pages += ((kTestIntervalSecs * kFixedPointFactor) / (kNumSecondsInDay)
+           * memorystatus_freeze_daily_pages_max) / kFixedPointFactor;
+
+       T_QUIET; T_ASSERT_EQ(new_budget, expected_new_budget_pages, "new budget matches the expected value");
+}
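
For concreteness, the fixed-point arithmetic above works out as follows with illustrative numbers (a hypothetical 1024 MB daily budget and 16 KB page size; neither value comes from the test itself):

    /*
     * daily_pages_max = 1024 * 1024 * 1024 / 16384            = 65536 pages
     * owed            = ((115200 * 100) / 86400) * 65536 / 100
     *                 = (133 * 65536) / 100                   = 87162 pages
     * expected budget = 65536 + 87162                         = 152698 pages
     *
     * 115200 is kTestIntervalSecs (32 hours); every division is an integer
     * division, exactly as in the expression above.
     */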
diff --git a/tests/net_bridge.c b/tests/net_bridge.c
new file mode 100644 (file)
index 0000000..54ad5b6
--- /dev/null
@@ -0,0 +1,3587 @@
+/*
+ * Copyright (c) 2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * net_bridge.c
+ * - test if_bridge.c functionality
+ */
+
+#include <darwintest.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/event.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <netinet/bootp.h>
+#include <netinet/tcp.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <net/if_arp.h>
+#include <net/bpf.h>
+#include <net/if_bridgevar.h>
+#include <net/if_fake_var.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <TargetConditionals.h>
+#include <darwintest_utils.h>
+#include "bpflib.h"
+#include "in_cksum.h"
+
+static bool S_debug;
+static bool S_cleaning_up;
+
+#define ALL_ADDRS (uint32_t)(-1)
+
+#define DHCP_PAYLOAD_MIN        sizeof(struct bootp)
+#define DHCP_FLAGS_BROADCAST    ((u_short)0x8000)
+
+typedef union {
+       char            bytes[DHCP_PAYLOAD_MIN];
+       /* force 4-byte alignment */
+       uint32_t        words[DHCP_PAYLOAD_MIN / sizeof(uint32_t)];
+} dhcp_min_payload, *dhcp_min_payload_t;
+
+#define ETHER_PKT_LEN           (ETHER_HDR_LEN + ETHERMTU)
+typedef union {
+       char            bytes[ETHER_PKT_LEN];
+       /* force 4-byte alignment */
+       uint32_t        words[ETHER_PKT_LEN / sizeof(uint32_t)];
+} ether_packet, *ether_packet_t;
+
+typedef struct {
+       struct ip       ip;
+       struct udphdr   udp;
+} ip_udp_header_t;
+
+typedef struct {
+       struct in_addr  src_ip;
+       struct in_addr  dst_ip;
+       char            zero;
+       char            proto;
+       unsigned short  length;
+} udp_pseudo_hdr_t;
+
+typedef struct {
+       struct ip       ip;
+       struct tcphdr   tcp;
+} ip_tcp_header_t;
+
+typedef union {
+       ip_udp_header_t udp;
+       ip_tcp_header_t tcp;
+} ip_udp_tcp_header_u;
+
+typedef struct {
+       struct in_addr  src_ip;
+       struct in_addr  dst_ip;
+       char            zero;
+       char            proto;
+       unsigned short  length;
+} tcp_pseudo_hdr_t;
+
+typedef struct {
+       struct ip6_hdr  ip6;
+       struct udphdr   udp;
+} ip6_udp_header_t;
+
+typedef struct {
+       struct in6_addr src_ip;
+       struct in6_addr dst_ip;
+       char            zero;
+       char            proto;
+       unsigned short  length;
+} udp6_pseudo_hdr_t;
+
+typedef struct {
+       char            ifname[IFNAMSIZ];
+       char            member_ifname[IFNAMSIZ]; /* member of bridge */
+       ether_addr_t    member_mac;
+       int             fd;
+       u_int           unit;
+       u_int           num_addrs;
+       void *          rx_buf;
+       int             rx_buf_size;
+       bool            mac_nat;
+
+       u_int           test_count;
+       u_int           test_address_count;
+       uint64_t        test_address_present;
+} switch_port, *switch_port_t;
+
+typedef struct {
+       u_int           size;
+       u_int           count;
+       bool            mac_nat;
+       switch_port     list[1];
+} switch_port_list, *switch_port_list_t;
+
+static struct ifbareq *
+bridge_rt_table_copy(u_int * ret_count);
+
+static void
+bridge_rt_table_log(struct ifbareq *rt_table, u_int count);
+
+static struct ifbrmne *
+bridge_mac_nat_entries_copy(u_int * ret_count);
+
+static void
+bridge_mac_nat_entries_log(struct ifbrmne * entries, u_int count);
+
+static void
+system_cmd(const char *cmd, bool fail_on_error);
+
+static int
+inet_dgram_socket(void)
+{
+       int     s;
+
+       s = socket(AF_INET, SOCK_DGRAM, 0);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(s, "socket(AF_INET, SOCK_DGRAM, 0)");
+       return s;
+}
+
+
+/**
+** Packet creation/display
+**/
+#define BOOTP_SERVER_PORT       67
+#define BOOTP_CLIENT_PORT       68
+
+#define TEST_SOURCE_PORT        14
+#define TEST_DEST_PORT          15
+
+#define EA_UNIT_INDEX           4
+#define EA_ADDR_INDEX           5
+
+static void
+set_ethernet_address(ether_addr_t *eaddr, u_int unit, u_int addr_index)
+{
+       u_char  *a = eaddr->octet;
+
+       a[0] = 0x02;
+       a[1] = 0x00;
+       a[2] = 0x00;
+       a[3] = 0x00;
+       a[EA_UNIT_INDEX] = (u_char)unit;
+       a[EA_ADDR_INDEX] = (u_char)addr_index;
+}
+
+#define TEN_NET                 0x0a000000
+#define TEN_1_NET               (TEN_NET | 0x010000)
+
+static void
+get_ipv4_address(u_int unit, u_int addr_index, struct in_addr *ip)
+{
+       /* up to 255 units, 255 addresses */
+       ip->s_addr = htonl(TEN_1_NET | (unit << 8) | addr_index);
+       return;
+}
+
+#define IN6ADDR_ULA_INIT \
+       {{{ 0xfd, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+           0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}}
+
+static struct in6_addr ula_address = IN6ADDR_ULA_INIT;
+
+#define ULA_UNIT_INDEX  14
+#define ULA_ADDR_INDEX  15
+
+static void
+get_ipv6_address(u_int unit, u_int addr_index, struct in6_addr *ip)
+{
+       *ip = ula_address;
+       /* up to 255 units, 255 addresses */
+       ip->s6_addr[ULA_UNIT_INDEX] = (uint8_t)unit;
+       ip->s6_addr[ULA_ADDR_INDEX] = (uint8_t)addr_index;
+}
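
As an illustration of the test's addressing scheme, set_ethernet_address(), get_ipv4_address(), and get_ipv6_address() map unit 2, addr_index 3 (values chosen for this example only) to:

    /*
     * MAC : 02:00:00:00:02:03  locally administered; unit lands in octet
     *                          EA_UNIT_INDEX (4), address in EA_ADDR_INDEX (5)
     * IPv4: 10.1.2.3           TEN_1_NET | (unit << 8) | addr_index
     * IPv6: fd00::203          ULA with unit in byte 14, addr_index in byte 15
     */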
+
+
+static void
+get_ip_address(uint8_t af, u_int unit, u_int addr_index, union ifbrip *ip)
+{
+       switch (af) {
+       case AF_INET:
+               get_ipv4_address(unit, addr_index, &ip->ifbrip_addr);
+               break;
+       case AF_INET6:
+               get_ipv6_address(unit, addr_index, &ip->ifbrip_addr6);
+               break;
+       default:
+               T_FAIL("unrecognized address family %u", af);
+               break;
+       }
+}
+
+static bool
+ip_addresses_are_equal(uint8_t af, union ifbrip * ip1, union ifbrip * ip2)
+{
+       bool    equal;
+
+       switch (af) {
+       case AF_INET:
+               equal = (ip1->ifbrip_addr.s_addr == ip2->ifbrip_addr.s_addr);
+               break;
+       case AF_INET6:
+               equal = IN6_ARE_ADDR_EQUAL(&ip1->ifbrip_addr6,
+                   &ip2->ifbrip_addr6);
+               break;
+       default:
+               T_FAIL("unrecognized address family %u", af);
+               equal = false;
+               break;
+       }
+       return equal;
+}
+
+static ether_addr_t ether_broadcast = {
+       { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
+};
+
+static ether_addr_t ether_external = {
+       { 0x80, 0x00, 0x00, 0x00, 0x00, 0x01 }
+};
+
+static inline struct in_addr
+get_external_ipv4_address(void)
+{
+       struct in_addr  ip;
+
+       /* IP 10.1.255.1 */
+       ip.s_addr = htonl(TEN_1_NET | 0xff01);
+       return ip;
+}
+
+static inline void
+get_external_ip_address(uint8_t af, union ifbrip * ip)
+{
+       switch (af) {
+       case AF_INET:
+               /* IP 10.1.255.1 */
+               ip->ifbrip_addr = get_external_ipv4_address();
+               break;
+       case AF_INET6:
+               /* fd80::1 */
+               ip->ifbrip_addr6 = ula_address;
+               ip->ifbrip_addr6.s6_addr[1] = 0x80;
+               ip->ifbrip_addr6.s6_addr[15] = 0x01;
+               break;
+       default:
+               T_FAIL("unrecognized address family %u", af);
+               break;
+       }
+}
+
+static inline void
+get_broadcast_ip_address(uint8_t af, union ifbrip * ip)
+{
+       switch (af) {
+       case AF_INET:
+               ip->ifbrip_addr.s_addr = INADDR_BROADCAST;
+               break;
+       case AF_INET6:
+               /* ff02:: link-local scope multicast */
+               ip->ifbrip_addr6 = in6addr_any;
+               ip->ifbrip_addr6.s6_addr[0] = 0xff;
+               ip->ifbrip_addr6.s6_addr[1] = __IPV6_ADDR_SCOPE_LINKLOCAL;
+               break;
+       default:
+               T_FAIL("unrecognized address family %u", af);
+               break;
+       }
+}
+
+
+#define ETHER_NTOA_BUFSIZE      (ETHER_ADDR_LEN * 3)
+static const char *
+ether_ntoa_buf(const ether_addr_t *n, char * buf, int buf_size)
+{
+       char *  str;
+
+       str = ether_ntoa(n);
+       strlcpy(buf, str, buf_size);
+       return buf;
+}
+
+static const char *
+inet_ptrtop(int af, const void * ptr, char * buf, socklen_t buf_size)
+{
+       union {
+               struct in_addr  ip;
+               struct in6_addr ip6;
+       } u;
+
+       switch (af) {
+       case AF_INET:
+               bcopy(ptr, &u.ip, sizeof(u.ip));
+               break;
+       case AF_INET6:
+               bcopy(ptr, &u.ip6, sizeof(u.ip6));
+               break;
+       default:
+               return NULL;
+       }
+       return inet_ntop(af, &u, buf, buf_size);
+}
+
+static __inline__ char *
+arpop_name(u_int16_t op)
+{
+       switch (op) {
+       case ARPOP_REQUEST:
+               return "ARP REQUEST";
+       case ARPOP_REPLY:
+               return "ARP REPLY";
+       case ARPOP_REVREQUEST:
+               return "REVARP REQUEST";
+       case ARPOP_REVREPLY:
+               return "REVARP REPLY";
+       default:
+               break;
+       }
+       return "<unknown>";
+}
+
+static void
+arp_frame_validate(const struct ether_arp * earp, u_int len, bool dump)
+{
+       const struct arphdr *   arp_p;
+       int                     arphrd;
+       char                    buf_sender_ether[ETHER_NTOA_BUFSIZE];
+       char                    buf_sender_ip[INET_ADDRSTRLEN];
+       char                    buf_target_ether[ETHER_NTOA_BUFSIZE];
+       char                    buf_target_ip[INET_ADDRSTRLEN];
+
+       T_QUIET;
+       T_ASSERT_GE(len, (u_int)sizeof(*earp),
+           "%s ARP packet size %u needs %u",
+           __func__, len, (u_int)sizeof(*earp));
+       if (!dump) {
+               return;
+       }
+       arp_p = &earp->ea_hdr;
+       arphrd = ntohs(arp_p->ar_hrd);
+       T_LOG("%s type=0x%x proto=0x%x", arpop_name(ntohs(arp_p->ar_op)),
+           arphrd, ntohs(arp_p->ar_pro));
+       if (arp_p->ar_hln == sizeof(earp->arp_sha)) {
+               ether_ntoa_buf((const ether_addr_t *)earp->arp_sha,
+                   buf_sender_ether,
+                   sizeof(buf_sender_ether));
+               ether_ntoa_buf((const ether_addr_t *)earp->arp_tha,
+                   buf_target_ether,
+                   sizeof(buf_target_ether));
+               T_LOG("Sender H/W\t%s", buf_sender_ether);
+               T_LOG("Target H/W\t%s", buf_target_ether);
+       }
+       inet_ptrtop(AF_INET, earp->arp_spa,
+           buf_sender_ip, sizeof(buf_sender_ip));
+       inet_ptrtop(AF_INET, earp->arp_tpa,
+           buf_target_ip, sizeof(buf_target_ip));
+       T_LOG("Sender IP\t%s", buf_sender_ip);
+       T_LOG("Target IP\t%s", buf_target_ip);
+       return;
+}
+
+static void
+ip_frame_validate(const void * buf, u_int buf_len, bool dump)
+{
+       char                    buf_dst[INET_ADDRSTRLEN];
+       char                    buf_src[INET_ADDRSTRLEN];
+       const ip_udp_header_t * ip_udp;
+       u_int                   ip_len;
+
+       T_QUIET;
+       T_ASSERT_GE(buf_len, (u_int)sizeof(struct ip), NULL);
+       ip_udp = (const ip_udp_header_t *)buf;
+       ip_len = ntohs(ip_udp->ip.ip_len);
+       inet_ptrtop(AF_INET, &ip_udp->ip.ip_src,
+           buf_src, sizeof(buf_src));
+       inet_ptrtop(AF_INET, &ip_udp->ip.ip_dst,
+           buf_dst, sizeof(buf_dst));
+       if (dump) {
+               T_LOG("ip src %s dst %s len %u id %d",
+                   buf_src, buf_dst, ip_len,
+                   ntohs(ip_udp->ip.ip_id));
+       }
+       T_QUIET;
+       T_ASSERT_GE(buf_len, ip_len, NULL);
+       T_QUIET;
+       T_ASSERT_EQ(ip_udp->ip.ip_v, IPVERSION, NULL);
+       T_QUIET;
+       T_ASSERT_EQ((u_int)(ip_udp->ip.ip_hl << 2),
+           (u_int)sizeof(struct ip), NULL);
+       if (ip_udp->ip.ip_p == IPPROTO_UDP) {
+               u_int   udp_len;
+               u_int   data_len;
+
+               T_QUIET;
+               T_ASSERT_GE(buf_len, (u_int)sizeof(*ip_udp), NULL);
+               udp_len = ntohs(ip_udp->udp.uh_ulen);
+               T_QUIET;
+               T_ASSERT_GE(udp_len, (u_int)sizeof(ip_udp->udp), NULL);
+               data_len = udp_len - (u_int)sizeof(ip_udp->udp);
+               if (dump) {
+                       T_LOG("udp src 0x%x dst 0x%x len %u"
+                           " csum 0x%x datalen %u",
+                           ntohs(ip_udp->udp.uh_sport),
+                           ntohs(ip_udp->udp.uh_dport),
+                           udp_len,
+                           ntohs(ip_udp->udp.uh_sum),
+                           data_len);
+               }
+       }
+}
+
+static void
+ip6_frame_validate(const void * buf, u_int buf_len, bool dump)
+{
+       char                    buf_dst[INET6_ADDRSTRLEN];
+       char                    buf_src[INET6_ADDRSTRLEN];
+       const struct ip6_hdr *  ip6;
+       u_int                   ip6_len;
+
+       T_QUIET;
+       T_ASSERT_GE(buf_len, (u_int)sizeof(struct ip6_hdr), NULL);
+       ip6 = (const struct ip6_hdr *)buf;
+       ip6_len = ntohs(ip6->ip6_plen);
+       inet_ptrtop(AF_INET6, &ip6->ip6_src, buf_src, sizeof(buf_src));
+       inet_ptrtop(AF_INET6, &ip6->ip6_dst, buf_dst, sizeof(buf_dst));
+       if (dump) {
+               T_LOG("ip6 src %s dst %s len %u", buf_src, buf_dst, ip6_len);
+       }
+       T_QUIET;
+       T_ASSERT_GE(buf_len, ip6_len + (u_int)sizeof(struct ip6_hdr), NULL);
+       T_QUIET;
+       T_ASSERT_EQ((ip6->ip6_vfc & IPV6_VERSION_MASK),
+           IPV6_VERSION, NULL);
+       T_QUIET;
+       switch (ip6->ip6_nxt) {
+       case IPPROTO_UDP: {
+               u_int                   data_len;
+               const ip6_udp_header_t *ip6_udp;
+               u_int                   udp_len;
+
+               ip6_udp = (const ip6_udp_header_t *)buf;
+               T_QUIET;
+               T_ASSERT_GE(buf_len, (u_int)sizeof(*ip6_udp), NULL);
+               udp_len = ntohs(ip6_udp->udp.uh_ulen);
+               T_QUIET;
+               T_ASSERT_GE(udp_len, (u_int)sizeof(ip6_udp->udp), NULL);
+               data_len = udp_len - (u_int)sizeof(ip6_udp->udp);
+               if (dump) {
+                       T_LOG("udp src 0x%x dst 0x%x len %u"
+                           " csum 0x%x datalen %u",
+                           ntohs(ip6_udp->udp.uh_sport),
+                           ntohs(ip6_udp->udp.uh_dport),
+                           udp_len,
+                           ntohs(ip6_udp->udp.uh_sum),
+                           data_len);
+               }
+               break;
+       }
+       case IPPROTO_ICMPV6: {
+               const struct icmp6_hdr *icmp6;
+               u_int                   icmp6_len;
+
+               icmp6_len = buf_len - sizeof(*ip6);
+               T_QUIET;
+               T_ASSERT_GE(buf_len, icmp6_len, NULL);
+               icmp6 = (const struct icmp6_hdr *)(ip6 + 1);
+               switch (icmp6->icmp6_type) {
+               case ND_NEIGHBOR_SOLICIT:
+                       if (dump) {
+                               T_LOG("neighbor solicit");
+                       }
+                       break;
+               case ND_NEIGHBOR_ADVERT:
+                       if (dump) {
+                               T_LOG("neighbor advert");
+                       }
+                       break;
+               case ND_ROUTER_SOLICIT:
+                       if (dump) {
+                               T_LOG("router solicit");
+                       }
+                       break;
+               default:
+                       if (dump) {
+                               T_LOG("icmp6 code 0x%x", icmp6->icmp6_type);
+                       }
+                       break;
+               }
+               break;
+       }
+       default:
+               break;
+       }
+}
+
+static void
+ethernet_frame_validate(const void * buf, u_int buf_len, bool dump)
+{
+       char                    ether_dst[ETHER_NTOA_BUFSIZE];
+       char                    ether_src[ETHER_NTOA_BUFSIZE];
+       uint16_t                ether_type;
+       const ether_header_t *  eh_p;
+
+       T_QUIET;
+       T_ASSERT_GE(buf_len, (u_int)sizeof(*eh_p), NULL);
+       eh_p = (const ether_header_t *)buf;
+       ether_type = ntohs(eh_p->ether_type);
+       ether_ntoa_buf((const ether_addr_t *)&eh_p->ether_dhost,
+           ether_dst, sizeof(ether_dst));
+       ether_ntoa_buf((const ether_addr_t *)&eh_p->ether_shost,
+           ether_src, sizeof(ether_src));
+       if (dump) {
+               T_LOG("ether dst %s src %s type 0x%x",
+                   ether_dst, ether_src, ether_type);
+       }
+       switch (ether_type) {
+       case ETHERTYPE_IP:
+               ip_frame_validate(eh_p + 1, (u_int)(buf_len - sizeof(*eh_p)),
+                   dump);
+               break;
+       case ETHERTYPE_ARP:
+               arp_frame_validate((const struct ether_arp *)(eh_p + 1),
+                   (u_int)(buf_len - sizeof(*eh_p)),
+                   dump);
+               break;
+       case ETHERTYPE_IPV6:
+               ip6_frame_validate(eh_p + 1, (u_int)(buf_len - sizeof(*eh_p)),
+                   dump);
+               break;
+       default:
+               T_FAIL("unrecognized ethertype 0x%x", ether_type);
+               break;
+       }
+}
+
+static u_int
+ethernet_udp4_frame_populate(void * buf, size_t buf_len,
+    const ether_addr_t * src,
+    struct in_addr src_ip,
+    uint16_t src_port,
+    const ether_addr_t * dst,
+    struct in_addr dst_ip,
+    uint16_t dst_port,
+    const void * data, u_int data_len)
+{
+       ether_header_t *        eh_p;
+       u_int                   frame_length;
+       static int              ip_id;
+       ip_udp_header_t *       ip_udp;
+       char *                  payload;
+       udp_pseudo_hdr_t *      udp_pseudo;
+
+       frame_length = (u_int)(sizeof(*eh_p) + sizeof(*ip_udp)) + data_len;
+       if (buf_len < frame_length) {
+               return 0;
+       }
+
+       /* determine frame offsets */
+       eh_p = (ether_header_t *)buf;
+       ip_udp = (ip_udp_header_t *)(void *)(eh_p + 1);
+       udp_pseudo = (udp_pseudo_hdr_t *)(void *)
+           (((char *)&ip_udp->udp) - sizeof(*udp_pseudo));
+       payload = (char *)(eh_p + 1) + sizeof(*ip_udp);
+
+       /* ethernet_header */
+       bcopy(src, eh_p->ether_shost, ETHER_ADDR_LEN);
+       bcopy(dst, eh_p->ether_dhost, ETHER_ADDR_LEN);
+       eh_p->ether_type = htons(ETHERTYPE_IP);
+
+       /* copy the data */
+       bcopy(data, payload, data_len);
+
+       /* fill in UDP pseudo header (gets overwritten by IP header below) */
+       bcopy(&src_ip, &udp_pseudo->src_ip, sizeof(src_ip));
+       bcopy(&dst_ip, &udp_pseudo->dst_ip, sizeof(dst_ip));
+       udp_pseudo->zero = 0;
+       udp_pseudo->proto = IPPROTO_UDP;
+       udp_pseudo->length = htons(sizeof(ip_udp->udp) + data_len);
+
+       /* fill in UDP header */
+       ip_udp->udp.uh_sport = htons(src_port);
+       ip_udp->udp.uh_dport = htons(dst_port);
+       ip_udp->udp.uh_ulen = htons(sizeof(ip_udp->udp) + data_len);
+       ip_udp->udp.uh_sum = 0;
+       ip_udp->udp.uh_sum = in_cksum(udp_pseudo, (int)(sizeof(*udp_pseudo)
+           + sizeof(ip_udp->udp) + data_len));
+
+       /* fill in IP header */
+       bzero(ip_udp, sizeof(ip_udp->ip));
+       ip_udp->ip.ip_v = IPVERSION;
+       ip_udp->ip.ip_hl = sizeof(struct ip) >> 2;
+       ip_udp->ip.ip_ttl = MAXTTL;
+       ip_udp->ip.ip_p = IPPROTO_UDP;
+       bcopy(&src_ip, &ip_udp->ip.ip_src, sizeof(src_ip));
+       bcopy(&dst_ip, &ip_udp->ip.ip_dst, sizeof(dst_ip));
+       ip_udp->ip.ip_len = htons(sizeof(*ip_udp) + data_len);
+       ip_udp->ip.ip_id = htons(ip_id++);
+
+       /* compute the IP checksum */
+       ip_udp->ip.ip_sum = 0; /* needs to be zero for checksum */
+       ip_udp->ip.ip_sum = in_cksum(&ip_udp->ip, sizeof(ip_udp->ip));
+
+       return frame_length;
+}
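
A note on the checksum layout used above; the sketch below shows the in-place pseudo-header trick, derived from the pointer arithmetic in the function (editorial illustration):

    /*
     * The 12-byte UDP pseudo header is written so that it ends exactly where
     * the UDP header begins, i.e. in the tail of the 20-byte slot reserved
     * for the IP header:
     *
     *   | ether (14) | ip (20)             | udp (8) | data |
     *                |   8  | pseudo (12)  |
     *
     * in_cksum() can then run contiguously over pseudo + udp + data, and the
     * real IP header is filled in afterwards, overwriting the scratch bytes.
     */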
+
+static u_int
+ethernet_udp6_frame_populate(void * buf, size_t buf_len,
+    const ether_addr_t * src,
+    struct in6_addr *src_ip,
+    uint16_t src_port,
+    const ether_addr_t * dst,
+    struct in6_addr * dst_ip,
+    uint16_t dst_port,
+    const void * data, u_int data_len)
+{
+       ether_header_t *        eh_p;
+       u_int                   frame_length;
+       ip6_udp_header_t *      ip6_udp;
+       char *                  payload;
+       udp6_pseudo_hdr_t *     udp6_pseudo;
+
+       frame_length = (u_int)(sizeof(*eh_p) + sizeof(*ip6_udp)) + data_len;
+       if (buf_len < frame_length) {
+               return 0;
+       }
+
+       /* determine frame offsets */
+       eh_p = (ether_header_t *)buf;
+       ip6_udp = (ip6_udp_header_t *)(void *)(eh_p + 1);
+       udp6_pseudo = (udp6_pseudo_hdr_t *)(void *)
+           (((char *)&ip6_udp->udp) - sizeof(*udp6_pseudo));
+       payload = (char *)(eh_p + 1) + sizeof(*ip6_udp);
+
+       /* ethernet_header */
+       bcopy(src, eh_p->ether_shost, ETHER_ADDR_LEN);
+       bcopy(dst, eh_p->ether_dhost, ETHER_ADDR_LEN);
+       eh_p->ether_type = htons(ETHERTYPE_IPV6);
+
+       /* copy the data */
+       bcopy(data, payload, data_len);
+
+       /* fill in UDP pseudo header (gets overwritten by IP header below) */
+       bcopy(src_ip, &udp6_pseudo->src_ip, sizeof(*src_ip));
+    void * context)
+       udp6_pseudo->zero = 0;
+       udp6_pseudo->proto = IPPROTO_UDP;
+       udp6_pseudo->length = htons(sizeof(ip6_udp->udp) + data_len);
+
+       /* fill in UDP header */
+       ip6_udp->udp.uh_sport = htons(src_port);
+       ip6_udp->udp.uh_dport = htons(dst_port);
+       ip6_udp->udp.uh_ulen = htons(sizeof(ip6_udp->udp) + data_len);
+       ip6_udp->udp.uh_sum = 0;
+       ip6_udp->udp.uh_sum = in_cksum(udp6_pseudo, (int)(sizeof(*udp6_pseudo)
+           + sizeof(ip6_udp->udp) + data_len));
+
+       /* fill in IP header */
+       bzero(&ip6_udp->ip6, sizeof(ip6_udp->ip6));
+       ip6_udp->ip6.ip6_vfc = IPV6_VERSION;
+       ip6_udp->ip6.ip6_nxt = IPPROTO_UDP;
+       bcopy(src_ip, &ip6_udp->ip6.ip6_src, sizeof(*src_ip));
+       bcopy(dst_ip, &ip6_udp->ip6.ip6_dst, sizeof(*dst_ip));
+       ip6_udp->ip6.ip6_plen = htons(sizeof(struct udphdr) + data_len);
+       /* ip6_udp->ip6.ip6_flow = ? */
+       return frame_length;
+}
+
+static u_int
+ethernet_udp_frame_populate(void * buf, size_t buf_len,
+    uint8_t af,
+    const ether_addr_t * src,
+    union ifbrip * src_ip,
+    uint16_t src_port,
+    const ether_addr_t * dst,
+    union ifbrip * dst_ip,
+    uint16_t dst_port,
+    const void * data, u_int data_len)
+{
+       u_int   len;
+
+       switch (af) {
+       case AF_INET:
+               len = ethernet_udp4_frame_populate(buf, buf_len,
+                   src,
+                   src_ip->ifbrip_addr,
+                   src_port,
+                   dst,
+                   dst_ip->ifbrip_addr,
+                   dst_port,
+                   data, data_len);
+               break;
+       case AF_INET6:
+               len = ethernet_udp6_frame_populate(buf, buf_len,
+                   src,
+                   &src_ip->ifbrip_addr6,
+                   src_port,
+                   dst,
+                   &dst_ip->ifbrip_addr6,
+                   dst_port,
+                   data, data_len);
+               break;
+       default:
+               T_FAIL("unrecognized address family %u", af);
+               len = 0;
+               break;
+       }
+       return len;
+}
+
+static u_int
+ethernet_arp_frame_populate(void * buf, u_int buf_len,
+    uint16_t op,
+    const ether_addr_t * sender_hw,
+    struct in_addr sender_ip,
+    const ether_addr_t * target_hw,
+    struct in_addr target_ip)
+{
+       ether_header_t *        eh_p;
+       struct ether_arp *      earp;
+       struct arphdr *         arp_p;
+       u_int                   frame_length;
+
+       frame_length = sizeof(*earp) + sizeof(*eh_p);
+       T_QUIET;
+       T_ASSERT_GE(buf_len, frame_length,
+           "%s buffer size %u needed %u",
+           __func__, buf_len, frame_length);
+
+       /* ethernet_header */
+       eh_p = (ether_header_t *)buf;
+       bcopy(sender_hw, eh_p->ether_shost, ETHER_ADDR_LEN);
+       if (target_hw != NULL) {
+               bcopy(target_hw, eh_p->ether_dhost,
+                   sizeof(eh_p->ether_dhost));
+       } else {
+               bcopy(&ether_broadcast, eh_p->ether_dhost,
+                   sizeof(eh_p->ether_dhost));
+       }
+       eh_p->ether_type = htons(ETHERTYPE_ARP);
+
+       /* ARP payload */
+       earp = (struct ether_arp *)(void *)(eh_p + 1);
+       arp_p = &earp->ea_hdr;
+       arp_p->ar_hrd = htons(ARPHRD_ETHER);
+       arp_p->ar_pro = htons(ETHERTYPE_IP);
+       arp_p->ar_hln = sizeof(earp->arp_sha);
+       arp_p->ar_pln = sizeof(struct in_addr);
+       arp_p->ar_op = htons(op);
+       bcopy(sender_hw, earp->arp_sha, sizeof(earp->arp_sha));
+       bcopy(&sender_ip, earp->arp_spa, sizeof(earp->arp_spa));
+       if (target_hw != NULL) {
+               bcopy(target_hw, earp->arp_tha, sizeof(earp->arp_tha));
+       } else {
+               bzero(earp->arp_tha, sizeof(earp->arp_tha));
+       }
+       bcopy(&target_ip, earp->arp_tpa, sizeof(earp->arp_tpa));
+       return frame_length;
+}
+
+static uint32_t G_generation;
+
+static uint32_t
+next_generation(void)
+{
+       return G_generation++;
+}
+
+static const void *
+ethernet_frame_get_udp4_payload(void * buf, u_int buf_len,
+    u_int * ret_payload_length)
+{
+       ether_header_t *        eh_p;
+       uint16_t                ether_type;
+       ip_udp_header_t *       ip_udp;
+       u_int                   ip_len;
+       u_int                   left;
+       const void *            payload = NULL;
+       u_int                   payload_length = 0;
+       u_int                   udp_len;
+
+       T_QUIET;
+       T_ASSERT_GE(buf_len, (u_int)(sizeof(*eh_p) + sizeof(*ip_udp)), NULL);
+       left = buf_len;
+       eh_p = (ether_header_t *)buf;
+       ether_type = ntohs(eh_p->ether_type);
+       T_QUIET;
+       T_ASSERT_EQ((int)ether_type, ETHERTYPE_IP, NULL);
+       ip_udp = (ip_udp_header_t *)(void *)(eh_p + 1);
+       left -= sizeof(*eh_p);
+       ip_len = ntohs(ip_udp->ip.ip_len);
+       T_QUIET;
+       T_ASSERT_GE(left, ip_len, NULL);
+       T_QUIET;
+       T_ASSERT_EQ((int)ip_udp->ip.ip_v, IPVERSION, NULL);
+       T_QUIET;
+       T_ASSERT_EQ((u_int)ip_udp->ip.ip_hl << 2, (u_int)sizeof(struct ip),
+               NULL);
+       T_QUIET;
+       T_ASSERT_EQ((int)ip_udp->ip.ip_p, IPPROTO_UDP, NULL);
+       T_QUIET;
+       T_ASSERT_GE(buf_len, (u_int)sizeof(*ip_udp), NULL);
+       udp_len = ntohs(ip_udp->udp.uh_ulen);
+       T_QUIET;
+       T_ASSERT_GE(udp_len, (u_int)sizeof(ip_udp->udp), NULL);
+       payload_length = udp_len - (int)sizeof(ip_udp->udp);
+       if (payload_length > 0) {
+               payload = (ip_udp + 1);
+       }
+       if (payload == NULL) {
+               payload_length = 0;
+       }
+       *ret_payload_length = payload_length;
+       return payload;
+}
+
+static const void *
+ethernet_frame_get_udp6_payload(void * buf, u_int buf_len,
+    u_int * ret_payload_length)
+{
+       ether_header_t *        eh_p;
+       uint16_t                ether_type;
+       ip6_udp_header_t *      ip6_udp;
+       u_int                   ip6_len;
+       u_int                   left;
+       const void *            payload = NULL;
+       u_int                   payload_length = 0;
+       u_int                   udp_len;
+
+       T_QUIET;
+       T_ASSERT_GE(buf_len, (u_int)(sizeof(*eh_p) + sizeof(*ip6_udp)), NULL);
+       left = buf_len;
+       eh_p = (ether_header_t *)buf;
+       ether_type = ntohs(eh_p->ether_type);
+       T_QUIET;
+       T_ASSERT_EQ((int)ether_type, ETHERTYPE_IPV6, NULL);
+       ip6_udp = (ip6_udp_header_t *)(void *)(eh_p + 1);
+       left -= sizeof(*eh_p);
+       ip6_len = ntohs(ip6_udp->ip6.ip6_plen);
+       T_QUIET;
+       T_ASSERT_GE(left, ip6_len + (u_int)sizeof(struct ip6_hdr), NULL);
+       T_QUIET;
+       T_ASSERT_EQ((int)(ip6_udp->ip6.ip6_vfc & IPV6_VERSION_MASK),
+           IPV6_VERSION, NULL);
+       T_QUIET;
+       T_ASSERT_EQ((int)ip6_udp->ip6.ip6_nxt, IPPROTO_UDP, NULL);
+       T_QUIET;
+       T_ASSERT_GE(buf_len, (u_int)sizeof(*ip6_udp), NULL);
+       udp_len = ntohs(ip6_udp->udp.uh_ulen);
+       T_QUIET;
+       T_ASSERT_GE(udp_len, (u_int)sizeof(ip6_udp->udp), NULL);
+       payload_length = udp_len - (int)sizeof(ip6_udp->udp);
+       if (payload_length > 0) {
+               payload = (ip6_udp + 1);
+       }
+       if (payload == NULL) {
+               payload_length = 0;
+       }
+       *ret_payload_length = payload_length;
+       return payload;
+}
+
+static const void *
+ethernet_frame_get_udp_payload(uint8_t af, void * buf, u_int buf_len,
+    u_int * ret_payload_length)
+{
+       const void *    payload;
+
+       switch (af) {
+       case AF_INET:
+               payload = ethernet_frame_get_udp4_payload(buf, buf_len,
+                   ret_payload_length);
+               break;
+       case AF_INET6:
+               payload = ethernet_frame_get_udp6_payload(buf, buf_len,
+                   ret_payload_length);
+               break;
+       default:
+               T_FAIL("unrecognized address family %u", af);
+               payload = NULL;
+               break;
+       }
+       return payload;
+}
+
+#define MIN_ICMP6_LEN           ((u_int)(sizeof(ether_header_t) +       \
+                                        sizeof(struct ip6_hdr) +       \
+                                        sizeof(struct icmp6_hdr)))
+#define ALIGNED_ND_OPT_LEN      8
+#define SET_ND_OPT_LEN(a)       (u_int)((a) >> 3)
+#define GET_ND_OPT_LEN(a)       (u_int)((a) << 3)
+#define ALIGN_ND_OPT(a)         (u_int)roundup(a, ALIGNED_ND_OPT_LEN)
+#define LINKADDR_OPT_LEN        (ALIGN_ND_OPT(sizeof(struct nd_opt_hdr) + \
+                                             sizeof(ether_addr_t)))
+#define ETHER_IPV6_LEN  (sizeof(*eh_p) + sizeof(*ip6))
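
A quick worked example of the neighbor-discovery option-length macros above (the values follow directly from the definitions):

    /*
     * LINKADDR_OPT_LEN  = ALIGN_ND_OPT(sizeof(struct nd_opt_hdr) +
     *                                  sizeof(ether_addr_t))
     *                   = roundup(2 + 6, 8) = 8 bytes
     * SET_ND_OPT_LEN(8) = 8 >> 3 = 1    RFC 4861 encodes option lengths
     * GET_ND_OPT_LEN(1) = 1 << 3 = 8    in units of 8 octets
     */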
+
+
+
+static u_int
+ethernet_nd6_frame_populate(void * buf, u_int buf_len,
+    uint8_t type,
+    const ether_addr_t * sender_hw,
+    struct in6_addr * sender_ip,
+    const ether_addr_t * dest_ether,
+    const ether_addr_t * target_hw,
+    struct in6_addr * target_ip)
+{
+       u_int                           data_len = 0;
+       ether_header_t *                eh_p;
+       u_int                           frame_length;
+       struct icmp6_hdr *              icmp6;
+       struct ip6_hdr *                ip6;
+       struct nd_opt_hdr *             nd_opt;
+
+       switch (type) {
+       case ND_ROUTER_SOLICIT:
+       case ND_NEIGHBOR_ADVERT:
+       case ND_NEIGHBOR_SOLICIT:
+               break;
+       default:
+               T_FAIL("%s: unsupported type %u", __func__, type);
+               return 0;
+       }
+
+       T_QUIET;
+       T_ASSERT_GE(buf_len, MIN_ICMP6_LEN, NULL);
+
+       eh_p = (ether_header_t *)buf;
+       ip6 = (struct ip6_hdr *)(void *)(eh_p + 1);
+       icmp6 = (struct icmp6_hdr *)(void *)(ip6 + 1);
+       frame_length = sizeof(*eh_p) + sizeof(*ip6);
+       switch (type) {
+       case ND_NEIGHBOR_SOLICIT: {
+               struct nd_neighbor_solicit *    nd_ns;
+               bool                            sender_is_specified;
+
+               sender_is_specified = !IN6_IS_ADDR_UNSPECIFIED(sender_ip);
+               data_len = sizeof(*nd_ns);
+               if (sender_is_specified) {
+                       data_len += LINKADDR_OPT_LEN;
+               }
+               frame_length += data_len;
+               T_QUIET;
+               T_ASSERT_GE(buf_len, frame_length, NULL);
+               nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
+               if (sender_is_specified) {
+                       /* add the source lladdr option */
+                       nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
+                       nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
+                       nd_opt->nd_opt_len = SET_ND_OPT_LEN(LINKADDR_OPT_LEN);
+                       bcopy(sender_hw, (nd_opt + 1), sizeof(*sender_hw));
+               }
+               bcopy(target_ip, &nd_ns->nd_ns_target,
+                   sizeof(nd_ns->nd_ns_target));
+               break;
+       }
+       case ND_NEIGHBOR_ADVERT: {
+               struct nd_neighbor_advert *     nd_na;
+
+               data_len = sizeof(*nd_na) + LINKADDR_OPT_LEN;
+               frame_length += data_len;
+               T_QUIET;
+               T_ASSERT_GE(buf_len, frame_length, NULL);
+
+               nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
+               bcopy(target_ip, &nd_na->nd_na_target,
+                   sizeof(nd_na->nd_na_target));
+               /* add the target lladdr option */
+               nd_opt = (struct nd_opt_hdr *)(nd_na + 1);
+               nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
+               nd_opt->nd_opt_len = SET_ND_OPT_LEN(LINKADDR_OPT_LEN);
+               bcopy(target_hw, (nd_opt + 1), sizeof(*target_hw));
+               break;
+       }
+       case ND_ROUTER_SOLICIT: {
+               struct nd_router_solicit *      nd_rs;
+
+               data_len = sizeof(*nd_rs) + LINKADDR_OPT_LEN;
+               frame_length += data_len;
+               T_QUIET;
+               T_ASSERT_GE(buf_len, frame_length, NULL);
+
+               nd_rs = (struct nd_router_solicit *)(void *)icmp6;
+
+               /* add the source lladdr option */
+               nd_opt = (struct nd_opt_hdr *)(nd_rs + 1);
+               nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
+               nd_opt->nd_opt_len = SET_ND_OPT_LEN(LINKADDR_OPT_LEN);
+               bcopy(sender_hw, (nd_opt + 1), sizeof(*sender_hw));
+               break;
+       }
+       default:
+               T_FAIL("%s: unsupported type %u", __func__, type);
+               return 0;
+       }
+       /* icmp6 header */
+       icmp6->icmp6_type = type;
+       icmp6->icmp6_code = 0;
+       icmp6->icmp6_cksum = 0;
+       icmp6->icmp6_data32[0] = 0;
+
+       /* ethernet_header */
+       bcopy(sender_hw, eh_p->ether_shost, ETHER_ADDR_LEN);
+       if (dest_ether != NULL) {
+               bcopy(dest_ether, eh_p->ether_dhost,
+                   sizeof(eh_p->ether_dhost));
+       } else {
+               /* XXX ether_dhost should be multicast */
+               bcopy(&ether_broadcast, eh_p->ether_dhost,
+                   sizeof(eh_p->ether_dhost));
+       }
+       eh_p->ether_type = htons(ETHERTYPE_IPV6);
+
+       /* IPv6 header */
+       bzero(ip6, sizeof(*ip6));
+       ip6->ip6_nxt = IPPROTO_ICMPV6;
+       ip6->ip6_vfc = IPV6_VERSION;
+       bcopy(sender_ip, &ip6->ip6_src, sizeof(ip6->ip6_src));
+       /* XXX ip6_dst should be specific multicast */
+       bcopy(&in6addr_linklocal_allnodes, &ip6->ip6_dst, sizeof(ip6->ip6_dst));
+       ip6->ip6_plen = htons(data_len);
+
+       return frame_length;
+}
+
+/**
+** Switch port
+**/
+static void
+switch_port_check_tx(switch_port_t port)
+{
+       int             error;
+       struct kevent   kev;
+       int             kq;
+       struct timespec ts = { .tv_sec = 0, .tv_nsec = 1000 * 1000};
+
+       kq = kqueue();
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(kq, "kqueue check_tx");
+       EV_SET(&kev, port->fd, EVFILT_WRITE, EV_ADD | EV_ENABLE, 0, 0, NULL);
+       error = kevent(kq, &kev, 1, &kev, 1, &ts);
+       T_QUIET;
+       T_ASSERT_EQ(error, 1, "kevent");
+       T_QUIET;
+       T_ASSERT_EQ((int)kev.filter, EVFILT_WRITE, NULL);
+       T_QUIET;
+       T_ASSERT_EQ((int)kev.ident, port->fd, NULL);
+       T_QUIET;
+       T_ASSERT_NULL(kev.udata, NULL);
+       close(kq);
+       return;
+}
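
Why the writability poll: the reasoning below is inferred from the surrounding code rather than stated in the commit.

    /*
     * The bpf fd is switched to non-blocking mode (FIONBIO) in
     * switch_port_list_add_port(), so write() may return EAGAIN rather than
     * block. Registering EVFILT_WRITE and waiting at most 1 ms (the ts value
     * above) confirms the device can accept a frame before the send paths
     * assert on the write() result.
     */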
+
+static void
+switch_port_send_arp(switch_port_t port,
+    uint16_t op,
+    const ether_addr_t * sender_hw,
+    struct in_addr sender_ip,
+    const ether_addr_t * target_hw,
+    struct in_addr target_ip)
+{
+       u_int           frame_length;
+       ether_packet    pkt;
+       ssize_t         n;
+
+       /* make sure we can send */
+       switch_port_check_tx(port);
+       frame_length = ethernet_arp_frame_populate(&pkt, sizeof(pkt),
+           op,
+           sender_hw,
+           sender_ip,
+           target_hw,
+           target_ip);
+       T_QUIET;
+       T_ASSERT_GT(frame_length, 0, "%s: frame_length %u",
+           __func__, frame_length);
+       if (S_debug) {
+               T_LOG("Port %s -> %s transmitting %u bytes",
+                   port->ifname, port->member_ifname, frame_length);
+       }
+       ethernet_frame_validate(&pkt, frame_length, S_debug);
+       n = write(port->fd, &pkt, frame_length);
+       if (n < 0) {
+               T_ASSERT_POSIX_SUCCESS(n, "%s write fd %d failed %ld",
+                   port->ifname, port->fd, n);
+       }
+       T_QUIET;
+       T_ASSERT_EQ((u_int)n, frame_length,
+           "%s fd %d wrote %ld",
+           port->ifname, port->fd, n);
+}
+
+
+static void
+switch_port_send_nd6(switch_port_t port,
+    uint8_t type,
+    const ether_addr_t * sender_hw,
+    struct in6_addr * sender_ip,
+    const ether_addr_t * dest_ether,
+    const ether_addr_t * target_hw,
+    struct in6_addr * target_ip)
+{
+       u_int           frame_length;
+       ether_packet    pkt;
+       ssize_t         n;
+
+       /* make sure we can send */
+       switch_port_check_tx(port);
+       frame_length = ethernet_nd6_frame_populate(&pkt, sizeof(pkt),
+           type,
+           sender_hw,
+           sender_ip,
+           dest_ether,
+           target_hw,
+           target_ip);
+       T_QUIET;
+       T_ASSERT_GT(frame_length, 0, "%s: frame_length %u",
+           __func__, frame_length);
+       if (S_debug) {
+               T_LOG("Port %s -> %s transmitting %u bytes",
+                   port->ifname, port->member_ifname, frame_length);
+       }
+       ethernet_frame_validate(&pkt, frame_length, S_debug);
+       n = write(port->fd, &pkt, frame_length);
+       if (n < 0) {
+               T_ASSERT_POSIX_SUCCESS(n, "%s write fd %d failed %ld",
+                   port->ifname, port->fd, n);
+       }
+       T_QUIET;
+       T_ASSERT_EQ((u_int)n, frame_length,
+           "%s fd %d wrote %ld",
+           port->ifname, port->fd, n);
+}
+
+
+static void
+switch_port_send_udp(switch_port_t port,
+    uint8_t af,
+    const ether_addr_t * src_eaddr,
+    union ifbrip * src_ip,
+    uint16_t src_port,
+    const ether_addr_t * dst_eaddr,
+    union ifbrip * dst_ip,
+    uint16_t dst_port,
+    const void * payload, u_int payload_length)
+{
+       u_int                   frame_length;
+       ether_packet            pkt;
+       ssize_t                 n;
+
+       /* make sure we can send */
+       switch_port_check_tx(port);
+
+       /* generate the packet */
+       frame_length
+               = ethernet_udp_frame_populate((void *)&pkt,
+           (u_int)sizeof(pkt),
+           af,
+           src_eaddr,
+           src_ip,
+           src_port,
+           dst_eaddr,
+           dst_ip,
+           dst_port,
+           payload,
+           payload_length);
+       T_QUIET;
+       T_ASSERT_GT(frame_length, 0, NULL);
+       if (S_debug) {
+               T_LOG("Port %s transmitting %u bytes",
+                   port->ifname, frame_length);
+       }
+       ethernet_frame_validate(&pkt, frame_length, S_debug);
+       n = write(port->fd, &pkt, frame_length);
+       if (n < 0) {
+               T_ASSERT_POSIX_SUCCESS(n, "%s write fd %d failed %ld",
+                   port->ifname, port->fd, n);
+       }
+       T_QUIET;
+       T_ASSERT_EQ((u_int)n, frame_length,
+           "%s fd %d wrote %ld",
+           port->ifname, port->fd, n);
+}
+
+
+
+static void
+switch_port_send_udp_addr_index(switch_port_t port,
+    uint8_t af,
+    u_int addr_index,
+    const ether_addr_t * dst_eaddr,
+    union ifbrip * dst_ip,
+    const void * payload, u_int payload_length)
+{
+       ether_addr_t    eaddr;
+       union ifbrip    ip;
+
+       /* generate traffic for the unit and address */
+       set_ethernet_address(&eaddr, port->unit, addr_index);
+       get_ip_address(af, port->unit, addr_index, &ip);
+       switch_port_send_udp(port, af,
+           &eaddr, &ip, TEST_SOURCE_PORT,
+           dst_eaddr, dst_ip, TEST_DEST_PORT,
+           payload, payload_length);
+}
+
+typedef void
+(packet_validator)(switch_port_t port, const ether_header_t * eh_p,
+    u_int pkt_len, void * context);
+typedef packet_validator * packet_validator_t;
+
+static void
+switch_port_receive(switch_port_t port,
+    uint8_t af,
+    const void * payload, u_int payload_length,
+    packet_validator_t validator,
+    void * context)
+{
+       ether_header_t *        eh_p;
+       ssize_t                 n;
+       char *                  offset;
+
+       n = read(port->fd, port->rx_buf, (unsigned)port->rx_buf_size);
+       if (n < 0) {
+               if (errno == EAGAIN) {
+                       return;
+               }
+               T_QUIET;
+               T_ASSERT_POSIX_SUCCESS(n, "read %s port %d fd %d",
+                   port->ifname, port->unit, port->fd);
+               return;
+       }
+       for (offset = port->rx_buf; n > 0;) {
+               struct bpf_hdr *        bpf = (struct bpf_hdr *)(void *)offset;
+               u_int                   pkt_len;
+               char *                  pkt;
+               u_int                   skip;
+
+               pkt = offset + bpf->bh_hdrlen;
+               pkt_len = bpf->bh_caplen;
+
+               eh_p = (ether_header_t *)(void *)pkt;
+               T_QUIET;
+               T_ASSERT_GE(pkt_len, (u_int)sizeof(*eh_p),
+                   "short packet %ld", n);
+
+               /* source shouldn't be broadcast/multicast */
+               T_QUIET;
+               T_ASSERT_EQ(eh_p->ether_shost[0] & 0x01, 0,
+                   "broadcast/multicast source");
+
+               if (S_debug) {
+                       T_LOG("Port %s [unit %d] [fd %d] Received %u bytes",
+                           port->ifname, port->unit, port->fd, pkt_len);
+               }
+               ethernet_frame_validate(pkt, pkt_len, S_debug);
+
+               /* call the validation function */
+               (*validator)(port, eh_p, pkt_len, context);
+
+               if (payload != NULL) {
+                       const void *    p;
+                       u_int           p_len;
+
+                       p = ethernet_frame_get_udp_payload(af, pkt, pkt_len,
+                           &p_len);
+                       T_QUIET;
+                       T_ASSERT_NOTNULL(p, "ethernet_frame_get_udp_payload");
+                       T_QUIET;
+                       T_ASSERT_EQ(p_len, payload_length,
+                           "payload length %u != expected %u",
+                           p_len, payload_length);
+                       T_QUIET;
+                       T_ASSERT_EQ(bcmp(payload, p, payload_length), 0,
+                           "unexpected payload");
+               }
+               skip = BPF_WORDALIGN(pkt_len + bpf->bh_hdrlen);
+               if (skip == 0) {
+                       break;
+               }
+               offset += skip;
+               n -= skip;
+       }
+       return;
+}
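
For reference, the standard BPF record layout that the read loop above walks (editorial sketch):

    /*
     *   offset                                  offset + skip
     *   v                                       v
     *   [bpf_hdr][captured frame][padding....]  [bpf_hdr][frame]...
     *            ^ offset + bh_hdrlen
     *
     * skip = BPF_WORDALIGN(bh_caplen + bh_hdrlen), so each pass advances to
     * the next word-aligned record until the n bytes returned by read() are
     * consumed.
     */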
+
+static void
+switch_port_log(switch_port_t port)
+{
+       T_LOG("%s [unit %d] [member %s]%s bpf fd %d bufsize %d\n",
+           port->ifname, port->unit,
+           port->member_ifname,
+           port->mac_nat ? " [mac-nat]" : "",
+           port->fd, port->rx_buf_size);
+}
+
+#define switch_port_list_size(port_count)               \
+       offsetof(switch_port_list, list[port_count])
+
+static switch_port_list_t
+switch_port_list_alloc(u_int port_count, bool mac_nat)
+{
+       switch_port_list_t      list;
+
+       list = (switch_port_list_t)
+           calloc(1, switch_port_list_size(port_count));
+       list->size = port_count;
+       list->mac_nat = mac_nat;
+       return list;
+}
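
An illustrative expansion of the sizing macro above (the classic struct hack: a trailing list[1] array sized with offsetof):

    /*
     * switch_port_list_size(4) == offsetof(switch_port_list, list[4])
     *                          == offsetof(switch_port_list, list)
     *                             + 4 * sizeof(switch_port)
     * so switch_port_list_alloc(4, false) allocates the header fields plus
     * exactly four switch_port slots, even though list[] is declared list[1].
     */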
+
+static void
+switch_port_list_dealloc(switch_port_list_t list)
+{
+       u_int           i;
+       switch_port_t   port;
+
+       for (i = 0, port = list->list; i < list->count; i++, port++) {
+               close(port->fd);
+               free(port->rx_buf);
+       }
+       free(list);
+       return;
+}
+
+static errno_t
+switch_port_list_add_port(switch_port_list_t port_list, u_int unit,
+    const char * ifname, const char * member_ifname,
+    ether_addr_t * member_mac,
+    u_int num_addrs, bool mac_nat)
+{
+       int             buf_size;
+       errno_t         err = EINVAL;
+       int             fd = -1;
+       int             opt;
+       switch_port_t   p;
+
+       if (port_list->count >= port_list->size) {
+               T_LOG("Internal error: port_list count %u >= size %u\n",
+                   port_list->count, port_list->size);
+               goto failed;
+       }
+       fd = bpf_new();
+       if (fd < 0) {
+               err = errno;
+               T_LOG("bpf_new");
+               goto failed;
+       }
+       opt = 1;
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(ioctl(fd, FIONBIO, &opt), NULL);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(bpf_set_immediate(fd, 1), NULL);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(bpf_setif(fd, ifname), "bpf set if %s",
+           ifname);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(bpf_set_see_sent(fd, 0), NULL);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(bpf_set_header_complete(fd, 1), NULL);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(bpf_get_blen(fd, &buf_size), NULL);
+       if (S_debug) {
+               T_LOG("%s [unit %d] [member %s] bpf fd %d bufsize %d\n",
+                   ifname, unit,
+                   member_ifname, fd, buf_size);
+       }
+       p = port_list->list + port_list->count++;
+       p->fd = fd;
+       p->unit = unit;
+       strlcpy(p->ifname, ifname, sizeof(p->ifname));
+       strlcpy(p->member_ifname, member_ifname, sizeof(p->member_ifname));
+       p->num_addrs = num_addrs;
+       p->rx_buf_size = buf_size;
+       p->rx_buf = malloc((unsigned)buf_size);
+       p->mac_nat = mac_nat;
+       p->member_mac = *member_mac;
+       return 0;
+
+failed:
+       if (fd >= 0) {
+               close(fd);
+       }
+       return err;
+}
+
+static switch_port_t
+switch_port_list_find_fd(switch_port_list_t ports, int fd)
+{
+       u_int           i;
+       switch_port_t   port;
+
+       for (i = 0, port = ports->list; i < ports->count; i++, port++) {
+               if (port->fd == fd) {
+                       return port;
+               }
+       }
+       return NULL;
+}
+
+static void
+switch_port_list_log(switch_port_list_t port_list)
+{
+       u_int           i;
+       switch_port_t   port;
+
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               switch_port_log(port);
+       }
+       return;
+}
+
+static switch_port_t
+switch_port_list_find_member(switch_port_list_t ports, const char * member_ifname)
+{
+       u_int           i;
+       switch_port_t   port;
+
+       for (i = 0, port = ports->list; i < ports->count; i++, port++) {
+               if (strcmp(port->member_ifname, member_ifname) == 0) {
+                       return port;
+               }
+       }
+       return NULL;
+}
+
+static void
+switch_port_list_check_receive(switch_port_list_t ports, uint8_t af,
+    const void * payload, u_int payload_length,
+    packet_validator_t validator,
+    void * context)
+{
+       int             i;
+       int             n_events;
+       struct kevent   kev[ports->count];
+       int             kq;
+       switch_port_t   port;
+       struct timespec ts = { .tv_sec = 0, .tv_nsec = 10 * 1000 * 1000};
+       u_int           u;
+
+       kq = kqueue();
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(kq, "kqueue check_receive");
+       for (u = 0, port = ports->list; u < ports->count; u++, port++) {
+               port->test_count = 0;
+               EV_SET(kev + u, port->fd,
+                   EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
+       }
+
+       do {
+               n_events = kevent(kq, kev, (int)ports->count, kev,
+                   (int)ports->count, &ts);
+               T_QUIET;
+               T_ASSERT_POSIX_SUCCESS(n_events, "kevent receive %d", n_events);
+               for (i = 0; i < n_events; i++) {
+                       T_QUIET;
+                       T_ASSERT_EQ((int)kev[i].filter, EVFILT_READ, NULL);
+                       T_QUIET;
+                       T_ASSERT_NULL(kev[i].udata, NULL);
+                       port = switch_port_list_find_fd(ports,
+                           (int)kev[i].ident);
+                       T_QUIET;
+                       T_ASSERT_NE(port, NULL,
+                           "port %p fd %d", (void *)port,
+                           (int)kev[i].ident);
+                       switch_port_receive(port, af, payload, payload_length,
+                           validator, context);
+               }
+       } while (n_events != 0);
+       close(kq);
+}
+
+static bool
+switch_port_list_verify_rt_table(switch_port_list_t port_list, bool log)
+{
+       bool            all_present = true;
+       u_int           i;
+       u_int           count;
+       struct ifbareq *ifba;
+       struct ifbareq *rt_table;
+       switch_port_t   port;
+
+       /* clear out current notion of how many addresses are present */
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               port->test_address_count = 0;
+               port->test_address_present = 0;
+       }
+       rt_table = bridge_rt_table_copy(&count);
+       if (rt_table == NULL) {
+               return false;
+       }
+       if (log) {
+               bridge_rt_table_log(rt_table, count);
+       }
+       for (i = 0, ifba = rt_table; i < count; i++, ifba++) {
+               uint64_t        addr_bit;
+               u_int           addr_index;
+               u_int           unit_index;
+               u_char *        ea;
+               ether_addr_t *  eaddr;
+
+               eaddr = (ether_addr_t *)&ifba->ifba_dst;
+               ea = eaddr->octet;
+               addr_index = ea[EA_ADDR_INDEX];
+               unit_index = ea[EA_UNIT_INDEX];
+               port = switch_port_list_find_member(port_list,
+                   ifba->ifba_ifsname);
+               T_QUIET;
+               T_ASSERT_NOTNULL(port, "switch_port_list_find_member %s",
+                   ifba->ifba_ifsname);
+               if (!S_cleaning_up) {
+                       T_QUIET;
+                       T_ASSERT_EQ(unit_index, port->unit, NULL);
+                       addr_bit = 1ULL << addr_index;
+                       T_QUIET;
+                       T_ASSERT_BITS_NOTSET(port->test_address_present,
+                           addr_bit, "%s address %u",
+                           ifba->ifba_ifsname, addr_index);
+                       port->test_address_present |= addr_bit;
+                       port->test_address_count++;
+               }
+       }
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               if (S_debug) {
+                       T_LOG("%s unit %d [member %s] %u expect %u",
+                           port->ifname, port->unit, port->member_ifname,
+                           port->test_address_count, port->num_addrs);
+               }
+               if (port->test_address_count != port->num_addrs) {
+                       all_present = false;
+               }
+       }
+
+       free(rt_table);
+       return all_present;
+}
+
+static bool
+switch_port_list_verify_mac_nat(switch_port_list_t port_list, bool log)
+{
+       bool                    all_present = true;
+       u_int                   i;
+       u_int                   count;
+       static struct ifbrmne * entries;
+       switch_port_t           port;
+       struct ifbrmne *        scan;
+
+
+       /* clear out current notion of how many addresses are present */
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               port->test_address_count = 0;
+               port->test_address_present = 0;
+       }
+       entries = bridge_mac_nat_entries_copy(&count);
+       if (entries == NULL) {
+               return false;
+       }
+       if (log) {
+               bridge_mac_nat_entries_log(entries, count);
+       }
+       for (i = 0, scan = entries; i < count; i++, scan++) {
+               uint8_t         af;
+               uint64_t        addr_bit;
+               u_int           addr_index;
+               char            buf_ip1[INET6_ADDRSTRLEN];
+               char            buf_ip2[INET6_ADDRSTRLEN];
+               u_char *        ea;
+               ether_addr_t *  eaddr;
+               union ifbrip    ip;
+               u_int           unit_index;
+
+               eaddr = (ether_addr_t *)&scan->ifbmne_mac;
+               ea = eaddr->octet;
+               addr_index = ea[EA_ADDR_INDEX];
+               unit_index = ea[EA_UNIT_INDEX];
+               port = switch_port_list_find_member(port_list,
+                   scan->ifbmne_ifname);
+               T_QUIET;
+               T_ASSERT_NOTNULL(port,
+                   "switch_port_list_find_member %s",
+                   scan->ifbmne_ifname);
+               T_QUIET;
+               T_ASSERT_EQ(unit_index, port->unit, NULL);
+               af = scan->ifbmne_af;
+               get_ip_address(af, port->unit, addr_index, &ip);
+               addr_bit = 1ULL << addr_index;
+               T_QUIET;
+               T_ASSERT_TRUE(ip_addresses_are_equal(af, &ip, &scan->ifbmne_ip),
+                   "mac nat entry IP address %s expected %s",
+                   inet_ntop(af, &scan->ifbmne_ip_addr,
+                   buf_ip1, sizeof(buf_ip1)),
+                   inet_ntop(af, &ip,
+                   buf_ip2, sizeof(buf_ip2)));
+               T_QUIET;
+               T_ASSERT_BITS_NOTSET(port->test_address_present,
+                   addr_bit, "%s address %u",
+                   scan->ifbmne_ifname, addr_index);
+               port->test_address_present |= addr_bit;
+               port->test_address_count++;
+       }
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               if (port->mac_nat) {
+                       /* MAC-NAT interface should have no entries */
+                       T_QUIET;
+                       T_ASSERT_EQ(port->test_address_count, 0,
+                           "mac nat interface %s has %u entries",
+                           port->member_ifname,
+                           port->test_address_count);
+               } else {
+                       if (S_debug) {
+                               T_LOG("%s unit %d [member %s] %u expect %u",
+                                   port->ifname, port->unit,
+                                   port->member_ifname,
+                                   port->test_address_count, port->num_addrs);
+                       }
+                       if (port->test_address_count != port->num_addrs) {
+                               all_present = false;
+                       }
+               }
+       }
+
+       free(entries);
+
+       return all_present;
+}
+
+/**
+** Basic Bridge Tests
+**/
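+/*
+ * Each test packet carries a 32-bit "generation" number as its UDP
+ * payload, so the receive checks can match packets to the specific
+ * send that produced them.
+ */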
+static void
+send_generation(switch_port_t port, uint8_t af, u_int addr_index,
+    const ether_addr_t * dst_eaddr, union ifbrip * dst_ip,
+    uint32_t generation)
+{
+       uint32_t        payload;
+
+       payload = htonl(generation);
+       switch_port_send_udp_addr_index(port, af, addr_index, dst_eaddr, dst_ip,
+           &payload, sizeof(payload));
+}
+
+static void
+check_receive_generation(switch_port_list_t ports, uint8_t af,
+    uint32_t generation, packet_validator_t validator,
+    __unused void * context)
+{
+       uint32_t        payload;
+
+       payload = htonl(generation);
+       switch_port_list_check_receive(ports, af, &payload, sizeof(payload),
+           validator, context);
+}
+
+static void
+validate_source_ether_mismatch(switch_port_t port, const ether_header_t * eh_p)
+{
+       /* source shouldn't be our own MAC addresses */
+       T_QUIET;
+       T_ASSERT_NE(eh_p->ether_shost[EA_UNIT_INDEX], port->unit,
+           "ether source matches unit %d", port->unit);
+}
+
+static void
+validate_not_present_dhost(switch_port_t port, const ether_header_t * eh_p,
+    __unused u_int pkt_len,
+    __unused void * context)
+{
+       validate_source_ether_mismatch(port, eh_p);
+       T_QUIET;
+       T_ASSERT_EQ(bcmp(eh_p->ether_dhost, &ether_external,
+           sizeof(eh_p->ether_dhost)), 0,
+           "%s", __func__);
+       port->test_count++;
+}
+
+static void
+validate_broadcast_dhost(switch_port_t port, const ether_header_t * eh_p,
+    __unused u_int pkt_len,
+    __unused void * context)
+{
+       validate_source_ether_mismatch(port, eh_p);
+       T_QUIET;
+       T_ASSERT_NE((eh_p->ether_dhost[0] & 0x01), 0,
+           "%s", __func__);
+       port->test_count++;
+}
+
+static void
+validate_port_dhost(switch_port_t port, const ether_header_t * eh_p,
+    __unused u_int pkt_len,
+    __unused void * context)
+{
+       validate_source_ether_mismatch(port, eh_p);
+       T_QUIET;
+       T_ASSERT_EQ(eh_p->ether_dhost[EA_UNIT_INDEX], port->unit,
+           "wrong dhost unit %d != %d",
+           eh_p->ether_dhost[EA_UNIT_INDEX], port->unit);
+       port->test_count++;
+}
+
+
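+/*
+ * Verify the receive counts after a send: the sending port must not see
+ * its own packet, and every other port must have received either the
+ * expected number of packets or, for ALL_ADDRS, one packet per address.
+ */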
+static void
+check_received_count(switch_port_list_t port_list,
+    switch_port_t port, uint32_t expected_packets)
+{
+       u_int           i;
+       switch_port_t   scan;
+
+       for (i = 0, scan = port_list->list; i < port_list->count; i++, scan++) {
+               if (scan == port) {
+                       T_QUIET;
+                       T_ASSERT_EQ(port->test_count, 0,
+                           "unexpected receive on port %d",
+                           port->unit);
+               } else if (expected_packets == ALL_ADDRS) {
+                       T_QUIET;
+                       T_ASSERT_EQ(scan->test_count, scan->num_addrs,
+                           "didn't receive on all addrs");
+               } else {
+                       T_QUIET;
+                       T_ASSERT_EQ(scan->test_count, expected_packets,
+                           "wrong receive count on port %s", scan->member_ifname);
+               }
+       }
+}
+
+static void
+unicast_send_all(switch_port_list_t port_list, uint8_t af, switch_port_t port)
+{
+       u_int           i;
+       switch_port_t   scan;
+
+       for (i = 0, scan = port_list->list; i < port_list->count; i++, scan++) {
+               if (S_debug) {
+                       T_LOG("Unicast send on %s", port->ifname);
+               }
+               for (u_int j = 0; j < scan->num_addrs; j++) {
+                       ether_addr_t    eaddr;
+                       union ifbrip    ip;
+
+                       set_ethernet_address(&eaddr, scan->unit, j);
+                       get_ip_address(af, scan->unit, j, &ip);
+                       switch_port_send_udp_addr_index(port, af, 0, &eaddr, &ip,
+                           NULL, 0);
+               }
+       }
+}
+
+
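+/*
+ * One pass of the learning test: broadcast a generation packet from
+ * each address that the bridge has not yet learned, then verify that
+ * every other port receives exactly one copy.
+ */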
+static void
+bridge_learning_test_once(switch_port_list_t port_list,
+    uint8_t af,
+    packet_validator_t validator,
+    void * context,
+    const ether_addr_t * dst_eaddr,
+    bool retry)
+{
+       u_int           i;
+       union ifbrip    dst_ip;
+       switch_port_t   port;
+
+       get_broadcast_ip_address(af, &dst_ip);
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               if (port->test_address_count == port->num_addrs) {
+                       /* already populated */
+                       continue;
+               }
+               if (S_debug) {
+                       T_LOG("Sending on %s", port->ifname);
+               }
+               for (u_int j = 0; j < port->num_addrs; j++) {
+                       uint32_t        generation;
+
+                       if (retry) {
+                               uint64_t        addr_bit;
+
+                               addr_bit = 1ULL << j;
+                               if ((port->test_address_present & addr_bit)
+                                   != 0) {
+                                       /* already present */
+                                       continue;
+                               }
+                               T_LOG("Retry port %s unit %u address %u",
+                                   port->ifname, port->unit, j);
+                       }
+                       generation = next_generation();
+                       send_generation(port,
+                           af,
+                           j,
+                           dst_eaddr,
+                           &dst_ip,
+                           generation);
+
+                       /* receive across all ports */
+                       check_receive_generation(port_list,
+                           af,
+                           generation,
+                           validator,
+                           context);
+
+                       /* ensure that every port saw the packet */
+                       check_received_count(port_list, port, 1);
+               }
+       }
+       return;
+}
+
+static inline const char *
+af_get_str(uint8_t af)
+{
+       return (af == AF_INET) ? "IPv4" : "IPv6";
+}
+
+static void
+bridge_learning_test(switch_port_list_t port_list,
+    uint8_t af,
+    packet_validator_t validator,
+    void * context,
+    const ether_addr_t * dst_eaddr)
+{
+       char            ntoabuf[ETHER_NTOA_BUFSIZE];
+       u_int           i;
+       switch_port_t   port;
+       bool            verified = false;
+
+       ether_ntoa_buf(dst_eaddr, ntoabuf, sizeof(ntoabuf));
+
+       /*
+        * Send a broadcast frame from every address on every port in the
+        * list so that the bridge learns our MAC addresses.
+        */
+#define BROADCAST_MAX_TRIES             20
+       for (int try = 1; try < BROADCAST_MAX_TRIES; try++) {
+               bool    retry = (try > 1);
+
+               if (!retry) {
+                       T_LOG("%s: %s #ports %u #addrs %u dest %s",
+                           __func__,
+                           af_get_str(af),
+                           port_list->count, port_list->list->num_addrs,
+                           ntoabuf);
+               } else {
+                       T_LOG("%s: %s #ports %u #addrs %u dest %s (TRY=%d)",
+                           __func__,
+                           af_get_str(af),
+                           port_list->count, port_list->list->num_addrs,
+                           ntoabuf, try);
+               }
+               bridge_learning_test_once(port_list, af, validator, context,
+                   dst_eaddr, retry);
+               /*
+                * In the event of a memory allocation failure, it's possible
+                * that an address was not learned. Check whether all of the
+                * addresses are present and, if not, retry only the ones
+                * that are missing.
+                */
+               verified = switch_port_list_verify_rt_table(port_list, false);
+               if (verified) {
+                       break;
+               }
+               /* wait a short time to allow the system to recover */
+               usleep(100 * 1000);
+       }
+       T_QUIET;
+       T_ASSERT_TRUE(verified, "All addresses present");
+
+       /*
+        * Since we just broadcast on every port in the switch, the bridge
+        * has learned every port's MAC addresses. It should no longer need
+        * to flood unicast packets, so unicast traffic should arrive only
+        * on the intended port.
+        */
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               /* send unicast packets to every other port's MAC addresses */
+               unicast_send_all(port_list, af, port);
+
+               /* receive all of that generated traffic */
+               switch_port_list_check_receive(port_list, af, NULL, 0,
+                   validate_port_dhost, NULL);
+               /* check that we saw all of the unicast packets */
+               check_received_count(port_list, port, ALL_ADDRS);
+       }
+       T_PASS("%s", __func__);
+}
+
+/**
+** MAC-NAT tests
+**/
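+/*
+ * When MAC-NAT is enabled on a member interface, traffic from the
+ * internal ports that leaves through that interface has its source MAC
+ * address rewritten to the MAC-NAT interface's own address; traffic
+ * arriving on the MAC-NAT interface is translated back based on the
+ * destination IP address. The tests below verify that behavior for
+ * ARP, IPv4 (including the DHCP broadcast flag), IPv6, and ND6.
+ */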
+static void
+mac_nat_check_received_count(switch_port_list_t port_list, switch_port_t port)
+{
+       u_int           i;
+       switch_port_t   scan;
+
+       for (i = 0, scan = port_list->list; i < port_list->count; i++, scan++) {
+               u_int   expected = 0;
+
+               if (scan == port) {
+                       expected = scan->num_addrs;
+               }
+               T_QUIET;
+               T_ASSERT_EQ(scan->test_count, expected,
+                   "%s [member %s]%s expected %u actual %u",
+                   scan->ifname, scan->member_ifname,
+                   scan->mac_nat ? " [mac-nat]" : "",
+                   expected, scan->test_count);
+       }
+}
+
+static void
+validate_mac_nat(switch_port_t port, const ether_header_t * eh_p,
+    u_int pkt_len,
+    __unused void * context)
+{
+       if (port->mac_nat) {
+               bool    equal;
+
+               /* source must match MAC-NAT interface */
+               equal = (bcmp(eh_p->ether_shost, &port->member_mac,
+                   sizeof(port->member_mac)) == 0);
+               if (!equal) {
+                       ethernet_frame_validate(eh_p, pkt_len, true);
+               }
+               T_QUIET;
+               T_ASSERT_TRUE(equal, "source address match");
+               port->test_count++;
+       } else {
+               validate_not_present_dhost(port, eh_p, pkt_len, NULL);
+       }
+}
+
+static void
+validate_mac_nat_in(switch_port_t port, const ether_header_t * eh_p,
+    u_int pkt_len, __unused void * context)
+{
+       if (S_debug) {
+               T_LOG("%s received %u bytes", port->member_ifname, pkt_len);
+               ethernet_frame_validate(eh_p, pkt_len, true);
+       }
+       T_QUIET;
+       T_ASSERT_EQ(eh_p->ether_dhost[EA_UNIT_INDEX], port->unit,
+           "dhost unit %u expected %u",
+           eh_p->ether_dhost[EA_UNIT_INDEX], port->unit);
+       port->test_count++;
+}
+
+static void
+validate_mac_nat_arp_out(switch_port_t port, const ether_header_t * eh_p,
+    u_int pkt_len, void * context)
+{
+       const struct ether_arp *        earp;
+       switch_port_t                   send_port = (switch_port_t)context;
+
+       if (S_debug) {
+               T_LOG("%s received %u bytes", port->member_ifname, pkt_len);
+               ethernet_frame_validate(eh_p, pkt_len, true);
+       }
+       T_QUIET;
+       T_ASSERT_EQ((int)ntohs(eh_p->ether_type), (int)ETHERTYPE_ARP, NULL);
+       earp = (const struct ether_arp *)(const void *)(eh_p + 1);
+       T_QUIET;
+       T_ASSERT_GE(pkt_len, (u_int)(sizeof(*eh_p) + sizeof(*earp)), NULL);
+       if (port->mac_nat) {
+               bool            equal;
+
+               /* source ethernet must match MAC-NAT interface */
+               equal = (bcmp(eh_p->ether_shost, &port->member_mac,
+                   sizeof(port->member_mac)) == 0);
+               if (!equal) {
+                       ethernet_frame_validate(eh_p, pkt_len, true);
+               }
+               T_QUIET;
+               T_ASSERT_TRUE(equal, "%s -> %s source address translated",
+                   send_port->member_ifname,
+                   port->member_ifname);
+               /* sender hw must match MAC-NAT interface */
+               equal = (bcmp(earp->arp_sha, &port->member_mac,
+                   sizeof(port->member_mac)) == 0);
+               if (!equal) {
+                       ethernet_frame_validate(eh_p, pkt_len, true);
+               }
+               T_QUIET;
+               T_ASSERT_TRUE(equal, "%s -> %s sender hardware translated",
+                   send_port->member_ifname,
+                   port->member_ifname);
+       } else {
+               /* source ethernet must match the sender */
+               T_QUIET;
+               T_ASSERT_EQ(eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit,
+                   "%s -> %s unit %u expected %u",
+                   send_port->member_ifname,
+                   port->member_ifname,
+                   eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit);
+               /* source hw must match the sender */
+               T_QUIET;
+               T_ASSERT_EQ(earp->arp_sha[EA_UNIT_INDEX], send_port->unit,
+                   "%s -> %s unit %u expected %u",
+                   send_port->member_ifname,
+                   port->member_ifname,
+                   earp->arp_sha[EA_UNIT_INDEX], send_port->unit);
+       }
+       port->test_count++;
+}
+
+static void
+validate_mac_nat_arp_in(switch_port_t port, const ether_header_t * eh_p,
+    u_int pkt_len, void * context)
+{
+       const struct ether_arp *        earp;
+       switch_port_t                   send_port = (switch_port_t)context;
+
+       if (S_debug) {
+               T_LOG("%s received %u bytes", port->member_ifname, pkt_len);
+               ethernet_frame_validate(eh_p, pkt_len, true);
+       }
+       earp = (const struct ether_arp *)(const void *)(eh_p + 1);
+       T_QUIET;
+       T_ASSERT_EQ((int)ntohs(eh_p->ether_type), (int)ETHERTYPE_ARP, NULL);
+       T_QUIET;
+       T_ASSERT_GE(pkt_len, (u_int)(sizeof(*eh_p) + sizeof(*earp)), NULL);
+       T_QUIET;
+       T_ASSERT_FALSE(port->mac_nat, NULL);
+
+       /* destination ethernet must match the unit */
+       T_QUIET;
+       T_ASSERT_EQ(eh_p->ether_dhost[EA_UNIT_INDEX], port->unit,
+           "%s -> %s unit %u expected %u",
+           send_port->member_ifname,
+           port->member_ifname,
+           eh_p->ether_dhost[EA_UNIT_INDEX], port->unit);
+       /* target hw must match the unit */
+       T_QUIET;
+       T_ASSERT_EQ(earp->arp_tha[EA_UNIT_INDEX], port->unit,
+           "%s -> %s unit %u expected %u",
+           send_port->member_ifname,
+           port->member_ifname,
+           earp->arp_tha[EA_UNIT_INDEX], port->unit);
+       port->test_count++;
+}
+
+static void
+mac_nat_test_arp_out(switch_port_list_t port_list)
+{
+       u_int           i;
+       struct in_addr  ip_dst;
+       switch_port_t   port;
+
+       ip_dst = get_external_ipv4_address();
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               if (port->mac_nat) {
+                       continue;
+               }
+               for (u_int j = 0; j < port->num_addrs; j++) {
+                       ether_addr_t    eaddr;
+                       struct in_addr  ip_src;
+
+                       set_ethernet_address(&eaddr, port->unit, j);
+                       get_ipv4_address(port->unit, j, &ip_src);
+                       switch_port_send_arp(port,
+                           ARPOP_REQUEST,
+                           &eaddr,
+                           ip_src,
+                           NULL,
+                           ip_dst);
+                       switch_port_list_check_receive(port_list, AF_INET,
+                           NULL, 0,
+                           validate_mac_nat_arp_out,
+                           port);
+                       check_received_count(port_list, port, 1);
+               }
+       }
+       T_PASS("%s", __func__);
+}
+
+static void
+mac_nat_send_arp_response(switch_port_t ext_port, switch_port_t port)
+{
+       struct in_addr  ip_src;
+
+       T_QUIET;
+       T_ASSERT_TRUE(ext_port->mac_nat, "%s is MAC-NAT interface",
+           ext_port->member_ifname);
+       ip_src = get_external_ipv4_address();
+       for (u_int j = 0; j < port->num_addrs; j++) {
+               struct in_addr  ip_dst;
+
+               get_ipv4_address(port->unit, j, &ip_dst);
+               if (S_debug) {
+                       T_LOG("Generating ARP destined to %s %s",
+                           port->ifname, inet_ntoa(ip_dst));
+               }
+               switch_port_send_arp(ext_port,
+                   ARPOP_REPLY,
+                   &ether_external,
+                   ip_src,
+                   &ext_port->member_mac,
+                   ip_dst);
+       }
+}
+
+static void
+mac_nat_test_arp_in(switch_port_list_t port_list)
+{
+       u_int           i;
+       struct in_addr  ip_src;
+       switch_port_t   port;
+
+       ip_src = get_external_ipv4_address();
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               if (port->mac_nat) {
+                       continue;
+               }
+               mac_nat_send_arp_response(port_list->list, port);
+
+               /* receive the generated traffic */
+               switch_port_list_check_receive(port_list, AF_INET, NULL, 0,
+                   validate_mac_nat_arp_in,
+                   port_list->list);
+
+               /* verify that only the single port got the packet */
+               mac_nat_check_received_count(port_list, port);
+       }
+       T_PASS("%s", __func__);
+}
+
+static void
+validate_mac_nat_dhcp(switch_port_t port, const ether_header_t * eh_p,
+    u_int pkt_len, void * context)
+{
+       u_int                           dp_flags;
+       const struct bootp_packet *     pkt;
+       switch_port_t                   send_port = (switch_port_t)context;
+
+
+       T_QUIET;
+       T_ASSERT_GE(pkt_len, (u_int)sizeof(*pkt), NULL);
+       T_QUIET;
+       T_ASSERT_EQ((int)ntohs(eh_p->ether_type), (int)ETHERTYPE_IP, NULL);
+       pkt = (const struct bootp_packet *)(const void *)(eh_p + 1);
+
+       dp_flags = ntohs(pkt->bp_bootp.bp_unused);
+       if (port->mac_nat) {
+               bool            equal;
+
+               /* Broadcast bit must be set */
+               T_QUIET;
+               T_ASSERT_BITS_SET(dp_flags, (u_int)DHCP_FLAGS_BROADCAST,
+                   "%s -> %s: flags 0x%x must have 0x%x",
+                   send_port->member_ifname,
+                   port->member_ifname,
+                   dp_flags, DHCP_FLAGS_BROADCAST);
+
+               /* source must match MAC-NAT interface */
+               equal = (bcmp(eh_p->ether_shost, &port->member_mac,
+                   sizeof(port->member_mac)) == 0);
+               if (!equal) {
+                       ethernet_frame_validate(eh_p, pkt_len, true);
+               }
+               T_QUIET;
+               T_ASSERT_TRUE(equal, "%s -> %s source address translated",
+                   send_port->member_ifname,
+                   port->member_ifname);
+       } else {
+               /* Broadcast bit must not be set */
+               T_QUIET;
+               T_ASSERT_BITS_NOTSET(dp_flags, DHCP_FLAGS_BROADCAST,
+                   "%s -> %s flags 0x%x must not have 0x%x",
+                   send_port->member_ifname,
+                   port->member_ifname,
+                   dp_flags, DHCP_FLAGS_BROADCAST);
+               T_QUIET;
+               T_ASSERT_EQ(eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit,
+                   "%s -> %s unit %u expected %u",
+                   send_port->member_ifname,
+                   port->member_ifname,
+                   eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit);
+       }
+       port->test_count++;
+}
+
+static u_int
+make_dhcp_payload(dhcp_min_payload_t payload, ether_addr_t *eaddr)
+{
+       struct bootp *  dhcp;
+       u_int           payload_length;
+
+       /* create a minimal BOOTP packet */
+       payload_length = sizeof(*payload);
+       dhcp = (struct bootp *)payload;
+       bzero(dhcp, payload_length);
+       dhcp->bp_op = BOOTREQUEST;
+       dhcp->bp_htype = ARPHRD_ETHER;
+       dhcp->bp_hlen = sizeof(*eaddr);
+       bcopy(eaddr->octet, dhcp->bp_chaddr, sizeof(eaddr->octet));
+       return payload_length;
+}
+
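+/*
+ * Because MAC-NAT rewrites the client's source MAC address, a DHCP
+ * server's unicast reply would be addressed to the wrong hardware
+ * address; the bridge compensates by setting the DHCP broadcast flag
+ * on outbound requests. This test verifies that conversion.
+ */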
+static void
+mac_nat_test_dhcp(switch_port_list_t port_list)
+{
+       u_int           i;
+       struct in_addr  ip_dst = { INADDR_BROADCAST };
+       struct in_addr  ip_src = { INADDR_ANY };
+       switch_port_t   port;
+
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               ether_addr_t            eaddr;
+               dhcp_min_payload        payload;
+               u_int                   payload_len;
+
+               if (port->mac_nat) {
+                       continue;
+               }
+               set_ethernet_address(&eaddr, port->unit, 0);
+               payload_len = make_dhcp_payload(&payload, &eaddr);
+               if (S_debug) {
+                       T_LOG("%s: transmit DHCP packet (member %s)",
+                           port->ifname, port->member_ifname);
+               }
+               switch_port_send_udp(port,
+                   AF_INET,
+                   &eaddr,
+                   (union ifbrip *)&ip_src,
+                   BOOTP_CLIENT_PORT,
+                   &ether_broadcast,
+                   (union ifbrip *)&ip_dst,
+                   BOOTP_SERVER_PORT,
+                   &payload,
+                   payload_len);
+
+               switch_port_list_check_receive(port_list, AF_INET, NULL, 0,
+                   validate_mac_nat_dhcp,
+                   port);
+
+               check_received_count(port_list, port, 1);
+       }
+       T_PASS("%s", __func__);
+}
+
+
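+/*
+ * Validate the link-layer address option within an ND packet: on the
+ * MAC-NAT interface it must have been rewritten to that interface's
+ * MAC address; on an internal port it must still identify the sender.
+ */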
+static void
+validate_mac_nat_nd6(switch_port_t port,
+    const struct icmp6_hdr * icmp6,
+    u_int icmp6_len,
+    uint8_t opt_type,
+    u_int nd_hdr_size,
+    switch_port_t send_port)
+{
+       const uint8_t *                 linkaddr;
+       const uint8_t *                 ptr;
+       const struct nd_opt_hdr *       nd_opt;
+       u_int                           nd_size;
+
+       ptr = (const uint8_t *)icmp6;
+       nd_size = nd_hdr_size + LINKADDR_OPT_LEN;
+       if (icmp6_len < nd_size) {
+               /* no LINKADDR option */
+               return;
+       }
+       nd_opt = (const struct nd_opt_hdr *)(const void *)(ptr + nd_hdr_size);
+       T_QUIET;
+       T_ASSERT_EQ(nd_opt->nd_opt_type, opt_type, NULL);
+       T_QUIET;
+       T_ASSERT_EQ(GET_ND_OPT_LEN(nd_opt->nd_opt_len), LINKADDR_OPT_LEN, NULL);
+       linkaddr = (const uint8_t *)(nd_opt + 1);
+       if (port->mac_nat) {
+               bool    equal;
+
+               equal = (bcmp(linkaddr, &port->member_mac,
+                   sizeof(port->member_mac)) == 0);
+               T_QUIET;
+               T_ASSERT_TRUE(equal, "%s -> %s sender hardware translated",
+                   send_port->member_ifname,
+                   port->member_ifname);
+       } else {
+               /* source hw must match the sender */
+               T_QUIET;
+               T_ASSERT_EQ(linkaddr[EA_UNIT_INDEX], send_port->unit,
+                   "%s -> %s unit %u expected %u",
+                   send_port->member_ifname,
+                   port->member_ifname,
+                   linkaddr[EA_UNIT_INDEX], send_port->unit);
+       }
+}
+
+static void
+validate_mac_nat_icmp6_out(switch_port_t port, const struct icmp6_hdr * icmp6,
+    u_int icmp6_len, switch_port_t send_port)
+{
+       switch (icmp6->icmp6_type) {
+       case ND_NEIGHBOR_ADVERT:
+               validate_mac_nat_nd6(port, icmp6, icmp6_len,
+                   ND_OPT_TARGET_LINKADDR,
+                   sizeof(struct nd_neighbor_advert),
+                   send_port);
+               break;
+       case ND_NEIGHBOR_SOLICIT:
+               validate_mac_nat_nd6(port, icmp6, icmp6_len,
+                   ND_OPT_SOURCE_LINKADDR,
+                   sizeof(struct nd_neighbor_solicit),
+                   send_port);
+               break;
+       case ND_ROUTER_SOLICIT:
+               validate_mac_nat_nd6(port, icmp6, icmp6_len,
+                   ND_OPT_SOURCE_LINKADDR,
+                   sizeof(struct nd_router_solicit),
+                   send_port);
+               break;
+       default:
+               T_FAIL("Unsupported icmp6 type %d", icmp6->icmp6_type);
+               break;
+       }
+}
+
+static void
+validate_mac_nat_nd6_out(switch_port_t port, const ether_header_t * eh_p,
+    u_int pkt_len, void * context)
+{
+       const struct icmp6_hdr *        icmp6;
+       const struct ip6_hdr *          ip6;
+       switch_port_t                   send_port = (switch_port_t)context;
+
+       if (S_debug) {
+               T_LOG("%s received %u bytes", port->member_ifname, pkt_len);
+               ethernet_frame_validate(eh_p, pkt_len, true);
+       }
+       T_QUIET;
+       T_ASSERT_EQ(ntohs(eh_p->ether_type), (u_short)ETHERTYPE_IPV6, NULL);
+       ip6 = (const struct ip6_hdr *)(const void *)(eh_p + 1);
+       icmp6 = (const struct icmp6_hdr *)(const void *)(ip6 + 1);
+       T_QUIET;
+       T_ASSERT_GE(pkt_len, (u_int)MIN_ICMP6_LEN, NULL);
+       T_QUIET;
+       T_ASSERT_EQ(ip6->ip6_nxt, IPPROTO_ICMPV6, NULL);
+
+       /* validate the ethernet header */
+       if (port->mac_nat) {
+               bool            equal;
+
+               /* source ethernet must match MAC-NAT interface */
+               equal = (bcmp(eh_p->ether_shost, &port->member_mac,
+                   sizeof(port->member_mac)) == 0);
+               if (!equal) {
+                       ethernet_frame_validate(eh_p, pkt_len, true);
+               }
+               T_QUIET;
+               T_ASSERT_TRUE(equal, "%s -> %s source address translated",
+                   send_port->member_ifname,
+                   port->member_ifname);
+       } else {
+               /* source ethernet must match the sender */
+               T_QUIET;
+               T_ASSERT_EQ(eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit,
+                   "%s -> %s unit %u expected %u",
+                   send_port->member_ifname,
+                   port->member_ifname,
+                   eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit);
+       }
+       /* validate the icmp6 payload */
+       validate_mac_nat_icmp6_out(port, icmp6,
+           pkt_len - ETHER_IPV6_LEN,
+           send_port);
+       port->test_count++;
+}
+
+static void
+mac_nat_test_nd6_out(switch_port_list_t port_list)
+{
+       switch_port_t   ext_port;
+       u_int           i;
+       union ifbrip    ip_dst;
+       switch_port_t   port;
+
+       get_external_ip_address(AF_INET6, &ip_dst);
+       ext_port = port_list->list;
+       T_QUIET;
+       T_ASSERT_TRUE(ext_port->mac_nat, NULL);
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               if (port->mac_nat) {
+                       continue;
+               }
+               /* neighbor solicit */
+               for (u_int j = 0; j < port->num_addrs; j++) {
+                       ether_addr_t    eaddr;
+                       union ifbrip    ip_src;
+
+                       set_ethernet_address(&eaddr, port->unit, j);
+                       get_ip_address(AF_INET6, port->unit, j, &ip_src);
+                       switch_port_send_nd6(port,
+                           ND_NEIGHBOR_SOLICIT,
+                           &eaddr,
+                           &ip_src.ifbrip_addr6,
+                           NULL,
+                           NULL,
+                           &ip_dst.ifbrip_addr6);
+                       switch_port_list_check_receive(port_list, AF_INET6,
+                           NULL, 0,
+                           validate_mac_nat_nd6_out,
+                           port);
+                       check_received_count(port_list, port, 1);
+               }
+               /* neighbor advert */
+               for (u_int j = 0; j < port->num_addrs; j++) {
+                       ether_addr_t    eaddr;
+                       union ifbrip    ip_src;
+
+                       set_ethernet_address(&eaddr, port->unit, j);
+                       get_ip_address(AF_INET6, port->unit, j, &ip_src);
+                       switch_port_send_nd6(port,
+                           ND_NEIGHBOR_ADVERT,
+                           &eaddr,
+                           &ip_src.ifbrip_addr6,
+                           NULL,
+                           &eaddr,
+                           &ip_src.ifbrip_addr6);
+                       switch_port_list_check_receive(port_list, AF_INET6,
+                           NULL, 0,
+                           validate_mac_nat_nd6_out,
+                           port);
+                       check_received_count(port_list, port, 1);
+               }
+               /* router solicit */
+               for (u_int j = 0; j < port->num_addrs; j++) {
+                       ether_addr_t    eaddr;
+                       union ifbrip    ip_src;
+
+                       set_ethernet_address(&eaddr, port->unit, j);
+                       get_ip_address(AF_INET6, port->unit, j, &ip_src);
+                       //get_ipv6ll_address(port->unit, j, &ip_src.ifbrip_addr6);
+                       switch_port_send_nd6(port,
+                           ND_ROUTER_SOLICIT,
+                           &eaddr,
+                           &ip_src.ifbrip_addr6,
+                           NULL,
+                           NULL,
+                           NULL);
+                       switch_port_list_check_receive(port_list, AF_INET6,
+                           NULL, 0,
+                           validate_mac_nat_nd6_out,
+                           port);
+                       check_received_count(port_list, port, 1);
+               }
+       }
+       T_PASS("%s", __func__);
+}
+
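+/*
+ * Play the role of the external host: send a UDP packet to each of the
+ * internal port's addresses via the MAC-NAT interface, addressed to the
+ * MAC-NAT interface's MAC address, and let the bridge translate it back.
+ */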
+static void
+mac_nat_send_response(switch_port_t ext_port, uint8_t af, switch_port_t port)
+{
+       union ifbrip    src_ip;
+
+       T_QUIET;
+       T_ASSERT_TRUE(ext_port->mac_nat, "%s is MAC-NAT interface",
+           ext_port->member_ifname);
+       if (S_debug) {
+               T_LOG("Generating UDP traffic destined to %s", port->ifname);
+       }
+       get_external_ip_address(af, &src_ip);
+       for (u_int j = 0; j < port->num_addrs; j++) {
+               union ifbrip    ip;
+
+               get_ip_address(af, port->unit, j, &ip);
+               switch_port_send_udp(ext_port,
+                   af,
+                   &ether_external,
+                   &src_ip,
+                   TEST_DEST_PORT,
+                   &ext_port->member_mac,
+                   &ip,
+                   TEST_SOURCE_PORT,
+                   NULL, 0);
+       }
+}
+
+
+static void
+mac_nat_test_ip_once(switch_port_list_t port_list, uint8_t af, bool retry)
+{
+       union ifbrip    dst_ip;
+       u_int           i;
+       switch_port_t   port;
+
+       get_external_ip_address(af, &dst_ip);
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               if (port->test_address_count == port->num_addrs) {
+                       /* already populated */
+                       continue;
+               }
+               if (S_debug) {
+                       T_LOG("Sending on %s", port->ifname);
+               }
+               for (u_int j = 0; j < port->num_addrs; j++) {
+                       uint32_t        generation;
+
+                       if (retry) {
+                               uint64_t        addr_bit;
+
+                               addr_bit = 1ULL << j;
+                               if ((port->test_address_present & addr_bit)
+                                   != 0) {
+                                       /* already present */
+                                       continue;
+                               }
+                               T_LOG("Retry port %s unit %u address %u",
+                                   port->ifname, port->unit, j);
+                       }
+
+                       generation = next_generation();
+                       send_generation(port,
+                           af,
+                           j,
+                           &ether_external,
+                           &dst_ip,
+                           generation);
+
+                       /* receive across all ports */
+                       check_receive_generation(port_list,
+                           af,
+                           generation,
+                           validate_mac_nat,
+                           NULL);
+
+                       /* ensure that every port saw the packet */
+                       check_received_count(port_list, port, 1);
+               }
+       }
+       return;
+}
+
+static void
+mac_nat_test_ip(switch_port_list_t port_list, uint8_t af)
+{
+       u_int           i;
+       switch_port_t   port;
+       bool            verified = false;
+
+       /*
+        * Send a packet from every port in the list so that the bridge
+        * learns the MAC addresses and IP addresses.
+        */
+#define MAC_NAT_MAX_TRIES               20
+       for (int try = 1; try < MAC_NAT_MAX_TRIES; try++) {
+               bool    retry = (try > 1);
+
+               if (!retry) {
+                       T_LOG("%s: #ports %u #addrs %u",
+                           __func__,
+                           port_list->count, port_list->list->num_addrs);
+               } else {
+                       T_LOG("%s: #ports %u #addrs %u destination (TRY=%d)",
+                           __func__,
+                           port_list->count, port_list->list->num_addrs,
+                           try);
+               }
+               mac_nat_test_ip_once(port_list, af, retry);
+               /*
+                * In the event of a memory allocation failure, it's possible
+                * that an address was not learned. Check whether all of the
+                * addresses are present and, if not, retry only the ones
+                * that are missing.
+                */
+               verified = switch_port_list_verify_mac_nat(port_list, false);
+               if (verified) {
+                       break;
+               }
+               /* wait a short time to allow the system to recover */
+               usleep(100 * 1000);
+       }
+       T_QUIET;
+       T_ASSERT_TRUE(verified, "All addresses present");
+
+       /*
+        * The bridge now has an IP address <-> MAC address binding for every
+        * address on each internal interface.
+        *
+        * Generate an inbound packet on the MAC-NAT interface targeting
+        * each interface address. Verify that the packet appears on
+        * the appropriate internal address with appropriate translation.
+        */
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               if (port->mac_nat) {
+                       continue;
+               }
+               mac_nat_send_response(port_list->list, af, port);
+
+               /* receive the generated traffic */
+               switch_port_list_check_receive(port_list, af, NULL, 0,
+                   validate_mac_nat_in,
+                   NULL);
+
+               /* verify that only the single port got the packet */
+               mac_nat_check_received_count(port_list, port);
+       }
+       T_PASS("%s", __func__);
+}
+
+/**
+** interface management
+**/
+
+static int
+ifnet_get_lladdr(int s, const char * ifname, ether_addr_t * eaddr)
+{
+       int err;
+       struct ifreq ifr;
+
+       bzero(&ifr, sizeof(ifr));
+       strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+       ifr.ifr_addr.sa_family = AF_LINK;
+       ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
+       err = ioctl(s, SIOCGIFLLADDR, &ifr);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(err, "SIOCGIFLLADDR %s", ifname);
+       bcopy(ifr.ifr_addr.sa_data, eaddr->octet, ETHER_ADDR_LEN);
+       return err;
+}
+
+
+static int
+ifnet_attach_ip(int s, char * name)
+{
+       int                     err;
+       struct ifreq    ifr;
+
+       bzero(&ifr, sizeof(ifr));
+       strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+       err = ioctl(s, SIOCPROTOATTACH, &ifr);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(err, "SIOCPROTOATTACH %s", ifr.ifr_name);
+       return err;
+}
+
+#if 0
+static int
+ifnet_detach_ip(int s, char * name)
+{
+       int                     err;
+       struct ifreq    ifr;
+
+       bzero(&ifr, sizeof(ifr));
+       strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+       err = ioctl(s, SIOCPROTODETACH, &ifr);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(err, "SIOCPROTODETACH %s", ifr.ifr_name);
+       return err;
+}
+#endif
+
+static int
+ifnet_destroy(int s, const char * ifname, bool fail_on_error)
+{
+       int             err;
+       struct ifreq    ifr;
+
+       bzero(&ifr, sizeof(ifr));
+       strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+       err = ioctl(s, SIOCIFDESTROY, &ifr);
+       if (fail_on_error) {
+               T_QUIET;
+               T_ASSERT_POSIX_SUCCESS(err, "SIOCSIFDESTROY %s", ifr.ifr_name);
+       }
+       if (err < 0) {
+               T_LOG("SIOCSIFDESTROY %s", ifr.ifr_name);
+       }
+       return err;
+}
+
+static int
+ifnet_set_flags(int s, const char * ifname,
+    uint16_t flags_set, uint16_t flags_clear)
+{
+       uint16_t        flags_after;
+       uint16_t        flags_before;
+       struct ifreq    ifr;
+       int             ret;
+
+       bzero(&ifr, sizeof(ifr));
+       strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+       ret = ioctl(s, SIOCGIFFLAGS, (caddr_t)&ifr);
+       if (ret != 0) {
+               T_LOG("SIOCGIFFLAGS %s", ifr.ifr_name);
+               return ret;
+       }
+       flags_before = (uint16_t)ifr.ifr_flags;
+       ifr.ifr_flags |= flags_set;
+       ifr.ifr_flags &= ~(flags_clear);
+       flags_after = (uint16_t)ifr.ifr_flags;
+       if (flags_before == flags_after) {
+               /* nothing to do */
+               ret = 0;
+       } else {
+               /* issue the ioctl */
+               T_QUIET;
+               T_ASSERT_POSIX_SUCCESS(ioctl(s, SIOCSIFFLAGS, &ifr),
+                   "SIOCSIFFLAGS %s 0x%x",
+                   ifr.ifr_name, (uint16_t)ifr.ifr_flags);
+               if (S_debug) {
+                       T_LOG("setflags(%s set 0x%x clear 0x%x) 0x%x => 0x%x",
+                           ifr.ifr_name, flags_set, flags_clear,
+                           flags_before, flags_after);
+               }
+       }
+       return ret;
+}
+
+#define BRIDGE_NAME     "bridge"
+#define BRIDGE200       BRIDGE_NAME "200"
+
+#define FETH_NAME       "feth"
+
+/* On some platforms with DEBUG kernel, we need to wait a while */
+#define SIFCREATE_RETRY 600
+
+static int
+ifnet_create(int s, const char * ifname)
+{
+       int             error = 0;
+       struct ifreq    ifr;
+
+       bzero(&ifr, sizeof(ifr));
+       strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+       for (int i = 0; i < SIFCREATE_RETRY; i++) {
+               if (ioctl(s, SIOCIFCREATE, &ifr) < 0) {
+                       error = errno;
+                       T_LOG("SIOCSIFCREATE %s: %s", ifname,
+                           strerror(error));
+                       if (error == EBUSY) {
+                               /* interface is tearing down, try again */
+                               usleep(10000);
+                       } else if (error == EEXIST) {
+                               /* interface exists, try destroying it */
+                               (void)ifnet_destroy(s, ifname, false);
+                       } else {
+                               /* unexpected failure */
+                               break;
+                       }
+               } else {
+                       error = 0;
+                       break;
+               }
+       }
+       if (error == 0) {
+               error = ifnet_set_flags(s, ifname, IFF_UP, 0);
+       }
+       return error;
+}
+
+static int
+siocdrvspec(int s, const char * ifname,
+    u_long op, void *arg, size_t argsize, bool set)
+{
+       struct ifdrv    ifd;
+
+       memset(&ifd, 0, sizeof(ifd));
+       strlcpy(ifd.ifd_name, ifname, sizeof(ifd.ifd_name));
+       ifd.ifd_cmd = op;
+       ifd.ifd_len = argsize;
+       ifd.ifd_data = arg;
+       return ioctl(s, set ? SIOCSDRVSPEC : SIOCGDRVSPEC, &ifd);
+}
+
+
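+/*
+ * feth interfaces come in pairs: setting the peer connects two fake
+ * ethernet interfaces as if with a crossover cable. The test plugs one
+ * side of each pair into the bridge and sends/receives on the other.
+ */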
+static int
+fake_set_peer(int s, const char * feth, const char * feth_peer)
+{
+       struct if_fake_request  iffr;
+       int                     ret;
+
+       bzero((char *)&iffr, sizeof(iffr));
+       if (feth_peer != NULL) {
+               strlcpy(iffr.iffr_peer_name, feth_peer,
+                   sizeof(iffr.iffr_peer_name));
+       }
+       ret = siocdrvspec(s, feth, IF_FAKE_S_CMD_SET_PEER,
+           &iffr, sizeof(iffr), true);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(ret,
+           "SIOCDRVSPEC(%s, IF_FAKE_S_CMD_SET_PEER, %s)",
+           feth, (feth_peer != NULL) ? feth_peer : "<none>");
+       return ret;
+}
+
+static int
+bridge_add_member(int s, const char * bridge, const char * member)
+{
+       struct ifbreq           req;
+       int                     ret;
+
+       memset(&req, 0, sizeof(req));
+       strlcpy(req.ifbr_ifsname, member, sizeof(req.ifbr_ifsname));
+       ret = siocdrvspec(s, bridge, BRDGADD, &req, sizeof(req), true);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(ret, "%s %s %s", __func__, bridge, member);
+       return ret;
+}
+
+
+static int
+bridge_set_mac_nat(int s, const char * bridge, const char * member, bool enable)
+{
+       uint32_t        flags;
+       bool            need_set = false;
+       struct ifbreq   req;
+       int             ret;
+
+       memset(&req, 0, sizeof(req));
+       strlcpy(req.ifbr_ifsname, member, sizeof(req.ifbr_ifsname));
+       ret = siocdrvspec(s, bridge, BRDGGIFFLGS, &req, sizeof(req), false);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(ret, "BRDGGIFFLGS %s %s", bridge, member);
+       flags = req.ifbr_ifsflags;
+       if (enable) {
+               if ((flags & IFBIF_MAC_NAT) == 0) {
+                       /* need to set it */
+                       need_set = true;
+                       req.ifbr_ifsflags |= IFBIF_MAC_NAT;
+               }
+       } else if ((flags & IFBIF_MAC_NAT) != 0) {
+               /* need to clear it */
+               need_set = true;
+               req.ifbr_ifsflags &= ~(uint32_t)IFBIF_MAC_NAT;
+       }
+       if (need_set) {
+               ret = siocdrvspec(s, bridge, BRDGSIFFLGS,
+                   &req, sizeof(req), true);
+               T_QUIET;
+               T_ASSERT_POSIX_SUCCESS(ret, "BRDGSIFFLGS %s %s 0x%x => 0x%x",
+                   bridge, member,
+                   flags, req.ifbr_ifsflags);
+       }
+       return ret;
+}
+
+static struct ifbareq *
+bridge_rt_table_copy_common(const char * bridge, u_int * ret_count)
+{
+       struct ifbaconf         ifbac;
+       u_int                   len = 8 * 1024;
+       char *                  inbuf = NULL;
+       char *                  ninbuf;
+       int                     ret;
+       struct ifbareq *        rt_table = NULL;
+       int                     s;
+
+       s = inet_dgram_socket();
+
+       /*
+        * BRDGRTS should work like other ioctls, where passing a NULL
+        * buffer means "tell me how many entries there are".
+        * Unfortunately, it doesn't, so we have to pass in a buffer and
+        * then check whether it was large enough to hold everything.
+        */
+       for (;;) {
+               ninbuf = realloc(inbuf, len);
+               T_QUIET;
+               T_ASSERT_NOTNULL((void *)ninbuf, "realloc %u", len);
+               ifbac.ifbac_len = len;
+               ifbac.ifbac_buf = inbuf = ninbuf;
+               ret = siocdrvspec(s, bridge, BRDGRTS,
+                   &ifbac, sizeof(ifbac), false);
+               T_QUIET;
+               T_ASSERT_POSIX_SUCCESS(ret, "%s %s", __func__, bridge);
+               if ((ifbac.ifbac_len + sizeof(*rt_table)) < len) {
+                       /* we passed a buffer larger than what was required */
+                       break;
+               }
+               len *= 2;
+       }
+       if (ifbac.ifbac_len == 0) {
+               free(ninbuf);
+               T_LOG("No bridge routing entries");
+               goto done;
+       }
+       *ret_count = ifbac.ifbac_len / sizeof(*rt_table);
+       rt_table = (struct ifbareq *)(void *)ninbuf;
+done:
+       if (rt_table == NULL) {
+               *ret_count = 0;
+       }
+       if (s >= 0) {
+               close(s);
+       }
+       return rt_table;
+}
+
+static struct ifbareq *
+bridge_rt_table_copy(u_int * ret_count)
+{
+       return bridge_rt_table_copy_common(BRIDGE200, ret_count);
+}
+
+static void
+bridge_rt_table_log(struct ifbareq *rt_table, u_int count)
+{
+       u_int                   i;
+       char                    ntoabuf[ETHER_NTOA_BUFSIZE];
+       struct ifbareq *        ifba;
+
+       for (i = 0, ifba = rt_table; i < count; i++, ifba++) {
+               ether_ntoa_buf((const ether_addr_t *)&ifba->ifba_dst,
+                   ntoabuf, sizeof(ntoabuf));
+               T_LOG("%s %s %lu", ifba->ifba_ifsname, ntoabuf,
+                   ifba->ifba_expire);
+       }
+       return;
+}
+
+static struct ifbrmne *
+bridge_mac_nat_entries_copy_common(const char * bridge, u_int * ret_count)
+{
+       char *                  buf = NULL;
+       u_int                   count = 0;
+       int                     err;
+       u_int                   i;
+       struct ifbrmnelist      mnl;
+       struct ifbrmne *        ret_list = NULL;
+       int                     s;
+       char *                  scan;
+
+
+       s = inet_dgram_socket();
+
+       /* find out how many there are */
+       bzero(&mnl, sizeof(mnl));
+       err = siocdrvspec(s, bridge, BRDGGMACNATLIST, &mnl, sizeof(mnl), false);
+       if (err != 0 && S_cleaning_up) {
+               T_LOG("BRDGGMACNATLIST %s failed %d", bridge, errno);
+               goto done;
+       }
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(err, "BRDGGMACNATLIST %s", bridge);
+       T_QUIET;
+       T_ASSERT_GE(mnl.ifbml_elsize, (uint16_t)sizeof(struct ifbrmne),
+           "mac nat entry size %u minsize %u",
+           mnl.ifbml_elsize, (u_int)sizeof(struct ifbrmne));
+       if (mnl.ifbml_len == 0) {
+               goto done;
+       }
+
+       /* call again with a buffer large enough to hold them */
+       buf = malloc(mnl.ifbml_len);
+       T_QUIET;
+       T_ASSERT_NOTNULL(buf, "mac nat entries buffer");
+       mnl.ifbml_buf = buf;
+       err = siocdrvspec(s, bridge, BRDGGMACNATLIST, &mnl, sizeof(mnl), false);
+       T_QUIET;
+       T_ASSERT_POSIX_SUCCESS(err, "BRDGGMACNATLIST %s", bridge);
+       count = mnl.ifbml_len / mnl.ifbml_elsize;
+       if (count == 0) {
+               goto done;
+       }
+       if (mnl.ifbml_elsize == sizeof(struct ifbrmne)) {
+               /* element size is expected size, no need to "right-size" it */
+               ret_list = (struct ifbrmne *)(void *)buf;
+               buf = NULL;
+               goto done;
+       }
+       /* element size is larger than we expect, create a "right-sized" array */
+       ret_list = malloc(count * sizeof(*ret_list));
+       T_QUIET;
+       T_ASSERT_NOTNULL(ret_list, "mac nat entries list");
+       for (i = 0, scan = buf; i < count; i++, scan += mnl.ifbml_elsize) {
+               struct ifbrmne *        ifbmne;
+
+               ifbmne = (struct ifbrmne *)(void *)scan;
+               ret_list[i] = *ifbmne;
+       }
+done:
+       if (s >= 0) {
+               close(s);
+       }
+       if (buf != NULL) {
+               free(buf);
+       }
+       *ret_count = count;
+       return ret_list;
+}
+
+static struct ifbrmne *
+bridge_mac_nat_entries_copy(u_int * ret_count)
+{
+       return bridge_mac_nat_entries_copy_common(BRIDGE200, ret_count);
+}
+
+static void
+bridge_mac_nat_entries_log(struct ifbrmne * entries, u_int count)
+{
+       u_int                   i;
+       char                    ntoabuf[ETHER_NTOA_BUFSIZE];
+       char                    ntopbuf[INET6_ADDRSTRLEN];
+       struct ifbrmne *        scan;
+
+       for (i = 0, scan = entries; i < count; i++, scan++) {
+               ether_ntoa_buf((const ether_addr_t *)&scan->ifbmne_mac,
+                   ntoabuf, sizeof(ntoabuf));
+               inet_ntop(scan->ifbmne_af, &scan->ifbmne_ip,
+                   ntopbuf, sizeof(ntopbuf));
+               printf("%s %s %s %lu\n",
+                   scan->ifbmne_ifname, ntopbuf, ntoabuf,
+                   (unsigned long)scan->ifbmne_expire);
+       }
+       return;
+}
+
+/**
+** Test Main
+**/
+static u_int                    S_n_ports;
+static switch_port_list_t       S_port_list;
+
+static void
+bridge_cleanup(const char * bridge, u_int n_ports, bool fail_on_error);
+
+static void
+cleanup_common(bool dump_table)
+{
+       if (S_n_ports == 0) {
+               return;
+       }
+       S_cleaning_up = true;
+       if (S_port_list != NULL && (S_port_list->mac_nat || dump_table)) {
+               switch_port_list_log(S_port_list);
+               if (S_port_list->mac_nat) {
+                       switch_port_list_verify_mac_nat(S_port_list, true);
+               }
+               (void)switch_port_list_verify_rt_table(S_port_list, true);
+       }
+       if (S_debug) {
+               T_LOG("sleeping for 5 seconds\n");
+               sleep(5);
+       }
+       bridge_cleanup(BRIDGE200, S_n_ports, false);
+       return;
+}
+
+static void
+cleanup(void)
+{
+       cleanup_common(true);
+       return;
+}
+
+static void
+sigint_handler(__unused int sig)
+{
+       cleanup_common(false);
+       signal(SIGINT, SIG_DFL);
+}
+
+static switch_port_list_t
+bridge_setup(char * bridge, u_int n_ports, u_int num_addrs, bool mac_nat)
+{
+       errno_t                 err;
+       switch_port_list_t      list = NULL;
+       int                     s;
+
+       S_n_ports = n_ports;
+       T_ATEND(cleanup);
+       T_SETUPBEGIN;
+       s = inet_dgram_socket();
+       err = ifnet_create(s, bridge);
+       if (err != 0) {
+               goto done;
+       }
+       list = switch_port_list_alloc(n_ports, mac_nat);
+       for (u_int i = 0; i < n_ports; i++) {
+               bool    do_mac_nat;
+               char    ifname[IFNAMSIZ];
+               char    member_ifname[IFNAMSIZ];
+               ether_addr_t member_mac;
+
+               snprintf(ifname, sizeof(ifname), "%s%d",
+                   FETH_NAME, i);
+               snprintf(member_ifname, sizeof(member_ifname), "%s%d",
+                   FETH_NAME, i + n_ports);
+               err = ifnet_create(s, ifname);
+               if (err != 0) {
+                       goto done;
+               }
+               ifnet_attach_ip(s, ifname);
+               err = ifnet_create(s, member_ifname);
+               if (err != 0) {
+                       goto done;
+               }
+               err = ifnet_get_lladdr(s, member_ifname, &member_mac);
+               if (err != 0) {
+                       goto done;
+               }
+               err = fake_set_peer(s, ifname, member_ifname);
+               if (err != 0) {
+                       goto done;
+               }
+               /* add the interface's peer to the bridge */
+               err = bridge_add_member(s, bridge, member_ifname);
+               if (err != 0) {
+                       goto done;
+               }
+
+               do_mac_nat = (i == 0 && mac_nat);
+               if (do_mac_nat) {
+                       /* enable MAC NAT on unit 0 */
+                       err = bridge_set_mac_nat(s, bridge, member_ifname,
+                           true);
+                       if (err != 0) {
+                               goto done;
+                       }
+               }
+               /* we'll send/receive on the interface */
+               err = switch_port_list_add_port(list, i, ifname, member_ifname,
+                   &member_mac, num_addrs,
+                   do_mac_nat);
+               if (err != 0) {
+                       goto done;
+               }
+       }
+done:
+       if (s >= 0) {
+               close(s);
+       }
+       if (err != 0 && list != NULL) {
+               switch_port_list_dealloc(list);
+               list = NULL;
+       }
+       T_SETUPEND;
+       return list;
+}
+
+static void
+bridge_cleanup(const char * bridge, u_int n_ports, bool fail_on_error)
+{
+       int s;
+
+       s = inet_dgram_socket();
+       ifnet_destroy(s, bridge, fail_on_error);
+       for (u_int i = 0; i < n_ports; i++) {
+               char    ifname[IFNAMSIZ];
+               char    member_ifname[IFNAMSIZ];
+
+               snprintf(ifname, sizeof(ifname), "%s%d",
+                   FETH_NAME, i);
+               snprintf(member_ifname, sizeof(member_ifname), "%s%d",
+                   FETH_NAME, i + n_ports);
+               ifnet_destroy(s, ifname, fail_on_error);
+               ifnet_destroy(s, member_ifname, fail_on_error);
+       }
+       if (s >= 0) {
+               close(s);
+       }
+       S_n_ports = 0;
+       return;
+}
+
+/*
+ *  Basic Bridge Tests
+ *
+ *  Broadcast
+ *  - two cases: actual broadcast, unknown ethernet
+ *  - send broadcast packets
+ *  - verify all received
+ *  - check bridge rt list contains all expected MAC addresses
+ *  - send unicast ARP packets
+ *  - verify packets received only on expected port
+ *
+ *  MAC-NAT
+ *  - verify ARP translation
+ *  - verify IPv4 translation
+ *  - verify DHCP broadcast bit conversion
+ *  - verify IPv6 translation
+ *  - verify ND6 translation (Neighbor, Router)
+ */
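+
+/*
+ * A minimal sketch (hypothetical, not part of this change) of how the
+ * drivers below are typically invoked from a darwintest entry point;
+ * the port and address counts here are placeholders:
+ *
+ *     T_DECL(net_if_bridge_bcast, "bridge broadcast IPv4")
+ *     {
+ *             bridge_test(validate_broadcast_dhost, NULL,
+ *                 &ether_broadcast, AF_INET, 5, 1);
+ *     }
+ */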
+
+static void
+bridge_test(packet_validator_t validator,
+    void * context,
+    const ether_addr_t * dst_eaddr,
+    uint8_t af, u_int n_ports, u_int num_addrs)
+{
+#if TARGET_OS_BRIDGE
+       T_SKIP("Test uses too much memory");
+#else /* TARGET_OS_BRIDGE */
+       switch_port_list_t port_list;
+
+       signal(SIGINT, sigint_handler);
+       port_list = bridge_setup(BRIDGE200, n_ports, num_addrs, false);
+       if (port_list == NULL) {
+               T_FAIL("bridge_setup");
+               return;
+       }
+       S_port_list = port_list;
+       bridge_learning_test(port_list, af, validator, context, dst_eaddr);
+
+       if (S_debug) {
+               T_LOG("Sleeping for 5 seconds");
+               sleep(5);
+       }
+       bridge_cleanup(BRIDGE200, n_ports, true);
+       switch_port_list_dealloc(port_list);
+       return;
+#endif /* TARGET_OS_BRIDGE */
+}
+
+static void
+bridge_test_mac_nat_ipv4(u_int n_ports, u_int num_addrs)
+{
+#if TARGET_OS_BRIDGE
+       T_SKIP("Test uses too much memory");
+#else /* TARGET_OS_BRIDGE */
+       switch_port_list_t port_list;
+
+       signal(SIGINT, sigint_handler);
+       port_list = bridge_setup(BRIDGE200, n_ports, num_addrs, true);
+       if (port_list == NULL) {
+               T_FAIL("bridge_setup");
+               return;
+       }
+       S_port_list = port_list;
+
+       /* verify that IPv4 packets get translated when necessary */
+       mac_nat_test_ip(port_list, AF_INET);
+
+       /* verify the DHCP broadcast bit gets set appropriately */
+       mac_nat_test_dhcp(port_list);
+
+       /* verify that ARP packet gets translated when necessary */
+       mac_nat_test_arp_out(port_list);
+       mac_nat_test_arp_in(port_list);
+
+       if (S_debug) {
+               T_LOG("Sleeping for 5 seconds");
+               sleep(5);
+       }
+       bridge_cleanup(BRIDGE200, n_ports, true);
+       switch_port_list_dealloc(port_list);
+       return;
+#endif /* TARGET_OS_BRIDGE */
+}
+
+static void
+bridge_test_mac_nat_ipv6(u_int n_ports, u_int num_addrs)
+{
+#if TARGET_OS_BRIDGE
+       T_SKIP("Test uses too much memory");
+#else /* TARGET_OS_BRIDGE */
+       switch_port_list_t port_list;
+
+       signal(SIGINT, sigint_handler);
+       port_list = bridge_setup(BRIDGE200, n_ports, num_addrs, true);
+       if (port_list == NULL) {
+               T_FAIL("bridge_setup");
+               return;
+       }
+       S_port_list = port_list;
+
+       /* verify that IPv6 packets get translated when necessary */
+       mac_nat_test_ip(port_list, AF_INET6);
+
+       /* verify that ND6 packet gets translated when necessary */
+       mac_nat_test_nd6_out(port_list);
+       if (S_debug) {
+               T_LOG("Sleeping for 5 seconds");
+               sleep(5);
+       }
+       bridge_cleanup(BRIDGE200, n_ports, true);
+       switch_port_list_dealloc(port_list);
+       return;
+#endif /* TARGET_OS_BRIDGE */
+}
+
+static void
+system_cmd(const char *cmd, bool fail_on_error)
+{
+       pid_t pid = -1;
+       int exit_status = 0;
+       const char *argv[] = {
+               "/usr/local/bin/bash",
+               "-c",
+               cmd,
+               NULL
+       };
+
+       int rc = dt_launch_tool(&pid, (char **)(void *)argv, false, NULL, NULL);
+       T_QUIET;
+       T_ASSERT_EQ(rc, 0, "dt_launch_tool(%s) failed", cmd);
+
+       if (dt_waitpid(pid, &exit_status, NULL, 30)) {
+               T_QUIET;
+               T_ASSERT_MACH_SUCCESS(exit_status, "command(%s)", cmd);
+       } else {
+               if (fail_on_error) {
+                       T_FAIL("dt_waitpid(%s) failed", cmd);
+               }
+       }
+}
+
+static void
+cleanup_pf(void)
+{
+       struct ifbrparam param;
+       int s = inet_dgram_socket();
+
+       system_cmd("pfctl -d", false);
+       system_cmd("pfctl -F all", false);
+
+       param.ifbrp_filter = 0;
+       siocdrvspec(s, BRIDGE200, BRDGSFILT,
+           &param, sizeof(param), true);
+       return;
+}
+
+static void
+block_all_traffic(bool input, const char* infname1, const char* infname2)
+{
+       int s = inet_dgram_socket();
+       int ret;
+       struct ifbrparam param;
+       char command[512];
+       char *dir = input ? "in" : "out";
+
+       snprintf(command, sizeof(command), "echo \"block %s on %s all\nblock %s on %s all\n\" | pfctl -vvv -f -",
+           dir, infname1, dir, infname2);
+       /* enable block all filter */
+       param.ifbrp_filter = IFBF_FILT_MEMBER | IFBF_FILT_ONLYIP;
+       ret = siocdrvspec(s, BRIDGE200, BRDGSFILT,
+           &param, sizeof(param), true);
+       T_ASSERT_POSIX_SUCCESS(ret,
+           "SIOCDRVSPEC(BRDGSFILT %s, 0x%x)",
+           BRIDGE200, param.ifbrp_filter);
+       // ignore errors so that a missing pf.os file doesn't cause spurious failures
+       system_cmd(command, false);
+       system_cmd("pfctl -e", true);
+       system_cmd("pfctl -s all", true);
+}
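+/*
+ * For two members named feth0 and feth1 (names assumed here from FETH_NAME),
+ * the command assembled above expands to roughly:
+ *
+ *     echo "block in on feth0 all
+ *     block in on feth1 all
+ *     " | pfctl -vvv -f -
+ */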
+
+/*
+ *  Basic bridge filter test
+ *
+ *  For both broadcast and unicast transfers ensure that data can
+ *  be blocked using pf on the bridge
+ */
+
+static void
+filter_test(uint8_t af)
+{
+#if TARGET_OS_BRIDGE
+       T_SKIP("pfctl isn't valid on this platform");
+#else /* TARGET_OS_BRIDGE */
+       switch_port_list_t port_list;
+       switch_port_t   port;
+       const u_int n_ports = 2;
+       u_int num_addrs = 1;
+       u_int i;
+       char ntoabuf[ETHER_NTOA_BUFSIZE];
+       union ifbrip dst_ip;
+       bool blocked = true;
+       bool input = true;
+       const char* ifnames[2];
+
+       signal(SIGINT, sigint_handler);
+
+       T_ATEND(cleanup);
+       T_ATEND(cleanup_pf);
+
+       port_list = bridge_setup(BRIDGE200, n_ports, num_addrs, false);
+       if (port_list == NULL) {
+               T_FAIL("bridge_setup");
+               return;
+       }
+
+       ether_ntoa_buf(&ether_broadcast, ntoabuf, sizeof(ntoabuf));
+
+       S_port_list = port_list;
+       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+               ifnames[i] = port->member_ifname;
+       }
+
+       get_broadcast_ip_address(af, &dst_ip);
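+       /*
+        * Three passes, repeated below for unicast: pf blocking input,
+        * then pf blocking output (no packets may be received in either),
+        * then pf disabled (every packet must be received).
+        */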
+       do {
+               do {
+                       if (blocked) {
+                               block_all_traffic(input, ifnames[0], ifnames[1]);
+                       }
+                       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+                               if (S_debug) {
+                                       T_LOG("Sending on %s", port->ifname);
+                               }
+                               for (u_int j = 0; j < port->num_addrs; j++) {
+                                       uint32_t        generation;
+
+                                       generation = next_generation();
+                                       send_generation(port,
+                                           af,
+                                           j,
+                                           &ether_broadcast,
+                                           &dst_ip,
+                                           generation);
+
+                                       /* receive across all ports */
+                                       check_receive_generation(port_list,
+                                           af,
+                                           generation,
+                                           validate_broadcast_dhost,
+                                           NULL);
+
+                                       /* ensure that every port saw the right number of packets */
+                                       if (blocked) {
+                                               check_received_count(port_list, port, 0);
+                                       } else {
+                                               check_received_count(port_list, port, 1);
+                                       }
+                               }
+                       }
+                       T_PASS("%s broadcast %s %s", __func__, blocked ? "blocked" : "not blocked", input ? "input" : "output");
+                       input = !input;
+                       cleanup_pf();
+               } while (input == false && blocked);
+               blocked = !blocked;
+       } while (blocked == false);
+
+       do {
+               do {
+                       if (blocked) {
+                               block_all_traffic(input, ifnames[0], ifnames[1]);
+                       }
+                       for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+                               /* send unicast packets to every other port's MAC addresses */
+                               unicast_send_all(port_list, af, port);
+
+                               /* receive all of that generated traffic */
+                               switch_port_list_check_receive(port_list, af, NULL, 0,
+                                   validate_port_dhost, NULL);
+
+                               /* ensure that every port saw the right number of packets */
+                               if (blocked) {
+                                       check_received_count(port_list, port, 0);
+                               } else {
+                                       check_received_count(port_list, port, 1);
+                               }
+                       }
+                       T_PASS("%s unicast %s %s", __func__, blocked ? "blocked" : "not blocked", input ? "input" : "output");
+                       input = !input;
+                       cleanup_pf();
+               } while (input == false && blocked);
+               blocked = !blocked;
+       } while (blocked == false);
+
+       bridge_cleanup(BRIDGE200, n_ports, true);
+       switch_port_list_dealloc(port_list);
+       return;
+#endif /* TARGET_OS_BRIDGE */
+}
+
+T_DECL(if_bridge_bcast,
+    "bridge broadcast IPv4",
+    T_META_ASROOT(true))
+{
+       bridge_test(validate_broadcast_dhost, NULL, &ether_broadcast,
+           AF_INET, 5, 1);
+}
+
+T_DECL(if_bridge_bcast_many,
+    "bridge broadcast many IPv4",
+    T_META_ASROOT(true))
+{
+       bridge_test(validate_broadcast_dhost, NULL, &ether_broadcast,
+           AF_INET, 5, 20);
+}
+
+T_DECL(if_bridge_unknown,
+    "bridge unknown host IPv4",
+    T_META_ASROOT(true))
+{
+       bridge_test(validate_not_present_dhost, NULL, &ether_external,
+           AF_INET, 5, 1);
+}
+
+T_DECL(if_bridge_bcast_v6,
+    "bridge broadcast IPv6",
+    T_META_ASROOT(true))
+{
+       bridge_test(validate_broadcast_dhost, NULL, &ether_broadcast,
+           AF_INET6, 5, 1);
+}
+
+T_DECL(if_bridge_bcast_many_v6,
+    "bridge broadcast many IPv6",
+    T_META_ASROOT(true))
+{
+       bridge_test(validate_broadcast_dhost, NULL, &ether_broadcast,
+           AF_INET6, 5, 20);
+}
+
+T_DECL(if_bridge_unknown_v6,
+    "bridge unknown host IPv6",
+    T_META_ASROOT(true))
+{
+       bridge_test(validate_not_present_dhost, NULL, &ether_external,
+           AF_INET6, 5, 1);
+}
+
+T_DECL(if_bridge_mac_nat_ipv4,
+    "bridge mac nat ipv4",
+    T_META_ASROOT(true))
+{
+       bridge_test_mac_nat_ipv4(5, 10);
+}
+
+T_DECL(if_bridge_mac_nat_ipv6,
+    "bridge mac nat ipv6",
+    T_META_ASROOT(true))
+{
+       bridge_test_mac_nat_ipv6(5, 10);
+}
+
+T_DECL(if_bridge_filter_ipv4,
+    "bridge filter ipv4",
+    T_META_ASROOT(true))
+{
+       filter_test(AF_INET);
+}
+
+T_DECL(if_bridge_filter_ipv6,
+    "bridge filter ipv6",
+    T_META_ASROOT(true))
+{
+       filter_test(AF_INET6);
+}
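Note on the broadcast cases above: they all reduce to flooding, i.e. a frame
whose Ethernet destination is the all-ones address must be delivered to every
bridge member. A minimal sketch of that header setup follows, for illustration
only; the helper name and the send path (e.g. writing the frame through a BPF
descriptor bound to the feth interface) are assumptions, not taken from this
test:

    #include <string.h>
    #include <arpa/inet.h>
    #include <net/ethernet.h>

    /* Fill an Ethernet header that the bridge is forced to flood. */
    static void
    fill_broadcast_header(struct ether_header *eh, const struct ether_addr *src)
    {
            memset(eh->ether_dhost, 0xff, ETHER_ADDR_LEN);  /* ff:ff:ff:ff:ff:ff */
            memcpy(eh->ether_shost, src->octet, ETHER_ADDR_LEN);
            eh->ether_type = htons(ETHERTYPE_IP);           /* IPv4 payload follows */
    }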
diff --git a/tests/netagent_race_infodisc_56244905.c b/tests/netagent_race_infodisc_56244905.c
new file mode 100644 (file)
index 0000000..cc451d8
--- /dev/null
@@ -0,0 +1,198 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include <pthread.h>
+#include <sys/kern_control.h>
+#include <sys/kern_event.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/sys_domain.h>
+#include <unistd.h>
+#include <uuid/uuid.h>
+
+#include <darwintest.h>
+
+static int finished = 0;
+
+#ifndef KEV_NETAGENT_SUBCLASS
+#define KEV_NETAGENT_SUBCLASS 9
+#endif
+
+#ifndef NETAGENT_MESSAGE_TYPE_REGISTER
+#define NETAGENT_MESSAGE_TYPE_REGISTER 1
+#endif
+
+#ifndef NETAGENT_MESSAGE_TYPE_UNREGISTER
+#define NETAGENT_MESSAGE_TYPE_UNREGISTER 2
+#endif
+
+struct netagent_message_header {
+       uint8_t message_type;
+       uint8_t message_flags;
+       uint32_t message_id;
+       uint32_t message_error;
+       uint32_t message_payload_length;
+};
+
+struct kev_msg {
+       uint32_t total_size;
+       uint32_t vendor_code;
+       uint32_t kev_class;
+       uint32_t kev_subclass;
+       uint32_t id;
+       uint32_t event_code;
+};
+
+struct kev_netagent_data {
+       uuid_t netagent_uuid;
+};
+
+struct netagent {
+       uuid_t netagent_uuid;
+       char netagent_domain[32];
+       char netagent_type[32];
+       char netagent_desc[128];
+       uint32_t netagent_flags;
+       uint32_t netagent_data_size;
+       uint8_t netagent_data[0];
+};
+
+static void *
+register_sockopt_racer(void *data)
+{
+       int s = *(int *)data;
+       struct {
+               struct netagent_message_header header;
+               struct netagent netagent;
+       } msg;
+
+       bzero(&msg, sizeof(msg));
+       msg.header.message_type = NETAGENT_MESSAGE_TYPE_REGISTER;
+       msg.header.message_payload_length = sizeof(struct netagent);
+
+       while (!finished) {
+               send(s, &msg, sizeof(msg), 0);
+       }
+
+       return NULL;
+}
+
+static void *
+register_message_racer(void *data)
+{
+       int s = *(int *)data;
+       struct netagent netagent;
+
+       bzero(&netagent, sizeof(netagent));
+       while (!finished) {
+               setsockopt(s, SYSPROTO_CONTROL, NETAGENT_MESSAGE_TYPE_REGISTER, &netagent, sizeof(netagent));
+       }
+
+       return NULL;
+}
+
+#define SIZEOF_STRUCT_NETAGENT_WRAPPER 280
+
+static void *
+unregister_racer(void *data)
+{
+       int s = *(int *)data;
+       uint8_t spraybuf[SIZEOF_STRUCT_NETAGENT_WRAPPER];
+
+       memset(spraybuf, 0x41, sizeof(spraybuf));
+
+       while (!finished) {
+               setsockopt(s, SYSPROTO_CONTROL, NETAGENT_MESSAGE_TYPE_UNREGISTER, NULL, 0);
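+               /*
+                * ioctl() allocates a kernel buffer and copies the argument in
+                * before validating the file descriptor, so this call on fd -1
+                * fails with EBADF yet still acts as a cheap heap spray of
+                * SIZEOF_STRUCT_NETAGENT_WRAPPER bytes of 0x41.
+                */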
+               ioctl(-1, _IOW('x', 0, spraybuf), spraybuf);
+       }
+
+       return NULL;
+}
+
+#define NITERS 200000
+
+T_DECL(netagent_race_infodisc_56244905, "Netagent race between register and post event.")
+{
+       int s;
+       int evsock;
+       pthread_t reg_th;
+       pthread_t unreg_th;
+       struct kev_request kev_req = {
+               .vendor_code = KEV_VENDOR_APPLE,
+               .kev_class = KEV_NETWORK_CLASS,
+               .kev_subclass = KEV_NETAGENT_SUBCLASS
+       };
+       struct ctl_info ci;
+       struct sockaddr_ctl sc;
+       struct {
+               struct kev_msg msg;
+               struct kev_netagent_data nd;
+       } ev;
+       int n;
+
+       T_SETUPBEGIN;
+       /* set up the event socket so we can receive notifications: */
+       T_ASSERT_POSIX_SUCCESS(evsock = socket(AF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT), NULL);
+       T_ASSERT_POSIX_SUCCESS(ioctl(evsock, SIOCSKEVFILT, &kev_req), NULL);
+
+       /* this is the socket we'll race on: */
+       T_ASSERT_POSIX_SUCCESS(s = socket(AF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL), NULL);
+
+       /* connect to netagent: */
+       bzero(&ci, sizeof(ci));
+       strcpy(ci.ctl_name, "com.apple.net.netagent");
+       T_ASSERT_POSIX_SUCCESS(ioctl(s, CTLIOCGINFO, &ci), NULL);
+
+       bzero(&sc, sizeof(sc));
+       sc.sc_id = ci.ctl_id;
+
+       T_ASSERT_POSIX_SUCCESS(connect(s, (const struct sockaddr *)&sc, sizeof(sc)), NULL);
+       T_SETUPEND;
+
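+       /*
+        * The racer threads always register an agent with an all-zero UUID,
+        * so any nonzero UUID bytes in a received register event indicate
+        * leaked kernel memory.
+        */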
+       /* variant 1: */
+       /* spin off the racer threads: */
+       T_ASSERT_POSIX_ZERO(pthread_create(&reg_th, NULL, register_message_racer, &s), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_create(&unreg_th, NULL, unregister_racer, &s), NULL);
+
+       /* keep going until we're done: */
+       for (n = 0; n < NITERS; ++n) {
+               bzero(&ev, sizeof(ev));
+               T_ASSERT_POSIX_SUCCESS(recv(evsock, &ev, sizeof(ev), 0), NULL);
+
+               if (ev.nd.netagent_uuid[0] != 0) {
+                       finished = 1;
+                       T_ASSERT_FAIL("netagent register event leaked data: 0x%08lx", *(unsigned long *)ev.nd.netagent_uuid);
+               }
+       }
+
+       finished = 1;
+
+       T_ASSERT_POSIX_ZERO(pthread_join(reg_th, NULL), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_join(unreg_th, NULL), NULL);
+
+       finished = 0;
+
+       /* variant 2: */
+       /* spin off the racer threads: */
+       T_ASSERT_POSIX_ZERO(pthread_create(&reg_th, NULL, register_sockopt_racer, &s), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_create(&unreg_th, NULL, unregister_racer, &s), NULL);
+
+       /* keep going until we're done: */
+       for (n = 0; n < NITERS; ++n) {
+               bzero(&ev, sizeof(ev));
+               T_ASSERT_POSIX_SUCCESS(recv(evsock, &ev, sizeof(ev), 0), NULL);
+
+               if (ev.nd.netagent_uuid[0] != 0) {
+                       finished = 1;
+                       T_ASSERT_FAIL("netagent register event leaked data: 0x%08lx", *(unsigned long *)ev.nd.netagent_uuid);
+               }
+       }
+
+       finished = 1;
+
+       T_ASSERT_POSIX_ZERO(pthread_join(reg_th, NULL), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_join(unreg_th, NULL), NULL);
+}
diff --git a/tests/socket_0byte_udp_poll_58140856.c b/tests/socket_0byte_udp_poll_58140856.c
new file mode 100644 (file)
index 0000000..e87db48
--- /dev/null
@@ -0,0 +1,108 @@
+#include <darwintest.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+T_GLOBAL_META(T_META_RUN_CONCURRENTLY(true));
+
+#define TEST_ADDR "127.0.0.1"
+#define TEST_PORT 4242
+
+static struct {
+       int fd;
+       struct sockaddr_in addr;
+} server;
+
+static void
+server_listen(void)
+{
+       int r;
+
+       server.fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+       T_ASSERT_POSIX_SUCCESS(server.fd, "socket");
+
+       memset(&server.addr, 0, sizeof(server.addr));
+       server.addr.sin_family = AF_INET;
+       server.addr.sin_port = htons(TEST_PORT);
+
+       inet_pton(AF_INET, TEST_ADDR, &server.addr.sin_addr);
+
+       r = bind(server.fd, (struct sockaddr*) &server.addr, sizeof(server.addr));
+       T_ASSERT_POSIX_SUCCESS(r, "bind");
+}
+
+static void
+send_message(void)
+{
+       int fd;
+       struct msghdr msg;
+       struct iovec iov;
+
+       fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+       T_ASSERT_POSIX_SUCCESS(fd, "socket");
+
+       memset(&msg, 0, sizeof(msg));
+
+       msg.msg_name = &server.addr;
+       msg.msg_namelen = sizeof(server.addr);
+
+       iov.iov_base = "";
+       iov.iov_len = 0;
+       msg.msg_iov = &iov;
+       msg.msg_iovlen = 1;
+
+       ssize_t r = sendmsg(fd, &msg, 0);
+       T_ASSERT_EQ(r, (ssize_t)iov.iov_len, "sendmsg");
+
+       close(fd);
+}
+
+static void
+server_poll(void)
+{
+       int kq;
+       struct kevent event = {
+               .flags  = EV_ADD,
+               .filter = EVFILT_READ,
+               .ident  = (unsigned long)server.fd,
+       };
+       int r;
+
+       kq = kqueue();
+       T_ASSERT_POSIX_SUCCESS(kq, "kqueue");
+
+       /* Add and poll */
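+       /*
+        * The same struct serves as both changelist and eventlist, so a
+        * single kevent() call registers EVFILT_READ and blocks until it
+        * fires; the zero-length datagram must make the socket readable.
+        */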
+       r = kevent(kq, &event, 1, &event, 1, NULL);
+       T_EXPECT_EQ(r, 1, "should return an event");
+
+       close(kq);
+}
+
+T_DECL(socket_0byte_udp_poll_58140856,
+    "Tests that 0-sized UDP packets wake up kevent")
+{
+       T_LOG("Starting...\n");
+
+       /* Listen on UDP port */
+       server_listen();
+
+       T_LOG("Server bound to [%s]:%d\n", TEST_ADDR, TEST_PORT);
+
+       /* Send 0-UDP packet to that port */
+       send_message();
+
+       T_LOG("Sent message to server\n");
+
+       /* Poll kqueue events */
+       server_poll();
+
+       T_LOG("Got kqueue event\n");
+
+       close(server.fd);
+}
index a183a87dd656830225a8549b2c56bbd18a507738..9ff129091bb16c0e0205ed3d75072336c4b6cd1f 100644 (file)
@@ -275,9 +275,7 @@ child_init(void)
 #if !TARGET_OS_OSX
        /* allow us to be frozen */
        freeze_state = memorystatus_control(MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE, pid, 0, NULL, 0);
-       if (freeze_state == -1) {
-               T_SKIP("This device doesn't have CONFIG_FREEZE enabled.");
-       } else if (freeze_state == 0) {
+       if (freeze_state == 0) {
                T_LOG("CHILD was found to be UNFREEZABLE, enabling freezing.");
                memorystatus_control(MEMORYSTATUS_CMD_SET_PROCESS_IS_FREEZABLE, pid, 1, NULL, 0);
                freeze_state = memorystatus_control(MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE, pid, 0, NULL, 0);
@@ -341,6 +339,14 @@ T_DECL(basic, "test that no-fault stackshot works correctly")
        T_LOG("parent pid: %d\n", getpid());
        T_QUIET; T_ASSERT_POSIX_ZERO(_NSGetExecutablePath(path, &path_size), "_NSGetExecutablePath");
 
+       /* check if we can run the child successfully */
+#if !TARGET_OS_OSX
+       int freeze_state = memorystatus_control(MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE, getpid(), 0, NULL, 0);
+       if (freeze_state == -1) {
+               T_SKIP("This device doesn't have CONFIG_FREEZE enabled.");
+       }
+#endif
+
        /* setup signal handling */
        signal(SIGUSR1, SIG_IGN);
        child_sig_src = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dq);
@@ -354,7 +360,7 @@ T_DECL(basic, "test that no-fault stackshot works correctly")
        T_ATEND(kill_children);
 
        /* wait until the child has recursed enough */
-       dispatch_semaphore_wait(child_done_sema, DISPATCH_TIME_FOREVER);
+       dispatch_semaphore_wait(child_done_sema, dispatch_time(DISPATCH_TIME_NOW, 10 /*seconds*/ * 1000000000ULL));
 
        T_LOG("child finished, parent executing");
 
index 29fa817e17f1a8a833ae4255c5ba85754714e1db..1777335c37bfb2e74c654251eae20abc6177f750 100644 (file)
@@ -2,6 +2,7 @@
 #include <darwintest_utils.h>
 #include <kern/debug.h>
 #include <kern/kern_cdata.h>
+#include <kern/block_hint.h>
 #include <kdd.h>
 #include <libproc.h>
 #include <mach-o/dyld.h>
@@ -32,6 +33,9 @@ static void initialize_thread(void);
 #define PARSE_STACKSHOT_SHAREDCACHE_LAYOUT   0x04
 #define PARSE_STACKSHOT_DISPATCH_QUEUE_LABEL 0x08
 #define PARSE_STACKSHOT_TURNSTILEINFO        0x10
+#define PARSE_STACKSHOT_WAITINFO_CSEG        0x40
+
+static uint64_t cseg_expected_threadid = 0;
 
 #define TEST_STACKSHOT_QUEUE_LABEL        "houston.we.had.a.problem"
 #define TEST_STACKSHOT_QUEUE_LABEL_LENGTH sizeof(TEST_STACKSHOT_QUEUE_LABEL)
@@ -891,6 +895,34 @@ T_DECL(proc_uuid_info, "tests that the main binary UUID for a proc is always pop
        });
 }
 
+T_DECL(cseg_waitinfo, "test that threads stuck in the compressor report correct waitinfo")
+{
+       int val = 1;
+       struct scenario scenario = {
+               .name = "cseg_waitinfo",
+               .quiet = false,
+               .flags = (STACKSHOT_THREAD_WAITINFO | STACKSHOT_KCDATA_FORMAT),
+       };
+
+       dispatch_queue_t dq = dispatch_queue_create("com.apple.stackshot.cseg_waitinfo", NULL);
+       dispatch_semaphore_t child_ok = dispatch_semaphore_create(0);
+
+       dispatch_async(dq, ^{
+               pthread_threadid_np(NULL, &cseg_expected_threadid);
+               dispatch_semaphore_signal(child_ok);
+               T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.cseg_wedge_thread", NULL, NULL, &val, sizeof(val)), "wedge child thread");
+       });
+
+       dispatch_semaphore_wait(child_ok, DISPATCH_TIME_FOREVER);
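+       /* give the async block time to enter the wedge sysctl and block */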
+       sleep(1);
+
+       T_LOG("taking stackshot");
+       take_stackshot(&scenario, ^(void *ssbuf, size_t sslen) {
+               T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.cseg_unwedge_thread", NULL, NULL, &val, sizeof(val)), "unwedge child thread");
+               parse_stackshot(PARSE_STACKSHOT_WAITINFO_CSEG, ssbuf, sslen, -1);
+       });
+}
+
 #pragma mark performance tests
 
 #define SHOULD_REUSE_SIZE_HINT 0x01
@@ -1086,12 +1118,14 @@ parse_stackshot(uint64_t stackshot_parsing_flags, void *ssbuf, size_t sslen, int
 {
        bool delta = (stackshot_parsing_flags & PARSE_STACKSHOT_DELTA);
        bool expect_zombie_child = (stackshot_parsing_flags & PARSE_STACKSHOT_ZOMBIE);
+       bool expect_cseg_waitinfo = (stackshot_parsing_flags & PARSE_STACKSHOT_WAITINFO_CSEG);
        bool expect_shared_cache_layout = false;
        bool expect_shared_cache_uuid = !delta;
        bool expect_dispatch_queue_label = (stackshot_parsing_flags & PARSE_STACKSHOT_DISPATCH_QUEUE_LABEL);
        bool expect_turnstile_lock = (stackshot_parsing_flags & PARSE_STACKSHOT_TURNSTILEINFO);
        bool found_zombie_child = false, found_shared_cache_layout = false, found_shared_cache_uuid = false;
        bool found_dispatch_queue_label = false, found_turnstile_lock = false;
+       bool found_cseg_waitinfo = false;
 
        if (expect_shared_cache_uuid) {
                uuid_t shared_cache_uuid;
@@ -1179,6 +1213,17 @@ parse_stackshot(uint64_t stackshot_parsing_flags, void *ssbuf, size_t sslen, int
                                }
                        }
 
+                       if (expect_cseg_waitinfo) {
+                               NSArray *winfos = container[@"task_snapshots"][@"thread_waitinfo"];
+
+                               for (id i in winfos) {
+                                       if ([i[@"wait_type"] intValue] == kThreadWaitCompressor && [i[@"owner"] unsignedLongLongValue] == cseg_expected_threadid) {
+                                               found_cseg_waitinfo = true;
+                                               break;
+                                       }
+                               }
+                       }
+
                        int pid = [container[@"task_snapshots"][@"task_snapshot"][@"ts_pid"] intValue];
                        if (expect_zombie_child && (pid == child_pid)) {
                                        found_zombie_child = true;
@@ -1276,6 +1321,10 @@ parse_stackshot(uint64_t stackshot_parsing_flags, void *ssbuf, size_t sslen, int
                T_QUIET; T_ASSERT_TRUE(found_turnstile_lock, "found expected deadlock");
        }
 
+       if (expect_cseg_waitinfo) {
+               T_QUIET; T_ASSERT_TRUE(found_cseg_waitinfo, "found c_seg waitinfo");
+       }
+
        T_ASSERT_FALSE(KCDATA_ITER_FOREACH_FAILED(iter), "successfully iterated kcdata");
 }
 
diff --git a/tests/task_create_suid_cred.c b/tests/task_create_suid_cred.c
new file mode 100644 (file)
index 0000000..9787918
--- /dev/null
@@ -0,0 +1,326 @@
+#include <mach/mach.h>
+
+#include <bootstrap.h>
+#include <darwintest.h>
+#include <darwintest_multiprocess.h>
+#include <spawn.h>
+#include <unistd.h>
+
+#if defined(UNENTITLED)
+
+/*
+ * Creating an suid credential should fail without an entitlement.
+ */
+T_DECL(task_create_suid_cred_unentitled, "task_create_suid_cred (no entitlement)", T_META_ASROOT(true))
+{
+       kern_return_t ret = KERN_FAILURE;
+       suid_cred_t sc = SUID_CRED_NULL;
+
+       ret = task_create_suid_cred(mach_task_self(), "/usr/bin/id", 0, &sc);
+       T_ASSERT_MACH_ERROR(ret, KERN_NO_ACCESS, "create a new suid cred for id (no entitlement)");
+}
+
+#else /* ENTITLED */
+
+extern char **environ;
+static const char *server_name = "com.apple.xnu.test.task_create_suid_cred";
+
+/*
+ * This is a positive test case: it spawns /usr/bin/id with a properly created
+ * suid credential and verifies that the output contains "euid=0".
+ * The caller is not running as root.
+ */
+static void
+test_id_cred(suid_cred_t sc_id)
+{
+       posix_spawnattr_t attr;
+       posix_spawn_file_actions_t file_actions;
+       pid_t pid = -1;
+       int status = -1;
+       char template[] = "/tmp/suid_cred.XXXXXX";
+       char *path = NULL;
+       FILE *file = NULL;
+       char *line = NULL;
+       size_t linecap = 0;
+       ssize_t linelen = 0;
+       char *id[] = {"/usr/bin/id", NULL};
+       kern_return_t ret = KERN_FAILURE;
+
+       /* Send stdout to a temporary file. */
+       path = mktemp(template);
+       T_QUIET; T_ASSERT_NOTNULL(path, NULL);
+
+       ret = posix_spawn_file_actions_init(&file_actions);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+       ret = posix_spawn_file_actions_addopen(&file_actions, 1, path,
+           O_WRONLY | O_CREAT | O_TRUNC, 0666);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+       ret = posix_spawnattr_init(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+       T_QUIET; T_ASSERT_NOTNULL(attr, NULL);
+
+       // Attach the suid cred port
+       ret = posix_spawnattr_setsuidcredport_np(&attr, sc_id);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+       ret = posix_spawnp(&pid, id[0], &file_actions, &attr, id, environ);
+       T_ASSERT_POSIX_ZERO(ret, "spawn with suid cred");
+
+       ret = posix_spawnattr_destroy(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+       ret = posix_spawn_file_actions_destroy(&file_actions);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+       // Wait for id to finish executing and exit.
+       do {
+               ret = waitpid(pid, &status, 0);
+       } while (ret < 0 && errno == EINTR);
+       T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, NULL);
+
+       // Read from the temp file and verify that euid is 0.
+       file = fopen(path, "re");
+       T_QUIET; T_ASSERT_NOTNULL(file, NULL);
+
+       linelen = getline(&line, &linecap, file);
+       T_QUIET; T_ASSERT_GT_LONG(linelen, 0L, NULL);
+
+       T_ASSERT_NOTNULL(strstr(line, "euid=0"), "verify that euid is zero");
+
+       free(line);
+       ret = fclose(file);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+       ret = unlink(path);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+}
+
+/*
+ * This is a negative test case which tries to spawn /usr/bin/id with a
+ * previously used credential.  It is expected that posix_spawn() fails.
+ * sc_id should have already been used to successfully spawn /usr/bin/id.
+ */
+static void
+test_id_cred_reuse(suid_cred_t sc_id)
+{
+       posix_spawnattr_t attr;
+       char *id[] = {"/usr/bin/id", NULL};
+       kern_return_t ret = KERN_FAILURE;
+
+       ret = posix_spawnattr_init(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+       T_QUIET; T_ASSERT_NOTNULL(attr, NULL);
+
+       // Attach the suid cred port
+       ret = posix_spawnattr_setsuidcredport_np(&attr, sc_id);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+       ret = posix_spawnp(NULL, id[0], NULL, &attr, id, environ);
+       T_ASSERT_NE(ret, 0, "spawn with used suid cred");
+
+       ret = posix_spawnattr_destroy(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+}
+
+/*
+ * This is a negative test case which tries to spawn /usr/bin/id with a
+ * credential for /bin/ls. It is expected that posix_spawn() fails.
+ */
+static void
+test_ls_cred(suid_cred_t sc_ls)
+{
+       posix_spawnattr_t attr;
+       char *id[] = {"/usr/bin/id", NULL};
+       kern_return_t ret = KERN_FAILURE;
+
+       ret = posix_spawnattr_init(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+       T_QUIET; T_ASSERT_NOTNULL(attr, NULL);
+
+       // Attach the suid cred port
+       ret = posix_spawnattr_setsuidcredport_np(&attr, sc_ls);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+       ret = posix_spawnp(NULL, id[0], NULL, &attr, id, environ);
+       T_ASSERT_NE(ret, 0, "spawn with bad suid cred");
+
+       ret = posix_spawnattr_destroy(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+}
+
+/*
+ * The privileged/entitled "server" which creates suid credentials to pass to a
+ * client. Two creds are created, one for /usr/bin/id and the other for /bin/ls.
+ * It waits for the client to contact and replies with the above ports.
+ */
+T_HELPER_DECL(suid_cred_server_helper, "suid cred server")
+{
+       mach_port_t server_port = MACH_PORT_NULL;
+       kern_return_t ret = KERN_FAILURE;
+       suid_cred_t sc_id = SUID_CRED_NULL;
+       suid_cred_t sc_ls = SUID_CRED_NULL;
+       mach_msg_empty_rcv_t rmsg = {};
+       struct {
+               mach_msg_header_t          header;
+               mach_msg_body_t            body;
+               mach_msg_port_descriptor_t id_port;
+               mach_msg_port_descriptor_t ls_port;
+       } smsg = {};
+
+       T_SETUPBEGIN;
+
+       ret = bootstrap_check_in(bootstrap_port, server_name, &server_port);
+       T_ASSERT_MACH_SUCCESS(ret, NULL);
+
+       T_SETUPEND;
+
+       // Wait for a message to reply to.
+       rmsg.header.msgh_size = sizeof(rmsg);
+       rmsg.header.msgh_local_port = server_port;
+
+       ret = mach_msg_receive(&rmsg.header);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(ret, NULL);
+
+       // Setup the reply.
+       smsg.header.msgh_remote_port = rmsg.header.msgh_remote_port;
+       smsg.header.msgh_local_port = MACH_PORT_NULL;
+       smsg.header.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MOVE_SEND_ONCE, 0) | MACH_MSGH_BITS_COMPLEX;
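+       // MOVE_SEND_ONCE consumes the client's reply right; COMPLEX marks
+       // the port descriptors that follow in the body.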
+       smsg.header.msgh_size = sizeof(smsg);
+
+       smsg.body.msgh_descriptor_count = 2;
+
+       // Create an suid cred for 'id'
+       ret = task_create_suid_cred(mach_task_self(), "/usr/bin/id", 0, &sc_id);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(ret, "create a new suid cred for id");
+       T_QUIET; T_ASSERT_NE(sc_id, SUID_CRED_NULL, NULL);
+
+       smsg.id_port.name = sc_id;
+       smsg.id_port.disposition = MACH_MSG_TYPE_COPY_SEND;
+       smsg.id_port.type = MACH_MSG_PORT_DESCRIPTOR;
+
+       // Create an suid cred for 'ls'
+       ret = task_create_suid_cred(mach_task_self(), "/bin/ls", 0, &sc_ls);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(ret, "create a new suid cred for ls");
+       T_QUIET; T_ASSERT_NE(sc_ls, SUID_CRED_NULL, NULL);
+
+       smsg.ls_port.name = sc_ls;
+       smsg.ls_port.disposition = MACH_MSG_TYPE_COPY_SEND;
+       smsg.ls_port.type = MACH_MSG_PORT_DESCRIPTOR;
+
+       ret = mach_msg_send(&smsg.header);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(ret, NULL);
+}
+
+/*
+ * The unprivileged "client" which requests suid credentials from the "server",
+ * and runs some test cases with those credentials:
+ *  - A positive test case to spawn something with euid 0
+ *  - A negative test case to check that a cred can't be used twice
+ *  - A negative test case to check that only the approved binary can be used
+ *  with the credential.
+ */
+T_HELPER_DECL(suid_cred_client_helper, "suid cred client")
+{
+       mach_port_t server_port = MACH_PORT_NULL;
+       mach_port_t client_port = MACH_PORT_NULL;
+       kern_return_t ret = KERN_FAILURE;
+       suid_cred_t sc_id = SUID_CRED_NULL;
+       suid_cred_t sc_ls = SUID_CRED_NULL;
+       mach_msg_empty_send_t smsg = {};
+       struct {
+               mach_msg_header_t          header;
+               mach_msg_body_t            body;
+               mach_msg_port_descriptor_t id_port;
+               mach_msg_port_descriptor_t ls_port;
+               mach_msg_trailer_t         trailer;
+       } rmsg = {};
+
+       uid_t euid = geteuid();
+
+       T_SETUPBEGIN;
+
+       // Make sure the effective UID is non-root.
+       if (euid == 0) {
+               ret = setuid(501);
+               T_ASSERT_POSIX_ZERO(ret, "setuid");
+       }
+
+       /*
+        * As this can race with the "server" starting, give it time to
+        * start up.
+        */
+       for (int i = 0; i < 30; i++) {
+               ret = bootstrap_look_up(bootstrap_port, server_name, &server_port);
+               if (ret != BOOTSTRAP_UNKNOWN_SERVICE) {
+                       break;
+               }
+               sleep(1);
+       }
+
+       T_QUIET; T_ASSERT_NE(server_port, MACH_PORT_NULL, NULL);
+
+       // Create a port to receive the reply on.
+       ret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &client_port);
+       T_ASSERT_MACH_SUCCESS(ret, NULL);
+
+       T_SETUPEND;
+
+       // Request the SUID cred ports
+       smsg.header.msgh_remote_port = server_port;
+       smsg.header.msgh_local_port = client_port;
+       smsg.header.msgh_bits = MACH_MSGH_BITS_SET(MACH_MSG_TYPE_MOVE_SEND, MACH_MSG_TYPE_MAKE_SEND_ONCE, 0, 0);
+       smsg.header.msgh_size = sizeof(smsg);
+
+       ret = mach_msg_send(&smsg.header);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(ret, NULL);
+
+       // Wait for the reply.
+       rmsg.header.msgh_size = sizeof(rmsg);
+       rmsg.header.msgh_local_port = client_port;
+
+       ret = mach_msg_receive(&rmsg.header);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(ret, NULL);
+
+       sc_id = rmsg.id_port.name;
+       T_QUIET; T_ASSERT_NE(sc_id, SUID_CRED_NULL, NULL);
+       test_id_cred(sc_id);
+       test_id_cred_reuse(sc_id);
+
+       sc_ls = rmsg.ls_port.name;
+       T_QUIET; T_ASSERT_NE(sc_ls, SUID_CRED_NULL, NULL);
+       test_ls_cred(sc_ls);
+}
+
+T_DECL(task_create_suid_cred, "task_create_suid_cred", T_META_ASROOT(true))
+{
+       dt_helper_t helpers[] = {
+               dt_launchd_helper_domain("com.apple.xnu.test.task_create_suid_cred.plist",
+                   "suid_cred_server_helper", NULL, LAUNCH_SYSTEM_DOMAIN),
+               dt_fork_helper("suid_cred_client_helper"),
+       };
+
+       dt_run_helpers(helpers, sizeof(helpers) / sizeof(helpers[0]), 60);
+}
+
+/*
+ * Creating an suid credential should fail for non-root (even if entitled).
+ */
+T_DECL(task_create_suid_cred_no_root, "task_create_suid_cred (no root)", T_META_ASROOT(true))
+{
+       kern_return_t ret = KERN_FAILURE;
+       suid_cred_t sc = SUID_CRED_NULL;
+       uid_t euid = geteuid();
+
+       // Make sure the effective UID is non-root.
+       if (euid == 0) {
+               ret = setuid(501);
+               T_QUIET; T_ASSERT_POSIX_ZERO(ret, "setuid");
+       }
+
+       ret = task_create_suid_cred(mach_task_self(), "/usr/bin/id", 0, &sc);
+       T_ASSERT_MACH_ERROR(ret, KERN_NO_ACCESS, "create a new suid cred for id (non-root)");
+}
+
+#endif /* ENTITLED */
diff --git a/tests/task_create_suid_cred_entitlement.plist b/tests/task_create_suid_cred_entitlement.plist
new file mode 100644 (file)
index 0000000..03a8326
--- /dev/null
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+
+       <key>com.apple.private.suid_cred</key>
+       <true/>
+
+</dict>
+</plist>
index ff2376e2ef5afc37a0ba1b5db59736fc555bdfd9..01067a75defb80cca64487425332a991d3751a07 100755 (executable)
@@ -110,6 +110,30 @@ def IterateLinkageChain(queue_head, element_type, field_name, field_ofst=0):
         yield obj
         link = link.next
 
+def IterateCircleQueue(queue_head, element_ptr_type, element_field_name):
+    """ Iterate over a circle queue in the kernel of type circle_queue_head_t. Refer to osfmk/kern/circle_queue.h.
+        params:
+            queue_head         - lldb.SBValue : Value object for the queue head.
+            element_ptr_type   - str : type name of the containing struct, e.g. 'struct sched_clutch_bucket'.
+            element_field_name - str : name of the queue linkage field in that struct.
+        returns:
+            A generator; it does not return, it is used for iterating.
+            value : the containing element, recovered via containerof(). Always a pointer object.
+    """
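+    # Example (mirrors the clutch scheduler caller added later in this
+    # commit; 'runq' is an assumed name for a sched_clutch_bucket_runq):
+    #   for cb in IterateCircleQueue(runq.scbrq_queues[pri],
+    #           'struct sched_clutch_bucket', 'scb_runqlink'):
+    #       print cb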
+    head = queue_head.head.GetSBValue()
+    queue_head_addr = 0x0
+    if head.TypeIsPointerType():
+        queue_head_addr = head.GetValueAsUnsigned()
+    else:
+        queue_head_addr = head.GetAddress().GetLoadAddress(osplugin_target_obj)
+    cur_elt = head
+    while True:
+        if not cur_elt.IsValid() or cur_elt.GetValueAsUnsigned() == 0:
+            break
+        yield containerof(value(cur_elt), element_ptr_type, element_field_name)
+        cur_elt = cur_elt.GetChildMemberWithName('next')
+        if cur_elt.GetValueAsUnsigned() == queue_head_addr:
+            break
 
 def IterateQueue(queue_head, element_ptr_type, element_field_name, backwards=False, unpack_ptr_fn=None):
     """ Iterate over an Element Chain queue in kernel of type queue_head_t. (osfmk/kern/queue.h method 2)
index c1fc18cc341788330055e5fad27eadd6751f0307..2e7e21847cbb36c249bf269d3241aff402888c2b 100755 (executable)
@@ -649,32 +649,6 @@ def IterateQueue(queue_head, element_ptr_type, element_field_name):
         yield elt
         cur_elt = elt.GetChildMemberWithName(element_field_name).GetChildMemberWithName('next')
 
-def IterateCircleQueue(queue_head, element_ptr_type, element_field_name):
-    """ iterate over a circle queue in kernel of type circle_queue_head_t. refer to osfmk/kern/circle_queue.h
-        params:
-            queue_head         - lldb.SBValue : Value object for queue_head.
-            element_type       - lldb.SBType : a pointer type of the element 'next' points to. Typically its structs like thread, task etc..
-            element_field_name - str : name of the field in target struct.
-        returns:
-            A generator does not return. It is used for iterating.
-            SBValue  : an object thats of type (element_type) queue_head->next. Always a pointer object
-    """
-    head = queue_head.head
-    queue_head_addr = 0x0
-    if head.TypeIsPointerType():
-        queue_head_addr = head.GetValueAsUnsigned()
-    else:
-        queue_head_addr = head.GetAddress().GetLoadAddress(osplugin_target_obj)
-    cur_elt = head
-    while True:
-        if not cur_elt.IsValid() or cur_elt.GetValueAsUnsigned() == 0:
-            break
-        elt = cur_elt.Cast(element_ptr_type)
-        yield elt
-        cur_elt = elt.GetChildMemberWithName(element_field_name).GetChildMemberWithName('next')
-        if cur_elt.GetValueAsUnsigned() == queue_head_addr:
-            break
-
 def GetUniqueSessionID(process_obj):
     """ Create a unique session identifier.
         params:
index 88ea13b5bbb2fa9050477de7d637602226c130bf..a1b02fcedb7dc5a13f4a4e7315c25ac002971901 100755 (executable)
@@ -500,13 +500,17 @@ def GetKObjectFromPort(portval):
         params: portval - core.value representation of 'ipc_port *' object
         returns: str - string of kobject information
     """
-    kobject_str = "{0: <#020x}".format(portval.kdata.kobject)
     io_bits = unsigned(portval.ip_object.io_bits)
-    objtype_index = io_bits & 0x7ff
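+    # 0x400 is the kolabel bit: labeled kobject ports keep their kobject
+    # pointer in kdata.kolabel.ikol_kobject rather than kdata.kobject, and
+    # the kotype index shrinks to the low 10 bits (0x3ff).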
+    if io_bits & 0x400 :
+        kobject_val = portval.kdata.kolabel.ikol_kobject
+    else:
+        kobject_val = portval.kdata.kobject
+    kobject_str = "{0: <#020x}".format(kobject_val)
+    objtype_index = io_bits & 0x3ff
     if objtype_index < len(xnudefines.kobject_types) :
         objtype_str = xnudefines.kobject_types[objtype_index]
         if objtype_str == 'IOKIT_OBJ':
-            iokit_classnm = GetObjectTypeStr(portval.kdata.kobject)
+            iokit_classnm = GetObjectTypeStr(kobject_val)
             if not iokit_classnm:
                 iokit_classnm = "<unknown class>"
             else:
@@ -515,7 +519,7 @@ def GetKObjectFromPort(portval):
         else:
             desc_str = "kobject({0:s})".format(objtype_str)
             if xnudefines.kobject_types[objtype_index] in ('TASK_RESUME', 'TASK'):
-                desc_str += " " + GetProcNameForTask(Cast(portval.kdata.kobject, 'task *'))
+                desc_str += " " + GetProcNameForTask(Cast(kobject_val, 'task *'))
     else:
         desc_str = "kobject(UNKNOWN) {:d}".format(objtype_index)
     return kobject_str + " " + desc_str
index 5db5554e578e4353b5e0b8cb91117f999c80b4d7..dff3736300893f825bcc8053db3306e6c06856ec 100755 (executable)
@@ -1288,6 +1288,7 @@ kThreadWaitPThreadCondVar       = 0x0e
 kThreadWaitParkedWorkQueue      = 0x0f
 kThreadWaitWorkloopSyncWait     = 0x10
 kThreadWaitOnProcess            = 0x11
+kThreadWaitCompressor           = 0x14
 
 
 UINT64_MAX = 0xffffffffffffffff
@@ -1397,6 +1398,8 @@ def formatWaitInfo(info):
             s += "waitpid, for process group %d" % abs(owner - 2**64)
         else:
             s += "waitpid, for pid %d" % owner
+    elif type == kThreadWaitCompressor:
+        s += "in compressor segment %x, busy for thread %d" % (context, owner)
 
     else:
         s += "unknown type %d (owner %d, context %x)" % (type, owner, context)
index 0708c7658e2b23f0f42725e238bbf4d9e21bcac8..983a027e110ae98915c0ac601605c0d5a76ba962 100755 (executable)
@@ -162,7 +162,7 @@ def ShowCurremtAbsTime(cmd_args=None):
 
     print "Last dispatch time known: %d MATUs" % cur_abstime
 
-bucketStr = ["", "FIXPRI (>UI)", "TIMESHARE_FG", "TIMESHARE_IN", "TIMESHARE_DF", "TIMESHARE_UT", "TIMESHARE_BG"]
+bucketStr = ["FIXPRI (>UI)", "TIMESHARE_FG", "TIMESHARE_IN", "TIMESHARE_DF", "TIMESHARE_UT", "TIMESHARE_BG"]
 
 @header("     {:>18s} | {:>20s} | {:>20s} | {:>10s} | {:>10s}".format('Thread Group', 'Interactivity Score', 'Last Timeshare Tick', 'pri_shift', 'highq'))
 def GetSchedClutchBucketSummary(clutch_bucket):
@@ -176,13 +176,15 @@ def ShowSchedClutchForPset(pset):
     print "{:>10s} | {:>20s} | {:>30s} | 0x{:16x} | {:>10d} | {:>10d} | {:>30s} | {:>30s} | {:>15s} | ".format("Root", "*", "*", addressof(root_clutch), root_clutch.scr_priority, root_clutch.scr_thr_count, "*", "*", "*")
     print "-" * 300
 
-    for i in range(1, 7):
+    for i in range(0, 6):
         root_bucket = root_clutch.scr_buckets[i]
         print "{:>10s} | {:>20s} | {:>30s} | 0x{:16x} | {:>10s} | {:>10s} | {:>30s} | {:>30s} | {:>15d} | ".format("*", bucketStr[i], "*", addressof(root_bucket), "*", "*", "*", "*", root_bucket.scrb_deadline)
-        prioq = root_bucket.scrb_clutch_buckets
+        clutch_bucket_runq = root_bucket.scrb_clutch_buckets
         clutch_bucket_list = []
-        for clutch_bucket in IteratePriorityQueue(prioq, 'struct sched_clutch_bucket', 'scb_pqlink'):
-            clutch_bucket_list.append(clutch_bucket)
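+        # scrb_clutch_buckets changed from a priority queue to a run queue of
+        # 128 per-priority circle queues; walk every priority level to collect
+        # all clutch buckets.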
+        for pri in range(0,128):
+            clutch_bucket_circleq = clutch_bucket_runq.scbrq_queues[pri]
+            for clutch_bucket in IterateCircleQueue(clutch_bucket_circleq, 'struct sched_clutch_bucket', 'scb_runqlink'):
+                clutch_bucket_list.append(clutch_bucket)
         if len(clutch_bucket_list) > 0:
             clutch_bucket_list.sort(key=lambda x: x.scb_priority, reverse=True)
             for clutch_bucket in clutch_bucket_list:
@@ -236,10 +238,12 @@ def ShowSchedClutchRootBucket(cmd_args=[]):
     print "{:<30s} : {:d}".format("Deadline", root_bucket.scrb_deadline)
     print "{:<30s} : {:d}".format("Current Timestamp", GetRecentTimestamp())
     print "\n"
-    prioq = root_bucket.scrb_clutch_buckets
+    clutch_bucket_runq = root_bucket.scrb_clutch_buckets
     clutch_bucket_list = []
-    for clutch_bucket in IteratePriorityQueue(prioq, 'struct sched_clutch_bucket', 'scb_pqlink'):
-        clutch_bucket_list.append(clutch_bucket)
+    for pri in range(0,128):
+        clutch_bucket_circleq = clutch_bucket_runq.scbrq_queues[pri]
+        for clutch_bucket in IterateCircleQueue(clutch_bucket_circleq, 'struct sched_clutch_bucket', 'scb_runqlink'):
+            clutch_bucket_list.append(clutch_bucket)
     if len(clutch_bucket_list) > 0:
         print "=" * 240
         print "{:>30s} | {:>18s} | {:>20s} | {:>20s} | ".format("Name", "Clutch Bucket", "Priority", "Count") + GetSchedClutchBucketSummary.header