]> git.saurik.com Git - apple/xnu.git/commitdiff
xnu-3248.20.55.tar.gz os-x-10112 v3248.20.55
authorApple <opensource@apple.com>
Fri, 18 Dec 2015 07:47:17 +0000 (07:47 +0000)
committerApple <opensource@apple.com>
Fri, 18 Dec 2015 07:47:17 +0000 (07:47 +0000)
150 files changed:
Makefile
bsd/dev/i386/sysctl.c
bsd/hfs/hfs_hotfiles.c
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_vnops.c
bsd/kern/bsd_init.c
bsd/kern/kdebug.c
bsd/kern/kern_credential.c
bsd/kern/kern_cs.c
bsd/kern/kern_fork.c
bsd/kern/kern_memorystatus.c
bsd/kern/kern_prot.c
bsd/kern/kern_sig.c
bsd/kern/kern_time.c
bsd/kern/proc_info.c
bsd/kern/sys_coalition.c
bsd/kern/sys_generic.c
bsd/kern/syscalls.master
bsd/kern/trace.codes
bsd/kern/ubc_subr.c
bsd/kern/uipc_socket.c
bsd/kern/uipc_syscalls.c
bsd/man/man2/Makefile
bsd/man/man2/pselect.2 [new file with mode: 0644]
bsd/net/classq/classq_sfb.c
bsd/net/dlil.c
bsd/net/if.c
bsd/net/if.h
bsd/net/if_var.h
bsd/net/necp.c
bsd/net/ntstat.c
bsd/net/ntstat.h
bsd/net/pf_if.c
bsd/netinet/in_gif.c
bsd/netinet/in_pcb.c
bsd/netinet/in_pcb.h
bsd/netinet/in_pcblist.c
bsd/netinet/ip_ecn.c
bsd/netinet/tcp.h
bsd/netinet/tcp_cache.c
bsd/netinet/tcp_cache.h
bsd/netinet/tcp_input.c
bsd/netinet/tcp_output.c
bsd/netinet/tcp_sack.c
bsd/netinet/tcp_subr.c
bsd/netinet/tcp_timer.c
bsd/netinet/tcp_usrreq.c
bsd/netinet/tcp_var.h
bsd/netinet6/ah_input.c
bsd/netinet6/esp_input.c
bsd/netinet6/in6_gif.c
bsd/netinet6/in6_pcb.c
bsd/netinet6/ip6_output.c
bsd/netinet6/nd6.c
bsd/nfs/nfs_gss.c
bsd/nfs/nfs_syscalls.c
bsd/security/audit/audit_session.c
bsd/sys/coalition.h
bsd/sys/kdebug.h
bsd/sys/proc_info.h
bsd/sys/proc_internal.h
bsd/sys/sockio.h
bsd/sys/systm.h
bsd/vfs/vfs_cluster.c
bsd/vfs/vfs_journal.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_syscalls.c
config/IOKit.exports
config/MasterVersion
iokit/IOKit/IOKitKeysPrivate.h
iokit/IOKit/pwr_mgt/RootDomain.h
iokit/Kernel/IONVRAM.cpp
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IOService.cpp
iokit/Kernel/IOUserClient.cpp
libkern/libkern/Makefile
libkern/libkern/section_keywords.h [new file with mode: 0644]
libkern/zlib/zutil.c
libsyscall/Libsyscall.xcodeproj/project.pbxproj
libsyscall/mach/err_kern.sub
libsyscall/mach/mach/thread_state.h [new file with mode: 0644]
libsyscall/mach/tvos_prohibited_mig.txt [new file with mode: 0644]
libsyscall/mach/watchos_prohibited_mig.txt
libsyscall/wrappers/cancelable/pselect-darwinext-cancel.c [new file with mode: 0644]
libsyscall/wrappers/cancelable/pselect-darwinext.c [new file with mode: 0644]
libsyscall/wrappers/select-base.c
libsyscall/wrappers/string/memset.c
libsyscall/wrappers/thread_register_state.c [new file with mode: 0644]
libsyscall/xcodescripts/mach_install_mig.sh
osfmk/i386/AT386/model_dep.c
osfmk/i386/i386_init.c
osfmk/i386/mp.c
osfmk/i386/pmap_x86_common.c
osfmk/i386/proc_reg.h
osfmk/ipc/ipc_importance.c
osfmk/ipc/ipc_importance.h
osfmk/ipc/ipc_kmsg.c
osfmk/ipc/ipc_port.c
osfmk/ipc/ipc_port.h
osfmk/ipc/ipc_right.c
osfmk/ipc/ipc_voucher.c
osfmk/kern/coalition.c
osfmk/kern/debug.c
osfmk/kern/ipc_kobject.c
osfmk/kern/kpc_common.c
osfmk/kern/machine.c
osfmk/kern/sched_prim.c
osfmk/kern/startup.c
osfmk/kern/sync_sema.c
osfmk/kern/sync_sema.h
osfmk/kern/task.c
osfmk/kern/task.h
osfmk/kern/thread.c
osfmk/kern/thread_call.c
osfmk/kern/timer_call.c
osfmk/kern/waitq.c
osfmk/kern/zalloc.c
osfmk/mach/coalition.h
osfmk/mach/kern_return.h
osfmk/mach/machine.h
osfmk/mach/thread_info.h
osfmk/mach/vm_param.h
osfmk/mach_debug/mach_debug_types.defs
osfmk/vm/bsd_vm.c
osfmk/vm/memory_object.c
osfmk/vm/pmap.h
osfmk/vm/vm_compressor.c
osfmk/vm/vm_fault.c
osfmk/vm/vm_kern.c
osfmk/vm/vm_map.c
osfmk/vm/vm_map.h
osfmk/vm/vm_object.c
osfmk/vm/vm_pageout.c
osfmk/vm/vm_pageout.h
osfmk/vm/vm_purgeable.c
osfmk/vm/vm_purgeable_internal.h
osfmk/vm/vm_resident.c
osfmk/vm/vm_user.c
osfmk/x86_64/idt64.s
osfmk/x86_64/pmap.c
security/mac_base.c
tools/lldbmacros/ioreg.py
tools/lldbmacros/kcdata.py
tools/lldbmacros/memory.py
tools/lldbmacros/process.py
tools/tests/MPMMTest/KQMPMMtest.c
tools/tests/MPMMTest/MPMMtest.c
tools/tests/TLBcoherency/Makefile [new file with mode: 0644]
tools/tests/TLBcoherency/TLBcoherency.c [new file with mode: 0644]
tools/tests/execperf/test.sh

index 7fbedaceba696afbb7d5181e920fd920459ad773..2a4b9ab192aa678744bbf669dd714d0af2561b53 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -100,12 +100,25 @@ installsrc:
        pax -rw . $(SRCROOT)
 
 else ifeq ($(RC_ProjectName),xnu_quick_test)
+# This rule should be removed once rdar://22820602 is complete.
+default: install
+
+installhdrs:
+
+install: xnu_tests
+
+clean:
+
+installsrc:
+       pax -rw . $(SRCROOT)
+
+else ifeq ($(RC_ProjectName),xnu_tests)
 
 default: install
 
 installhdrs:
 
-install: xnu_quick_test
+install: xnu_tests
 
 clean:
 
@@ -237,11 +250,11 @@ installhdrs_libkdd install_libkdd:
                        "SDKROOT=$(SDKROOT)"
 
 
-# "xnu_quick_test" and "testbots" are targets that can be invoked via a standalone
-# "make xnu_quick_test" or via buildit/XBS with the RC_ProjectName=xnu_quick_test.
+# "xnu_tests" and "testbots" are targets that can be invoked via a standalone
+# "make xnu_tests" or via buildit/XBS with the RC_ProjectName=xnu_tests.
 # Define the target here in the outermost scope of the initial Makefile
 
-xnu_quick_test:
+xnu_tests xnu_quick_test:
        $(MAKE) -C $(SRCROOT)/tools/tests                                       \
                SRCROOT=$(SRCROOT)/tools/tests
 
index 06e9389757da24585bcbba042410908fb197341f..2e4672f23e3b651bf3427b0036b3973fc75b2d21 100644 (file)
@@ -32,6 +32,7 @@
 #include <sys/sysctl.h>
 #include <i386/cpuid.h>
 #include <i386/tsc.h>
+#include <i386/rtclock_protos.h>
 #include <i386/machine_routines.h>
 #include <i386/pal_routines.h>
 #include <i386/ucode.h>
@@ -758,7 +759,11 @@ SYSCTL_QUAD(_machdep_tsc, OID_AUTO, frequency,
 
 extern uint32_t deep_idle_rebase;
 SYSCTL_UINT(_machdep_tsc, OID_AUTO, deep_idle_rebase,
-       CTLFLAG_RW|CTLFLAG_KERN|CTLFLAG_LOCKED, &deep_idle_rebase, 0, "");
+       CTLFLAG_RD|CTLFLAG_LOCKED, &deep_idle_rebase, 0, "");
+SYSCTL_QUAD(_machdep_tsc, OID_AUTO, at_boot,
+       CTLFLAG_RD|CTLFLAG_LOCKED, &tsc_at_boot, "");
+SYSCTL_QUAD(_machdep_tsc, OID_AUTO, rebase_abs_time,
+       CTLFLAG_RD|CTLFLAG_LOCKED, &tsc_rebase_abs_time, "");
 
 SYSCTL_NODE(_machdep_tsc, OID_AUTO, nanotime,
        CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "TSC to ns conversion");
index 143bc1983e5479e7f519cddaf532027c2ab27da5..b6fa4a276c5d2a9e30db93e1d1130ea380d953e4 100644 (file)
@@ -1709,8 +1709,10 @@ hfs_recording_init(struct hfsmount *hfsmp)
                }
                cnid = filep->fileID;
 
-               /* Skip over journal files. */
-               if (cnid == hfsmp->hfs_jnlfileid || cnid == hfsmp->hfs_jnlinfoblkid) {
+               /* Skip over journal files and the hotfiles B-Tree file. */
+               if (cnid == hfsmp->hfs_jnlfileid
+                       || cnid == hfsmp->hfs_jnlinfoblkid
+                       || cnid == VTOC(hfsmp->hfc_filevp)->c_fileid) {
                        continue;
                }
                /*
@@ -2865,6 +2867,15 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx)
                        stage = HFC_ADOPTION;
                        break;
                }
+
+               // Jump straight to delete for some files...
+               if (key->fileID == VTOC(hfsmp->hfc_filevp)->c_fileid
+                       || key->fileID == hfsmp->hfs_jnlfileid
+                       || key->fileID == hfsmp->hfs_jnlinfoblkid
+                       || key->fileID < kHFSFirstUserCatalogNodeID) {
+                       goto delete;
+               }
+
                /*
                 * Aquire the vnode for this file.
                 */
index a819362bb5eca73af012c12804f1fa6e8e9fee13..71380f628e90bfda06c702c426d860d86129c8a7 100644 (file)
@@ -2847,6 +2847,26 @@ hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
 // XXXdbg
 #include <sys/filedesc.h>
 
+static hfsmount_t *hfs_mount_from_cwd(vfs_context_t ctx)
+{
+       vnode_t vp = vfs_context_cwd(ctx);
+
+       if (!vp)
+               return NULL;
+
+       /*
+        * We could use vnode_tag, but it is probably more future proof to
+        * compare fstypename.
+        */
+       char fstypename[MFSNAMELEN];
+       vnode_vfsname(vp, fstypename);
+
+       if (strcmp(fstypename, "hfs"))
+               return NULL;
+
+       return VTOHFS(vp);
+}
+
 /*
  * HFS filesystem related variables.
  */
@@ -2930,7 +2950,6 @@ encodinghint_exit:
 
        } else if (name[0] == HFS_ENABLE_JOURNALING) {
                // make the file system journaled...
-               vnode_t vp = vfs_context_cwd(context);
                vnode_t jvp;
                ExtendedVCB *vcb;
                struct cat_attr jnl_attr;
@@ -2952,10 +2971,11 @@ encodinghint_exit:
                if (!kauth_cred_issuser(kauth_cred_get())) {
                        return (EPERM);
                }
-               if (vp == NULLVP)
-                       return EINVAL;
 
-               hfsmp = VTOHFS(vp);
+               hfsmp = hfs_mount_from_cwd(context);
+               if (!hfsmp)
+                       return EINVAL;
+
                if (hfsmp->hfs_flags & HFS_READ_ONLY) {
                        return EROFS;
                }
@@ -2965,7 +2985,7 @@ encodinghint_exit:
                }
 
                if (hfsmp->jnl) {
-                   printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
+                   printf("hfs: volume %s is already journaled!\n", hfsmp->vcbVN);
                    return EAGAIN;
                }
                vcb = HFSTOVCB(hfsmp);
@@ -3145,16 +3165,15 @@ encodinghint_exit:
                return 0;
        } else if (name[0] == HFS_DISABLE_JOURNALING) {
                // clear the journaling bit 
-               vnode_t vp = vfs_context_cwd(context);
-               
+
                /* Only root can disable journaling */
                if (!kauth_cred_issuser(kauth_cred_get())) {
                        return (EPERM);
                }
-               if (vp == NULLVP)
-                       return EINVAL;
 
-               hfsmp = VTOHFS(vp);
+               hfsmp = hfs_mount_from_cwd(context);
+               if (!hfsmp)
+                       return EINVAL;
 
                /* 
                 * Disabling journaling is disallowed on volumes with directory hard links
@@ -3165,7 +3184,7 @@ encodinghint_exit:
                        return EPERM;
                }
 
-               printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
+               printf("hfs: disabling journaling for %s\n", hfsmp->vcbVN);
 
                hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
 
@@ -3197,34 +3216,6 @@ encodinghint_exit:
                        fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
                        vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
                }
-               return 0;
-       } else if (name[0] == HFS_GET_JOURNAL_INFO) {
-               vnode_t vp = vfs_context_cwd(context);
-               off_t jnl_start, jnl_size;
-
-               if (vp == NULLVP)
-                       return EINVAL;
-
-               /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
-               if (proc_is64bit(current_proc()))
-                       return EINVAL;
-
-               hfsmp = VTOHFS(vp);
-           if (hfsmp->jnl == NULL) {
-                       jnl_start = 0;
-                       jnl_size  = 0;
-           } else {
-                       jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, HFSTOVCB(hfsmp)->blockSize) + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
-                       jnl_size  = hfsmp->jnl_size;
-           }
-
-           if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
-                       return error;
-               }
-           if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
-                       return error;
-               }
-
                return 0;
        } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
 
index dac4b088f814fb1ae4b13f377c2df5bfcd647994..a198b651ed5457e8651d00aaaf84d248d918ba27 100644 (file)
@@ -2823,8 +2823,9 @@ int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp,
                // Update to_cp's resource data if it has it
                filefork_t *to_rfork = to_cp->c_rsrcfork;
                if (to_rfork) {
-                       to_rfork->ff_invalidranges = from_rfork->ff_invalidranges;
-                       to_rfork->ff_data                  = from_rfork->ff_data;
+                       TAILQ_SWAP(&to_rfork->ff_invalidranges,
+                                          &from_rfork->ff_invalidranges, rl_entry, rl_link);
+                       to_rfork->ff_data = from_rfork->ff_data;
 
                        // Deal with ubc_setsize
                        hfs_rsrc_setsize(to_cp);
index d9b90aff2a4991dd1dcecc0310cadf3b853f8ac3..4f871df3293a65ac1eefa69b915302a421bf03d0 100644 (file)
@@ -367,6 +367,7 @@ extern int  (*mountroot)(void);
 lck_grp_t * proc_lck_grp;
 lck_grp_t * proc_slock_grp;
 lck_grp_t * proc_fdmlock_grp;
+lck_grp_t * proc_ucred_mlock_grp;
 lck_grp_t * proc_mlock_grp;
 lck_grp_attr_t * proc_lck_grp_attr;
 lck_attr_t * proc_lck_attr;
@@ -452,6 +453,7 @@ bsd_init(void)
 #if CONFIG_FINE_LOCK_GROUPS
        proc_slock_grp = lck_grp_alloc_init("proc-slock",  proc_lck_grp_attr);
        proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock",  proc_lck_grp_attr);
+       proc_ucred_mlock_grp = lck_grp_alloc_init("proc-ucred-mlock",  proc_lck_grp_attr);
        proc_mlock_grp = lck_grp_alloc_init("proc-mlock",  proc_lck_grp_attr);
 #endif
        /* Allocate proc lock attribute */
@@ -467,12 +469,14 @@ bsd_init(void)
        proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
        lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
        lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
+       lck_mtx_init(&kernproc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
        lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
 #else
        proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
        proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
        lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr);
        lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr);
+       lck_mtx_init(&kernproc->p_ucred_mlock, proc_lck_grp, proc_lck_attr);
        lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr);
 #endif
 
index 27ad69aa124ec8db87621a4548411e6b707dfb63..505fbf81da0e8a55c0bd5c0c62b206cadbaa1664 100644 (file)
@@ -134,7 +134,6 @@ int kdbg_readcurcpumap(user_addr_t, size_t *);
 int kdbg_readthrmap(user_addr_t, size_t *, vnode_t, vfs_context_t);
 int kdbg_readthrmap_v3(user_addr_t, size_t *, int);
 int kdbg_readcurthrmap(user_addr_t, size_t *);
-int kdbg_getreg(kd_regtype *);
 int kdbg_setreg(kd_regtype *);
 int kdbg_setrtcdec(kd_regtype *);
 int kdbg_setpidex(kd_regtype *);
@@ -2290,50 +2289,6 @@ kdbg_setreg(kd_regtype * kdr)
        return(ret);
 }
 
-int
-kdbg_getreg(__unused kd_regtype * kdr)
-{
-#if 0  
-       int i,j, ret=0;
-       unsigned int val_1, val_2, val;
-
-       switch (kdr->type) {
-       case KDBG_CLASSTYPE :
-               val_1 = (kdr->value1 & 0xff);
-               val_2 = val_1 + 1;
-               kdlog_beg = (val_1<<24);
-               kdlog_end = (val_2<<24);
-               kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-               kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
-               break;
-       case KDBG_SUBCLSTYPE :
-               val_1 = (kdr->value1 & 0xff);
-               val_2 = (kdr->value2 & 0xff);
-               val = val_2 + 1;
-               kdlog_beg = ((val_1<<24) | (val_2 << 16));
-               kdlog_end = ((val_1<<24) | (val << 16));
-               kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-               kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
-               break;
-       case KDBG_RANGETYPE :
-               kdlog_beg = (kdr->value1);
-               kdlog_end = (kdr->value2);
-               kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-               kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
-               break;
-       case KDBG_TYPENONE :
-               kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-               kdlog_beg = 0;
-               kdlog_end = 0;
-               break;
-       default :
-               ret = EINVAL;
-               break;
-       }
-#endif /* 0 */
-       return(EINVAL);
-}
-
 static int
 kdbg_write_to_vnode(caddr_t buffer, size_t size, vnode_t vp, vfs_context_t ctx, off_t file_offset)
 {
@@ -3206,16 +3161,8 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                        ret = kdbg_setreg(&kd_Reg);
                        break;
                case KERN_KDGETREG:
-                       if (size < sizeof(kd_regtype)) {
-                               ret = EINVAL;
-                               break;
-                       }
-                       ret = kdbg_getreg(&kd_Reg);
-                       if (copyout(&kd_Reg, where, sizeof(kd_regtype))) {
-                               ret = EINVAL;
-                       }
                        kdbg_disable_bg_trace();
-
+                       ret = EINVAL;
                        break;
                case KERN_KDREADTR:
                        ret = kdbg_read(where, sizep, NULL, NULL, RAW_VERSION1);
index fbbfb752a590d9f4abfe6d2a5f134edb8076e1f7..0d2a07e02636ba788543ca7d68d4ca45f96444ac 100644 (file)
@@ -3598,7 +3598,7 @@ kauth_cred_get_with_ref(void)
  * Returns:    (kauth_cred_t)                  Pointer to the process's
  *                                             newly referenced credential
  *
- * Locks:      PROC_LOCK is held before taking the reference and released
+ * Locks:      PROC_UCRED_LOCK is held before taking the reference and released
  *             after the refeence is taken to protect the p_ucred field of
  *             the process referred to by procp.
  *
@@ -3620,10 +3620,10 @@ kauth_cred_proc_ref(proc_t procp)
 {
        kauth_cred_t    cred;
        
-       proc_lock(procp);
+       proc_ucred_lock(procp);
        cred = proc_ucred(procp);
        kauth_cred_ref(cred);
-       proc_unlock(procp);
+       proc_ucred_unlock(procp);
        return(cred);
 }
 
@@ -4456,7 +4456,7 @@ int kauth_proc_label_update(struct proc *p, struct label *label)
 
                        DEBUG_CRED_CHANGE("kauth_proc_setlabel_unlocked CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
 
-                       proc_lock(p);
+                       proc_ucred_lock(p);
                        /*
                         * We need to protect for a race where another thread
                         * also changed the credential after we took our
@@ -4464,7 +4464,7 @@ int kauth_proc_label_update(struct proc *p, struct label *label)
                         * restart this again with the new cred.
                         */
                        if (p->p_ucred != my_cred) {
-                               proc_unlock(p);
+                               proc_ucred_unlock(p);
                                kauth_cred_unref(&my_new_cred);
                                my_cred = kauth_cred_proc_ref(p);
                                /* try again */
@@ -4475,7 +4475,7 @@ int kauth_proc_label_update(struct proc *p, struct label *label)
                        PROC_UPDATE_CREDS_ONPROC(p);
 
                        mac_proc_set_enforce(p, MAC_ALL_ENFORCE);
-                       proc_unlock(p);
+                       proc_ucred_unlock(p);
                }
                break;
        }
@@ -4536,7 +4536,7 @@ kauth_proc_label_update_execve(struct proc *p, vfs_context_t ctx,
 
                        DEBUG_CRED_CHANGE("kauth_proc_label_update_execve_unlocked CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
 
-                       proc_lock(p);
+                       proc_ucred_lock(p);
                        /*
                         * We need to protect for a race where another thread
                         * also changed the credential after we took our
@@ -4544,7 +4544,7 @@ kauth_proc_label_update_execve(struct proc *p, vfs_context_t ctx,
                         * restart this again with the new cred.
                         */
                        if (p->p_ucred != my_cred) {
-                               proc_unlock(p);
+                               proc_ucred_unlock(p);
                                kauth_cred_unref(&my_new_cred);
                                my_cred = kauth_cred_proc_ref(p);
                                /* try again */
@@ -4554,7 +4554,7 @@ kauth_proc_label_update_execve(struct proc *p, vfs_context_t ctx,
                        /* update cred on proc */
                        PROC_UPDATE_CREDS_ONPROC(p);
                        mac_proc_set_enforce(p, MAC_ALL_ENFORCE);
-                       proc_unlock(p);
+                       proc_ucred_unlock(p);
                }
                break;
        }
index 1459a472f2c710efb0d27b3158c105594452ff02..fb3a2012cc90b9fe60c8f0dc7bab43542f37cb06 100644 (file)
@@ -71,6 +71,8 @@
 
 #include <mach/shared_region.h>
 
+#include <libkern/section_keywords.h>
+
 unsigned long cs_procs_killed = 0;
 unsigned long cs_procs_invalidated = 0;
 
@@ -84,16 +86,18 @@ const int cs_library_val_enable = 1;
 int cs_enforcement_panic=0;
 
 #if CONFIG_ENFORCE_SIGNED_CODE
-int cs_enforcement_enable = 1;
+#define DEFAULT_CS_ENFORCEMENT_ENABLE 1
 #else
-int cs_enforcement_enable = 0;
+#define DEFAULT_CS_ENFORCEMENT_ENABLE 0
 #endif
+SECURITY_READ_ONLY_LATE(int) cs_enforcement_enable = DEFAULT_CS_ENFORCEMENT_ENABLE;
 
 #if CONFIG_ENFORCE_LIBRARY_VALIDATION
-int cs_library_val_enable = 1;
+#define DEFAULT_CS_LIBRARY_VA_ENABLE 1
 #else
-int cs_library_val_enable = 0;
+#define DEFAULT_CS_LIBRARY_VA_ENABLE 0
 #endif
+SECURITY_READ_ONLY_LATE(int) cs_library_val_enable = DEFAULT_CS_LIBRARY_VA_ENABLE;
 
 #endif /* !SECURE_KERNEL */
 int cs_all_vnodes = 0;
index ff5d6dda6d028c0a1acb6ded6cef2c03d34933ba..7b3e2440e5900b36c05b8ea0cddb357008700b66 100644 (file)
@@ -1255,6 +1255,7 @@ retry:
 #if CONFIG_FINE_LOCK_GROUPS
        lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
        lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
+       lck_mtx_init(&child_proc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
 #if CONFIG_DTRACE
        lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
 #endif
@@ -1262,6 +1263,7 @@ retry:
 #else /* !CONFIG_FINE_LOCK_GROUPS */
        lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
        lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
+       lck_mtx_init(&child_proc->p_ucred_mlock, proc_lck_grp, proc_lck_attr);
 #if CONFIG_DTRACE
        lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
 #endif
@@ -1391,6 +1393,7 @@ bad:
 void
 proc_lock(proc_t p)
 {
+       lck_mtx_assert(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
        lck_mtx_lock(&p->p_mlock);
 }
 
@@ -1424,6 +1427,18 @@ proc_list_unlock(void)
        lck_mtx_unlock(proc_list_mlock);
 }
 
+void 
+proc_ucred_lock(proc_t p)
+{
+       lck_mtx_lock(&p->p_ucred_mlock);
+}
+
+void 
+proc_ucred_unlock(proc_t p)
+{
+       lck_mtx_unlock(&p->p_ucred_mlock);
+}
+
 #include <kern/zalloc.h>
 
 struct zone    *uthread_zone;
@@ -1555,6 +1570,12 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info, boolean_t is_corpse
        uthread_t uth = (uthread_t)uthread;
        proc_t p = (proc_t)bsd_info;
 
+#if PROC_REF_DEBUG
+       if (__improbable(uthread_get_proc_refcount(uthread) != 0)) {
+               panic("uthread_cleanup called for uthread %p with uu_proc_refcount != 0", uthread);
+       }
+#endif
+
        if (uth->uu_lowpri_window || uth->uu_throttle_info) {
                /*
                 * task is marked as a low priority I/O type
index 13dcc2607c11dc729cd648d1e99bd961711a270b..97296a8a87a2327e7680f5c2af7168ab3a9e17ab 100644 (file)
@@ -3093,24 +3093,34 @@ memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) {
        proc_t p;
 
        /* TODO - add a victim queue and push this into the main jetsam thread */
-
        p = proc_find(victim_pid);
        if (!p) {
                return FALSE;
        }
 
-       printf("memorystatus: specifically killing pid %d [%s] (%s %d) - memorystatus_available_pages: %d\n", 
-               victim_pid, (p->p_comm ? p->p_comm : "(unknown)"),
-              jetsam_kill_cause_name[cause], p->p_memstat_effectivepriority, memorystatus_available_pages);
-
        proc_list_lock();
 
+       if ((p->p_memstat_state & P_MEMSTAT_TERMINATED) ||
+               (p->p_listflag & P_LIST_EXITED) ||
+               (p->p_memstat_state & P_MEMSTAT_ERROR)) {
+               proc_list_unlock();
+               proc_rele(p);
+               return FALSE;
+       }
+
+       p->p_memstat_state |= P_MEMSTAT_TERMINATED;
+
        if (memorystatus_jetsam_snapshot_count == 0) {
                memorystatus_init_jetsam_snapshot_locked(NULL,0);
        }
 
        memorystatus_update_jetsam_snapshot_entry_locked(p, cause);
        proc_list_unlock();
+
+       printf("memorystatus: specifically killing pid %d [%s] (%s %d) - memorystatus_available_pages: %d\n",
+               victim_pid, (p->p_comm ? p->p_comm : "(unknown)"),
+              jetsam_kill_cause_name[cause], p->p_memstat_effectivepriority, memorystatus_available_pages);
+
        
        killed = memorystatus_do_kill(p, cause);
        proc_rele(p);
index 5df82a23f0fecd9416231a961d737d78c7c0fcb0..e90c68c55b80b7109f686800d1ca750f684c7ced 100644 (file)
@@ -782,7 +782,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval)
                                (void)chgproccnt(ruid, 1);
                        }
 
-                       proc_lock(p);
+                       proc_ucred_lock(p);
                        /*
                         * We need to protect for a race where another thread
                         * also changed the credential after we took our
@@ -792,7 +792,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval)
                         * Note: the kauth_cred_setresuid has consumed a reference to my_cred, it p_ucred != my_cred, then my_cred must not be dereferenced!
                         */
                        if (p->p_ucred != my_cred) {
-                               proc_unlock(p);
+                               proc_ucred_unlock(p);
                                /*
                                 * We didn't successfully switch to the new ruid, so decrement
                                 * the procs/uid count that we incremented above.
@@ -811,7 +811,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval)
                        PROC_UPDATE_CREDS_ONPROC(p);
 
                        OSBitOrAtomic(P_SUGID, &p->p_flag);
-                       proc_unlock(p);
+                       proc_ucred_unlock(p);
                        /*
                         * If we've updated the ruid, decrement the count of procs running
                         * under the previous ruid
@@ -885,7 +885,7 @@ seteuid(proc_t p, struct seteuid_args *uap, __unused int32_t *retval)
 
                        DEBUG_CRED_CHANGE("seteuid CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_pcred->cr_flags, my_new_cred, posix_cred_get(my_new_cred)->cr_flags);
 
-                       proc_lock(p);
+                       proc_ucred_lock(p);
                        /*
                         * We need to protect for a race where another thread
                         * also changed the credential after we took our
@@ -893,7 +893,7 @@ seteuid(proc_t p, struct seteuid_args *uap, __unused int32_t *retval)
                         * should restart this again with the new cred.
                         */
                        if (p->p_ucred != my_cred) {
-                               proc_unlock(p);
+                               proc_ucred_unlock(p);
                                kauth_cred_unref(&my_new_cred);
                                my_cred = kauth_cred_proc_ref(p);
                                my_pcred = posix_cred_get(my_cred);
@@ -904,7 +904,7 @@ seteuid(proc_t p, struct seteuid_args *uap, __unused int32_t *retval)
                        /* update cred on proc */
                        PROC_UPDATE_CREDS_ONPROC(p);
                        OSBitOrAtomic(P_SUGID, &p->p_flag);
-                       proc_unlock(p);
+                       proc_ucred_unlock(p);
                }
                break;
        }
@@ -1030,7 +1030,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
                                (void)chgproccnt(ruid, 1);
                        }
 
-                       proc_lock(p);
+                       proc_ucred_lock(p);
                        /*
                         * We need to protect for a race where another thread
                         * also changed the credential after we took our
@@ -1040,7 +1040,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
                         * Note: the kauth_cred_setresuid has consumed a reference to my_cred, it p_ucred != my_cred, then my_cred must not be dereferenced!
                         */
                        if (p->p_ucred != my_cred) {
-                               proc_unlock(p);
+                               proc_ucred_unlock(p);
                                if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) {
                                        /*
                                         * We didn't successfully switch to the new ruid, so decrement
@@ -1059,7 +1059,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
                        /* update cred on proc */
                        PROC_UPDATE_CREDS_ONPROC(p);
                        OSBitOrAtomic(P_SUGID, &p->p_flag);
-                       proc_unlock(p);
+                       proc_ucred_unlock(p);
 
                        if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) {
                                /*
@@ -1155,7 +1155,7 @@ setgid(proc_t p, struct setgid_args *uap, __unused int32_t *retval)
 
                        DEBUG_CRED_CHANGE("setgid(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
 
-                       proc_lock(p);
+                       proc_ucred_lock(p);
                        /*
                         * We need to protect for a race where another thread
                         * also changed the credential after we took our
@@ -1163,7 +1163,7 @@ setgid(proc_t p, struct setgid_args *uap, __unused int32_t *retval)
                         * should restart this again with the new cred.
                         */
                        if (p->p_ucred != my_cred) {
-                               proc_unlock(p);
+                               proc_ucred_unlock(p);
                                kauth_cred_unref(&my_new_cred);
                                /* try again */
                                my_cred = kauth_cred_proc_ref(p);
@@ -1174,7 +1174,7 @@ setgid(proc_t p, struct setgid_args *uap, __unused int32_t *retval)
                        /* update cred on proc */
                        PROC_UPDATE_CREDS_ONPROC(p);
                        OSBitOrAtomic(P_SUGID, &p->p_flag);
-                       proc_unlock(p);
+                       proc_ucred_unlock(p);
                }
                break;
        }
@@ -1246,7 +1246,7 @@ setegid(proc_t p, struct setegid_args *uap, __unused int32_t *retval)
 
                        DEBUG_CRED_CHANGE("setegid(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_pcred->cr_flags, my_new_cred, posix_cred_get(my_new_cred)->cr_flags);
 
-                       proc_lock(p);
+                       proc_ucred_lock(p);
                        /*
                         * We need to protect for a race where another thread
                         * also changed the credential after we took our
@@ -1254,7 +1254,7 @@ setegid(proc_t p, struct setegid_args *uap, __unused int32_t *retval)
                         * should restart this again with the new cred.
                         */
                        if (p->p_ucred != my_cred) {
-                               proc_unlock(p);
+                               proc_ucred_unlock(p);
                                kauth_cred_unref(&my_new_cred);
                                /* try again */
                                my_cred = kauth_cred_proc_ref(p);
@@ -1265,7 +1265,7 @@ setegid(proc_t p, struct setegid_args *uap, __unused int32_t *retval)
                        /* update cred on proc */
                        PROC_UPDATE_CREDS_ONPROC(p);
                        OSBitOrAtomic(P_SUGID, &p->p_flag);
-                       proc_unlock(p);
+                       proc_ucred_unlock(p);
                }
                break;
        }
@@ -1393,14 +1393,14 @@ setregid(proc_t p, struct setregid_args *uap, __unused int32_t *retval)
 
                        DEBUG_CRED_CHANGE("setregid(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_pcred->cr_flags, my_new_cred, posix_cred_get(my_new_cred)->cr_flags);
 
-                       proc_lock(p);
+                       proc_ucred_lock(p);
                        /* need to protect for a race where another thread
                         * also changed the credential after we took our
                         * reference.  If p_ucred has changed then we
                         * should restart this again with the new cred.
                         */
                        if (p->p_ucred != my_cred) {
-                               proc_unlock(p);
+                               proc_ucred_unlock(p);
                                kauth_cred_unref(&my_new_cred);
                                /* try again */
                                my_cred = kauth_cred_proc_ref(p);
@@ -1411,7 +1411,7 @@ setregid(proc_t p, struct setregid_args *uap, __unused int32_t *retval)
                        /* update cred on proc */
                        PROC_UPDATE_CREDS_ONPROC(p);
                        OSBitOrAtomic(P_SUGID, &p->p_flag); /* XXX redundant? */
-                       proc_unlock(p);
+                       proc_ucred_unlock(p);
                }
                break;
        }
@@ -1698,7 +1698,7 @@ setgroups1(proc_t p, u_int gidsetsize, user_addr_t gidset, uid_t gmuid, __unused
 
                                DEBUG_CRED_CHANGE("setgroups1(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
 
-                               proc_lock(p);
+                               proc_ucred_lock(p);
                                /*
                                 * We need to protect for a race where another
                                 * thread also changed the credential after we
@@ -1707,7 +1707,7 @@ setgroups1(proc_t p, u_int gidsetsize, user_addr_t gidset, uid_t gmuid, __unused
                                 * with the new cred.
                                 */
                                if (p->p_ucred != my_cred) {
-                                       proc_unlock(p);
+                                       proc_ucred_unlock(p);
                                        kauth_cred_unref(&my_new_cred);
                                        my_cred = kauth_cred_proc_ref(p);
                                        /* try again */
@@ -1717,7 +1717,7 @@ setgroups1(proc_t p, u_int gidsetsize, user_addr_t gidset, uid_t gmuid, __unused
                                /* update cred on proc */
                                PROC_UPDATE_CREDS_ONPROC(p);
                                OSBitOrAtomic(P_SUGID, &p->p_flag);
-                               proc_unlock(p);
+                               proc_ucred_unlock(p);
                        }
                        break;
                }
index 5f3e5960cbae0ad29d903cc144798e04ae3c7ba3..bf55079038d1a66c35461320378ff4ca205fad32 100644 (file)
@@ -367,7 +367,7 @@ cansignal(proc_t p, kauth_cred_t uc, proc_t q, int signum, int zombie)
  */
 unsigned sigrestrict_arg = 0;
 
-#if PLATFORM_WatchOS || PLATFORM_AppleTVOS
+#if PLATFORM_WatchOS
 static int
 sigrestrictmask(void)
 {
@@ -400,7 +400,7 @@ signal_is_restricted(proc_t p, int signum)
        (void)signum;
        return 0;
 }
-#endif /* !(PLATFORM_WatchOS || PLATFORM_AppleTVOS) */
+#endif /* !PLATFORM_WatchOS */
 
 /*
  * Returns:    0                       Success
@@ -430,12 +430,29 @@ sigaction(proc_t p, struct sigaction_args *uap, __unused int32_t *retval)
                        signum == SIGKILL || signum == SIGSTOP)
                return (EINVAL);
 
-       if ((error = signal_is_restricted(p, signum))) {
-               if (error == ENOTSUP) {
-                       printf("%s(%d): denied attempt to register action for signal %d\n",
-                                       proc_name_address(p), proc_pid(p), signum);
+       if (uap->nsa) {
+               if (IS_64BIT_PROCESS(p)) {
+                       struct __user64_sigaction       __vec64;
+                       error = copyin(uap->nsa, &__vec64, sizeof(__vec64));
+                       __sigaction_user64_to_kern(&__vec64, &__vec);
+               } else {
+                       struct __user32_sigaction       __vec32;
+                       error = copyin(uap->nsa, &__vec32, sizeof(__vec32));
+                       __sigaction_user32_to_kern(&__vec32, &__vec);
+               }
+               if (error)
+                       return (error);
+               __vec.sa_flags &= SA_USERSPACE_MASK; /* Only pass on valid sa_flags */
+
+               if ((__vec.sa_flags & SA_SIGINFO) || __vec.sa_handler != SIG_DFL) {
+                       if ((error = signal_is_restricted(p, signum))) {
+                               if (error == ENOTSUP) {
+                                       printf("%s(%d): denied attempt to register action for signal %d\n",
+                                                       proc_name_address(p), proc_pid(p), signum);
+                               }
+                               return error;
+                       }
                }
-               return error;
        }
 
        if (uap->osa) {
@@ -460,35 +477,21 @@ sigaction(proc_t p, struct sigaction_args *uap, __unused int32_t *retval)
 
                if (IS_64BIT_PROCESS(p)) {
                        struct user64_sigaction vec64;
-                       
                        sigaction_kern_to_user64(sa, &vec64);
                        error = copyout(&vec64, uap->osa, sizeof(vec64));
                } else {
                        struct user32_sigaction vec32;
-                       
                        sigaction_kern_to_user32(sa, &vec32);
                        error = copyout(&vec32, uap->osa, sizeof(vec32));
                }
                if (error)
                        return (error);
        }
+
        if (uap->nsa) {
-               if (IS_64BIT_PROCESS(p)) {
-                       struct __user64_sigaction       __vec64;
-                       
-                       error = copyin(uap->nsa, &__vec64, sizeof(__vec64));
-                       __sigaction_user64_to_kern(&__vec64, &__vec);
-               } else {
-                       struct __user32_sigaction       __vec32;
-                       
-                       error = copyin(uap->nsa, &__vec32, sizeof(__vec32));
-                       __sigaction_user32_to_kern(&__vec32, &__vec);
-               }
-               if (error)
-                       return (error);
-               __vec.sa_flags &= SA_USERSPACE_MASK; /* Only pass on valid sa_flags */
                error = setsigvec(p, current_thread(), signum, &__vec, FALSE);
        }
+
        return (error);
 }
 
@@ -1714,6 +1717,18 @@ threadsignal(thread_t sig_actthread, int signum, mach_exception_code_t code)
        signal_setast(sig_actthread);
 }
 
+/*
+ * get_signalthread
+ *
+ * Picks an appropriate thread from a process to target with a signal.
+ *
+ * Called with proc locked.
+ * Returns thread with BSD ast set.
+ *
+ * We attempt to deliver a proc-wide signal to the first thread in the task.
+ * This allows single-threaded applications which use signals to
+ * be linked with multithreaded libraries.
+ */
 static kern_return_t
 get_signalthread(proc_t p, int signum, thread_t * thr)
 {
@@ -1735,19 +1750,15 @@ get_signalthread(proc_t p, int signum, thread_t * thr)
                        return(KERN_FAILURE);
        }
 
-       proc_lock(p);
-
        TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) {
                if(((uth->uu_flag & UT_NO_SIGMASK)== 0) && 
                        (((uth->uu_sigmask & mask) == 0) || (uth->uu_sigwait & mask))) {
                        if (check_actforsig(p->task, uth->uu_context.vc_thread, 1) == KERN_SUCCESS) {
                                *thr = uth->uu_context.vc_thread;
-                               proc_unlock(p);
                                return(KERN_SUCCESS);
                        }
                }
        }
-       proc_unlock(p);
        if (get_signalact(p->task, thr, 1) == KERN_SUCCESS) {
                return(KERN_SUCCESS);
        }
@@ -1775,7 +1786,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
        user_addr_t action = USER_ADDR_NULL;
        proc_t          sig_proc;
        thread_t        sig_thread;
-       register task_t         sig_task;
+       task_t          sig_task;
        int mask;
        struct uthread *uth;
        kern_return_t kret;
@@ -1784,7 +1795,8 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
        kauth_cred_t my_cred;
 
        if ((u_int)signum >= NSIG || signum == 0)
-               panic("psignal signal number");
+               panic("psignal: bad signal number %d", signum);
+
        mask = sigmask(signum);
        prop = sigprop[signum];
 
@@ -1814,12 +1826,13 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                sig_thread = thread;
                sig_proc = (proc_t)get_bsdtask_info(sig_task);
        } else if (flavor & PSIG_TRY_THREAD) {
+               assert((thread == current_thread()) && (p == current_proc()));
                sig_task = p->task;
                sig_thread = thread;
                sig_proc = p;
        } else {
                sig_task = p->task;
-               sig_thread = (struct thread *)0;
+               sig_thread = THREAD_NULL;
                sig_proc = p;
        }
 
@@ -1833,9 +1846,10 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
         * also no need to send a signal to a process that is in the middle
         * of being torn down.
         */
-       if (ISSET(sig_proc->p_flag, P_REBOOT) ||
-           ISSET(sig_proc->p_lflag, P_LEXIT))
+       if (ISSET(sig_proc->p_flag, P_REBOOT) || ISSET(sig_proc->p_lflag, P_LEXIT)) {
+               DTRACE_PROC3(signal__discard, thread_t, sig_thread, proc_t, sig_proc, int, signum);
                return;
+       }
 
        if( (flavor & (PSIG_VFORK | PSIG_THREAD)) == 0) {
                proc_knote(sig_proc, NOTE_SIGNAL | signum);
@@ -1844,22 +1858,22 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
        if ((flavor & PSIG_LOCKED)== 0)
                proc_signalstart(sig_proc, 0);
 
-       /*
-        *      Deliver the signal to the first thread in the task. This
-        *      allows single threaded applications which use signals to
-        *      be able to be linked with multithreaded libraries.  We have
-        *      an implicit reference to the current thread, but need
-        *      an explicit one otherwise.  The thread reference keeps
-        *      the corresponding task data structures around too.  This
-        *      reference is released by thread_deallocate.
-        */
-
-
+       /* Don't send signals to a process that has ignored them. */
        if (((flavor & PSIG_VFORK) == 0) && ((sig_proc->p_lflag & P_LTRACED) == 0) && (sig_proc->p_sigignore & mask)) {
                DTRACE_PROC3(signal__discard, thread_t, sig_thread, proc_t, sig_proc, int, signum);
-               goto psigout;
+               goto sigout_unlocked;
        }
 
+       /*
+        * The proc_lock prevents the targeted thread from being deallocated
+        * or handling the signal until we're done signaling it.
+        *
+        * Once the proc_lock is dropped, we have no guarantee the thread or uthread exists anymore.
+        *
+        * XXX: What if the thread goes inactive after the thread passes bsd ast point?
+        */
+       proc_lock(sig_proc);
+
        if (flavor & PSIG_VFORK) {
                action = SIG_DFL;
                act_set_astbsd(sig_thread);
@@ -1881,11 +1895,11 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                /* If successful return with ast set */
                kret = get_signalthread(sig_proc, signum, &sig_thread);
        }
+
        if (kret != KERN_SUCCESS) {
-#if SIGNAL_DEBUG
-                       ram_printf(1);
-#endif /* SIGNAL_DEBUG */
-               goto psigout;
+               DTRACE_PROC3(signal__discard, thread_t, sig_thread, proc_t, sig_proc, int, signum);
+               proc_unlock(sig_proc);
+               goto sigout_unlocked;
        }
 
        uth = get_bsdthread_info(sig_thread);
@@ -1906,7 +1920,8 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                         * action will be SIG_DFL here.)
                         */
                        if (sig_proc->p_sigignore & mask)
-                               goto psigout;
+                               goto sigout_locked;
+
                        if (uth->uu_sigwait & mask)
                                action = KERN_SIG_WAIT;
                        else if (uth->uu_sigmask & mask)
@@ -1918,8 +1933,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                }
        }
 
-       proc_lock(sig_proc);
-
+       /* TODO: p_nice isn't hooked up to the scheduler... */
        if (sig_proc->p_nice > NZERO && action == SIG_DFL && (prop & SA_KILL) &&
                (sig_proc->p_lflag & P_LTRACED) == 0)
                        sig_proc->p_nice = NZERO;
@@ -1935,41 +1949,33 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                 * is default; don't stop the process below if sleeping,
                 * and don't clear any pending SIGCONT.
                 */
-               proc_unlock(sig_proc);
                pg = proc_pgrp(sig_proc);
                if (prop & SA_TTYSTOP && pg->pg_jobc == 0 &&
                        action == SIG_DFL) {
                        pg_rele(pg);
-                       goto psigout;
+                       goto sigout_locked;
                }
                pg_rele(pg);
-               proc_lock(sig_proc);
                uth->uu_siglist &= ~contsigmask;
        }
 
        uth->uu_siglist |= mask;
-       /* 
-        * Repost AST incase sigthread has processed 
-        * ast and missed signal post.
-        */
-       if (action == KERN_SIG_CATCH)
-               act_set_astbsd(sig_thread);
 
-       
        /*
         * Defer further processing for signals which are held,
         * except that stopped processes must be continued by SIGCONT.
         */
        /* vfork will not go thru as action is SIG_DFL */
-       if ((action == KERN_SIG_HOLD) && ((prop & SA_CONT) == 0 || sig_proc->p_stat != SSTOP)) {
-               proc_unlock(sig_proc);
-               goto psigout;
-       }
+       if ((action == KERN_SIG_HOLD) && ((prop & SA_CONT) == 0 || sig_proc->p_stat != SSTOP))
+               goto sigout_locked;
+
        /*
         *      SIGKILL priority twiddling moved here from above because
         *      it needs sig_thread.  Could merge it into large switch
         *      below if we didn't care about priority for tracing
         *      as SIGKILL's action is always SIG_DFL.
+        *
+        *      TODO: p_nice isn't hooked up to the scheduler...
         */
        if ((signum == SIGKILL) && (sig_proc->p_nice > NZERO)) {
                sig_proc->p_nice = NZERO;
@@ -1983,11 +1989,10 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
        if (sig_proc->p_lflag & P_LTRACED) {
                if (sig_proc->p_stat != SSTOP)
                        goto runlocked;
-               else {
-                       proc_unlock(sig_proc);
-                       goto psigout;
-               }
+               else
+                       goto sigout_locked;
        }
+
        if ((flavor & PSIG_VFORK) != 0)
                goto runlocked;
 
@@ -2013,13 +2018,9 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                if (prop & SA_CONT) {
                        OSBitOrAtomic(P_CONTINUED, &sig_proc->p_flag);
                        sig_proc->p_contproc = current_proc()->p_pid;
-
-                       proc_unlock(sig_proc);
                        (void) task_resume_internal(sig_task);
-                       goto psigout;
                }
-               proc_unlock(sig_proc);
-               goto psigout;
+               goto sigout_locked;
        }
 
        if (action != SIG_DFL) {
@@ -2030,13 +2031,10 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                 */
                if (prop & SA_CONT) {
                        OSBitOrAtomic(P_CONTINUED, &sig_proc->p_flag);
-                       proc_unlock(sig_proc);
                        (void) task_resume_internal(sig_task);
-                       proc_lock(sig_proc);
                        sig_proc->p_stat = SRUN;
                }  else if (sig_proc->p_stat == SSTOP) {
-                       proc_unlock(sig_proc);
-                       goto psigout;
+                       goto sigout_locked;
                }
                /*
                 * Fill out siginfo structure information to pass to the
@@ -2051,9 +2049,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                 * Note:        Avoid the SIGCHLD recursion case!
                 */
                if (signum != SIGCHLD) {
-                       proc_unlock(sig_proc);
                        r_uid = kauth_getruid();
-                       proc_lock(sig_proc);
 
                        sig_proc->si_pid = current_proc()->p_pid;
                        sig_proc->si_status = W_EXITCODE(signum, 0);
@@ -2073,14 +2069,13 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                         * stopped from the keyboard.
                         */
                        if (!(prop & SA_STOP) && sig_proc->p_pptr == initproc) {
-                               proc_unlock(sig_proc);
-                               psignal_locked(sig_proc, SIGKILL);
-                               proc_lock(sig_proc);
                                uth->uu_siglist &= ~mask;
                                proc_unlock(sig_proc);
-                               goto psigout;
+                               /* siglock still locked, proc_lock not locked */
+                               psignal_locked(sig_proc, SIGKILL);
+                               goto sigout_unlocked;
                        }
-                        
+
                        /*
                         *      Stop the task
                         *      if task hasn't already been stopped by
@@ -2119,19 +2114,18 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
 
                                        psignal(pp, SIGCHLD);
                                }
-                               if (pp != PROC_NULL)
+                               if (pp != PROC_NULL) {
                                        proc_parentdropref(pp, 0);
-                       } else
-                               proc_unlock(sig_proc);
-                       goto psigout;
+                               }
+
+                               goto sigout_unlocked;
+                       }
+
+                       goto sigout_locked;
                }
 
                DTRACE_PROC3(signal__send, thread_t, sig_thread, proc_t, p, int, signum);
 
-               /*
-                * enters switch with sig_proc lock held but dropped when
-                * gets out of switch
-                */
                switch (signum) {
                        /*
                         * Signals ignored by default have been dealt
@@ -2157,9 +2151,8 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                         */
                        act_set_astbsd(sig_thread);
                        thread_abort(sig_thread);
-                       proc_unlock(sig_proc);
 
-                       goto psigout;
+                       goto sigout_locked;
 
                case SIGCONT:
                        /*
@@ -2169,9 +2162,8 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                        OSBitOrAtomic(P_CONTINUED, &sig_proc->p_flag);
                        sig_proc->p_contproc = sig_proc->p_pid;
 
-                       proc_unlock(sig_proc);
                        (void) task_resume_internal(sig_task);
-                       proc_lock(sig_proc);
+
                        /*
                         * When processing a SIGCONT, we need to check
                         * to see if there are signals pending that
@@ -2190,8 +2182,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
 
                        uth->uu_siglist &= ~mask;
                        sig_proc->p_stat = SRUN;
-                       proc_unlock(sig_proc);
-                       goto psigout;
+                       goto sigout_locked;
 
                default:
                        /*
@@ -2201,9 +2192,8 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                         */
                        if (((flavor & (PSIG_VFORK|PSIG_THREAD)) == 0) && (action == SIG_DFL) && (prop & SA_KILL)) {
                                sig_proc->p_stat = SRUN;
-                               proc_unlock(sig_proc);
                                thread_abort(sig_thread);
-                               goto psigout;
+                               goto sigout_locked;
                        }
 
                        /*
@@ -2211,8 +2201,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                         * resume it.
                         */
                        if (sig_proc->p_stat == SSTOP) {
-                               proc_unlock(sig_proc);
-                               goto psigout;
+                               goto sigout_locked;
                        }
                        goto runlocked;
                }
@@ -2226,22 +2215,25 @@ runlocked:
         */
        if (sig_proc->p_stat == SSTOP) {
                if ((sig_proc->p_lflag & P_LTRACED) != 0 && sig_proc->p_xstat != 0)
-                       uth->uu_siglist |= sigmask(sig_proc->p_xstat); 
+                       uth->uu_siglist |= sigmask(sig_proc->p_xstat);
+
                if ((flavor & PSIG_VFORK) != 0) {
                        sig_proc->p_stat = SRUN;
                }
-               proc_unlock(sig_proc);  
        } else {
                /*
                 * setrunnable(p) in BSD and
                 * Wake up the thread if it is interruptible.
                 */
                sig_proc->p_stat = SRUN;
-               proc_unlock(sig_proc);  
                if ((flavor & PSIG_VFORK) == 0)
                        thread_abort_safely(sig_thread);
        }
-psigout:
+
+sigout_locked:
+       proc_unlock(sig_proc);
+
+sigout_unlocked:
        if ((flavor & PSIG_LOCKED)== 0) {
                proc_signalend(sig_proc, 0);
        }
index cfbd3c99f47c1c1cfb622d8fe59ac361457ab729..5da44690ab7ef1525bacddc80a0ac96089e8f3ff 100644 (file)
@@ -546,6 +546,18 @@ itimerfix(
        return (0);
 }
 
+int
+timespec_is_valid(const struct timespec *ts)
+{
+       /* The INT32_MAX limit ensures the timespec is safe for clock_*() functions
+        * which accept 32-bit ints. */
+       if (ts->tv_sec < 0 || ts->tv_sec > INT32_MAX ||
+                       ts->tv_nsec < 0 || (unsigned long long)ts->tv_nsec > NSEC_PER_SEC) {
+               return 0;
+       }
+       return 1;
+}
+
 /*
  * Decrement an interval timer by a specified number
  * of microseconds, which must be less than a second,
@@ -723,6 +735,15 @@ tvtoabstime(
        return (result + usresult);
 }
 
+uint64_t
+tstoabstime(struct timespec *ts)
+{
+       uint64_t abstime_s, abstime_ns;
+       clock_interval_to_absolutetime_interval(ts->tv_sec, NSEC_PER_SEC, &abstime_s);
+       clock_interval_to_absolutetime_interval(ts->tv_nsec, 1, &abstime_ns);
+       return abstime_s + abstime_ns;
+}
+
 #if NETWORKING
 /*
  * ratecheck(): simple time-based rate-limit checking.
index a5da30245812cb7faf61867cfec5ad5eddc0aa85..e26393c52b211be4adfa906d30df6a03a01a90c2 100644 (file)
@@ -139,6 +139,7 @@ int __attribute__ ((noinline)) proc_terminate(int pid, int32_t * retval);
 int __attribute__ ((noinline)) proc_pid_rusage(int pid, int flavor, user_addr_t buffer, int32_t * retval);
 int __attribute__ ((noinline)) proc_pidoriginatorinfo(int pid, int flavor, user_addr_t buffer, uint32_t buffersize, int32_t * retval);
 int __attribute__ ((noinline)) proc_listcoalitions(int flavor, int coaltype, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
+int __attribute__ ((noinline)) proc_can_use_foreground_hw(int pid, user_addr_t reason, uint32_t reasonsize, int32_t *retval);
 
 /* protos for procpidinfo calls */
 int __attribute__ ((noinline)) proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
@@ -246,6 +247,8 @@ proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t b
                case PROC_INFO_CALL_LISTCOALITIONS:
                        return proc_listcoalitions(pid /* flavor */, flavor /* coaltype */, buffer,
                                                   buffersize, retval);
+               case PROC_INFO_CALL_CANUSEFGHW:
+                       return proc_can_use_foreground_hw(pid, buffer, buffersize, retval);
                default:
                                return(EINVAL);
        }
@@ -1375,6 +1378,174 @@ out:
 }
 
 
+/*************************** proc_can_use_foreground_hw **************************/
+int proc_can_use_foreground_hw(int pid, user_addr_t u_reason, uint32_t reasonsize, int32_t *retval)
+{
+       proc_t p = PROC_NULL;
+       int error = 0;
+       uint32_t reason = PROC_FGHW_ERROR;
+       uint32_t isBG = 0;
+       task_t task = TASK_NULL;
+#if CONFIG_COALITIONS
+       coalition_t coal = COALITION_NULL;
+#endif
+
+       *retval = 0;
+
+       if (pid <= 0) {
+               error = EINVAL;
+               reason = PROC_FGHW_ERROR;
+               goto out;
+       }
+
+       p = proc_find(pid);
+       if (p == PROC_NULL) {
+               error = ESRCH;
+               reason = PROC_FGHW_ERROR;
+               goto out;
+       }
+
+#if CONFIG_COALITIONS
+       if (p != current_proc() &&
+           !kauth_cred_issuser(kauth_cred_get())) {
+               error = EPERM;
+               reason = PROC_FGHW_ERROR;
+               goto out;
+       }
+
+       task = p->task;
+       task_reference(task);
+       if (coalition_is_leader(task, COALITION_TYPE_JETSAM, &coal) == FALSE) {
+               /* current task is not a coalition leader: find the leader */
+               task_deallocate(task);
+               task = coalition_get_leader(coal);
+       }
+
+       if (task != TASK_NULL) {
+               /*
+                * If task is non-null, then it is the coalition leader of the
+                * current process' coalition. This could be the same task as
+                * the current_task, and that's OK.
+                */
+               uint32_t flags = 0;
+               int role;
+
+               proc_get_darwinbgstate(task, &flags);
+               if ((flags & PROC_FLAG_APPLICATION) != PROC_FLAG_APPLICATION) {
+                       /*
+                        * Coalition leader is not an application, continue
+                        * searching for other ways this task could gain
+                        * access to HW
+                        */
+                       reason = PROC_FGHW_DAEMON_LEADER;
+                       goto no_leader;
+               }
+
+               if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG)) {
+                       /*
+                        * If the leader of the current process' coalition has
+                        * been marked as DARWIN_BG, then it definitely should
+                        * not be using foreground hardware resources.
+                        */
+                       reason = PROC_FGHW_LEADER_BACKGROUND;
+                       goto out;
+               }
+
+               role = proc_get_effective_task_policy(task, TASK_POLICY_ROLE);
+               switch (role) {
+               case TASK_FOREGROUND_APPLICATION: /* DARWIN_ROLE_UI_FOCAL */
+               case TASK_BACKGROUND_APPLICATION: /* DARWIN_ROLE_UI */
+                       /*
+                        * The leader of this coalition is a focal, UI app:
+                        * access granted
+                        * TODO: should extensions/plugins be allowed to use
+                        *       this hardware?
+                        */
+                       *retval = 1;
+                       reason = PROC_FGHW_OK;
+                       goto out;
+               case TASK_DEFAULT_APPLICATION: /* DARWIN_ROLE_UI_NON_FOCAL */
+               case TASK_NONUI_APPLICATION: /* DARWIN_ROLE_NON_UI */
+               case TASK_THROTTLE_APPLICATION:
+               case TASK_UNSPECIFIED:
+               default:
+                       /* non-focal, non-ui apps don't get access */
+                       reason = PROC_FGHW_LEADER_NONUI;
+                       goto out;
+               }
+       }
+
+no_leader:
+       if (task != TASK_NULL) {
+               task_deallocate(task);
+               task = TASK_NULL;
+       }
+#endif /* CONFIG_COALITIONS */
+
+       /*
+        * There is no reasonable semantic to investigate the currently
+        * adopted voucher of an arbitrary thread in a non-current process.
+        * We return '0'
+        */
+       if (p != current_proc()) {
+               error = EINVAL;
+               goto out;
+       }
+
+       /*
+        * In the absence of coalitions, fall back to a voucher-based lookup
+        * where a daemon can use foreground HW if it's operating on behalf
+        * of a foreground application.
+        * NOTE: this is equivalent to a call to
+        *       proc_pidoriginatorinfo(PROC_PIDORIGINATOR_BGSTATE, &isBG, sizeof(isBG))
+        */
+       isBG = 1;
+       error = proc_get_originatorbgstate(&isBG);
+       switch (error) {
+       case 0:
+               break;
+       case ESRCH:
+               reason = PROC_FGHW_NO_ORIGINATOR;
+               error = 0;
+               goto out;
+       case ENOATTR:
+               reason = PROC_FGHW_NO_VOUCHER_ATTR;
+               error = 0;
+               goto out;
+       case EINVAL:
+               reason = PROC_FGHW_DAEMON_NO_VOUCHER;
+               error = 0;
+               goto out;
+       default:
+               /* some other error occurred: report that to the caller */
+               reason = PROC_FGHW_VOUCHER_ERROR;
+               goto out;
+       }
+
+       if (isBG) {
+               reason = PROC_FGHW_ORIGINATOR_BACKGROUND;
+               error = 0;
+       } else {
+               /*
+                * The process itself is either a foreground app, or has
+                * adopted a voucher originating from an app that's still in
+                * the foreground
+                */
+               reason = PROC_FGHW_DAEMON_OK;
+               *retval = 1;
+       }
+
+out:
+       if (task != TASK_NULL)
+               task_deallocate(task);
+       if (p != PROC_NULL)
+               proc_rele(p);
+       if (reasonsize >= sizeof(reason) && u_reason != (user_addr_t)0)
+               (void)copyout(&reason, u_reason, sizeof(reason));
+       return error;
+}
+
+
 /********************************** proc_pidinfo ********************************/
 
 
index a20ce301fc12cb214d8bdcac5fa1c46dd54c0b03..e35a8a87829ec60614e55cc1b34d0f1fb97610f1 100644 (file)
@@ -216,10 +216,6 @@ coalition_info_resource_usage(coalition_t coal, user_addr_t buffer, user_size_t
        kern_return_t kr;
        struct coalition_resource_usage cru;
 
-       if (bufsize != sizeof(cru)) {
-               return EINVAL;
-       }
-
        kr = coalition_resource_usage_internal(coal, &cru);
 
        switch (kr) {
@@ -233,7 +229,7 @@ coalition_info_resource_usage(coalition_t coal, user_addr_t buffer, user_size_t
                return EIO; /* shrug */
        }
 
-       return copyout(&cru, buffer, bufsize);
+       return copyout(&cru, buffer, MIN(bufsize, sizeof(cru)));
 }
 
 int coalition_info(proc_t p, struct coalition_info_args *uap, __unused int32_t *retval)
index d6c46f58db4658d7f9e9a759e44085c3e059948c..1fb49eb91dcd3961b85a932a2103b87c470a3468 100644 (file)
@@ -940,6 +940,7 @@ static int selscan(struct proc *p, struct _select * sel, struct _select_data * s
 static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
 static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount);
 static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
+static int select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval);
 
 /*
  * Select system call.
@@ -947,17 +948,126 @@ static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
  * Returns:    0                       Success
  *             EINVAL                  Invalid argument
  *             EAGAIN                  Nonconformant error if allocation fails
- *     selprocess:???
  */
 int
 select(struct proc *p, struct select_args *uap, int32_t *retval)
 {
        __pthread_testcancel(1);
-       return(select_nocancel(p, (struct select_nocancel_args *)uap, retval));
+       return select_nocancel(p, (struct select_nocancel_args *)uap, retval);
 }
 
 int
 select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
+{
+       uint64_t timeout = 0;
+
+       if (uap->tv) {
+               int err;
+               struct timeval atv;
+               if (IS_64BIT_PROCESS(p)) {
+                       struct user64_timeval atv64;
+                       err = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
+                       /* Loses resolution - assume timeout < 68 years */
+                       atv.tv_sec = atv64.tv_sec;
+                       atv.tv_usec = atv64.tv_usec;
+               } else {
+                       struct user32_timeval atv32;
+                       err = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
+                       atv.tv_sec = atv32.tv_sec;
+                       atv.tv_usec = atv32.tv_usec;
+               }
+               if (err)
+                       return err;
+
+               if (itimerfix(&atv)) {
+                       err = EINVAL;
+                       return err;
+               }
+
+               clock_absolutetime_interval_to_deadline(tvtoabstime(&atv), &timeout);
+       }
+
+       return select_internal(p, uap, timeout, retval);
+}
+
+int
+pselect(struct proc *p, struct pselect_args *uap, int32_t *retval)
+{
+       __pthread_testcancel(1);
+       return pselect_nocancel(p, (struct pselect_nocancel_args *)uap, retval);
+}
+
+int
+pselect_nocancel(struct proc *p, struct pselect_nocancel_args *uap, int32_t *retval)
+{
+       int err;
+       struct uthread *ut;
+       uint64_t timeout = 0;
+
+       if (uap->ts) {
+               struct timespec ts;
+
+               if (IS_64BIT_PROCESS(p)) {
+                       struct user64_timespec ts64;
+                       err = copyin(uap->ts, (caddr_t)&ts64, sizeof(ts64));
+                       ts.tv_sec = ts64.tv_sec;
+                       ts.tv_nsec = ts64.tv_nsec;
+               } else {
+                       struct user32_timespec ts32;
+                       err = copyin(uap->ts, (caddr_t)&ts32, sizeof(ts32));
+                       ts.tv_sec = ts32.tv_sec;
+                       ts.tv_nsec = ts32.tv_nsec;
+               }
+               if (err) {
+                       return err;
+               }
+
+               if (!timespec_is_valid(&ts)) {
+                       return EINVAL;
+               }
+               clock_absolutetime_interval_to_deadline(tstoabstime(&ts), &timeout);
+       }
+
+       ut = get_bsdthread_info(current_thread());
+
+       if (uap->mask != USER_ADDR_NULL) {
+               /* save current mask, then copyin and set new mask */
+               sigset_t newset;
+               err = copyin(uap->mask, &newset, sizeof(sigset_t));
+               if (err) {
+                       return err;
+               }
+               ut->uu_oldmask = ut->uu_sigmask;
+               ut->uu_flag |= UT_SAS_OLDMASK;
+               ut->uu_sigmask = (newset & ~sigcantmask);
+       }
+
+       err = select_internal(p, (struct select_nocancel_args *)uap, timeout, retval);
+
+       if (err != EINTR && ut->uu_flag & UT_SAS_OLDMASK) {
+               /*
+                * Restore old mask (direct return case). NOTE: EINTR can also be returned
+                * if the thread is cancelled. In that case, we don't reset the signal
+                * mask to its original value (which usually happens in the signal
+                * delivery path). This behavior is permitted by POSIX.
+                */
+               ut->uu_sigmask = ut->uu_oldmask;
+               ut->uu_oldmask = 0;
+               ut->uu_flag &= ~UT_SAS_OLDMASK;
+       }
+
+       return err;
+}
+
+/*
+ * Generic implementation of {,p}select. Care: we type-pun uap across the two
+ * syscalls, which differ slightly. The first 4 arguments (nfds and the fd sets)
+ * are identical. The 5th (timeout) argument points to different types, so we
+ * unpack in the syscall-specific code, but the generic code still does a null
+ * check on this argument to determine if a timeout was specified.
+ */
+static int
+select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval)
 {
        int error = 0;
        u_int ni, nw;
@@ -1049,32 +1159,7 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva
        getbits(ex, 2);
 #undef getbits
 
-       if (uap->tv) {
-               struct timeval atv;
-               if (IS_64BIT_PROCESS(p)) {
-                       struct user64_timeval atv64;
-                       error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
-                       /* Loses resolution - assume timeout < 68 years */
-                       atv.tv_sec = atv64.tv_sec;
-                       atv.tv_usec = atv64.tv_usec;
-               } else {
-                       struct user32_timeval atv32;
-                       error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
-                       atv.tv_sec = atv32.tv_sec;
-                       atv.tv_usec = atv32.tv_usec;
-               }
-               if (error)
-                       goto continuation;
-               if (itimerfix(&atv)) {
-                       error = EINVAL;
-                       goto continuation;
-               }
-
-               clock_absolutetime_interval_to_deadline(
-                                                                               tvtoabstime(&atv), &seldata->abstime);
-       }
-       else
-               seldata->abstime = 0;
+       seldata->abstime = timeout;
 
        if ( (error = selcount(p, sel->ibits, uap->nd, &count)) ) {
                        goto continuation;
@@ -1306,6 +1391,14 @@ done:
                putbits(ex, 2);
 #undef putbits
        }
+
+       if (error != EINTR && sel_pass == SEL_SECONDPASS && uth->uu_flag & UT_SAS_OLDMASK) {
+               /* restore signal mask - continuation case */
+               uth->uu_sigmask = uth->uu_oldmask;
+               uth->uu_oldmask = 0;
+               uth->uu_flag &= ~UT_SAS_OLDMASK;
+       }
+
        return(error);
 }
 
index 5f58f92639543426325b13207538af28739e6950..066065c6a85946ab9d9358ee3eac67b29bb97e1d 100644 (file)
 391    AUE_NULL        ALL     { int enosys(void); }
 392    AUE_NULL        ALL     { int enosys(void); }
 393    AUE_NULL        ALL     { int enosys(void); }
-394    AUE_NULL        ALL     { int enosys(void); }
-395    AUE_NULL        ALL     { int enosys(void); }
+394    AUE_SELECT      ALL     { int pselect(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, const struct timespec *ts, const struct sigset_t *mask) NO_SYSCALL_STUB; }
+395    AUE_SELECT      ALL     { int pselect_nocancel(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, const struct timespec *ts, const struct sigset_t *mask) NO_SYSCALL_STUB; }
 396    AUE_NULL        ALL     { user_ssize_t read_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; } 
 397    AUE_NULL        ALL     { user_ssize_t write_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; } 
 398    AUE_OPEN_RWTC   ALL     { int open_nocancel(user_addr_t path, int flags, int mode) NO_SYSCALL_STUB; } 
index 2a5a362063e03c28518ddf9869dc51d9be83f326..b37035dfe694da54e4df5c7d9fd87a98358f2584 100644 (file)
 0x130048C      MACH_vm_page_sleep
 0x1300490      MACH_vm_page_expedite
 0x13004c0      MACH_vm_pressure_event
+0x1300500      MACH_vm_data_write
 0x1400000      MACH_SCHED
 0x1400004      MACH_STKATTACH
 0x1400008      MACH_STKHANDOFF
 0x1a30004      ENERGY_PERF_GPU_DESCRIPTION
 0x1a30008      ENERGY_PERF_GPU_TIME
 0x1a40000      SYSDIAGNOSE_notify_user
+0x1a50000      ZALLOC_ZCRAM
 0x2010000      L_IP_In_Beg
 0x2010004      L_IP_Out_Beg
 0x2010008      L_IP_In_End
index 0222981a2f31a9fad3e56316c9544ad37c89f614..7d7fbe7c61102632a26689f3be6856eba0c0cedf 100644 (file)
@@ -2847,13 +2847,12 @@ ubc_cs_blob_add(
 
        error = cs_validate_csblob((const uint8_t *)addr, size, &cd);
        if (error) {
-               if (cs_debug)
+
+        if (cs_debug)
                        printf("CODESIGNING: csblob invalid: %d\n", error);
-               blob->csb_flags = 0;
-               blob->csb_start_offset = 0;
-               blob->csb_end_offset = 0;
-               memset(blob->csb_cdhash, 0, sizeof(blob->csb_cdhash));
-               /* let the vnode checker determine if the signature is valid or not */
+        /* The vnode checker can't make the rest of this function succeed if csblob validation failed, so bail */
+        goto out;
+
        } else {
                const unsigned char *md_base;
                uint8_t hash[CS_HASH_MAX_SIZE];
index d73d61a4b24afbf2b63aaf89e818c2ae8a6ba087..ede272ba6763e4004bbf75e2f6db410e22aa4c1f 100644 (file)
@@ -6804,9 +6804,14 @@ sockaddrentry_dup(const struct sockaddr_entry *src_se, int how)
        dst_se = sockaddrentry_alloc(how);
        if (dst_se != NULL) {
                int len = src_se->se_addr->sa_len;
+               /*
+                * Workaround for rdar://23362120
+                * Always allocate a buffer that can hold an IPv6 socket address
+                */
+               size_t alloclen = MAX(len, sizeof(struct sockaddr_in6));
 
                MALLOC(dst_se->se_addr, struct sockaddr *,
-                   len, M_SONAME, how | M_ZERO);
+                   alloclen, M_SONAME, how | M_ZERO);
                if (dst_se->se_addr != NULL) {
                        bcopy(src_se->se_addr, dst_se->se_addr, len);
                } else {
index f44291282652491235f99d5e12165aab78502774..03330fbcbca27bdb133de8c64a154adbd9124748 100644 (file)
@@ -2756,6 +2756,7 @@ getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
 {
        struct sockaddr *sa;
        int error;
+       size_t alloclen;
 
        if (len > SOCK_MAXADDRLEN)
                return (ENAMETOOLONG);
@@ -2763,7 +2764,12 @@ getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
        if (len < offsetof(struct sockaddr, sa_data[0]))
                return (EINVAL);
 
-       MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
+       /*
+        * Workaround for rdar://23362120
+        * Always allocate a buffer that can hold an IPv6 socket address
+        */
+       alloclen = MAX(len, sizeof(struct sockaddr_in6));
+       MALLOC(sa, struct sockaddr *, alloclen, M_SONAME, M_WAITOK | M_ZERO);
        if (sa == NULL) {
                return (ENOMEM);
        }
index 075ebeb1695b103a6b7acd1a075c10c3509c31a7..047f85de3a57656597f87dbc9dafc914a0f9c761 100644 (file)
@@ -140,6 +140,7 @@ DATAFILES = \
        posix_madvise.2         \
        pread.2                 \
        profil.2                \
+       pselect.2               \
        pthread_setugid_np.2    \
        ptrace.2                \
        pwrite.2                \
diff --git a/bsd/man/man2/pselect.2 b/bsd/man/man2/pselect.2
new file mode 100644 (file)
index 0000000..eaf91d0
--- /dev/null
@@ -0,0 +1,122 @@
+.\"
+.\" Copyright 2002 Massachusetts Institute of Technology
+.\"
+.\" Permission to use, copy, modify, and distribute this software and
+.\" its documentation for any purpose and without fee is hereby
+.\" granted, provided that both the above copyright notice and this
+.\" permission notice appear in all copies, that both the above
+.\" copyright notice and this permission notice appear in all
+.\" supporting documentation, and that the name of M.I.T. not be used
+.\" in advertising or publicity pertaining to distribution of the
+.\" software without specific, written prior permission.  M.I.T. makes
+.\" no representations about the suitability of this software for any
+.\" purpose.  It is provided "as is" without express or implied
+.\" warranty.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
+.\" ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+.\" SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+.\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+.\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD: src/lib/libc/gen/pselect.3,v 1.4 2002/12/18 10:13:54 ru Exp $
+.\"
+.Dd June 16, 2002
+.Dt PSELECT 2
+.Os
+.Sh NAME
+.Nm pselect
+.Nd synchronous I/O multiplexing a la POSIX.1g
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/select.h
+.Ft int
+.Fo pselect
+.Fa "int nfds"
+.Fa "fd_set *restrict readfds"
+.Fa "fd_set *restrict writefds"
+.Fa "fd_set *restrict errorfds"
+.Fa "const struct timespec *restrict timeout"
+.Fa "const sigset_t *restrict sigmask"
+.Fc
+.Sh DESCRIPTION
+The
+.Fn pselect
+function was introduced by
+.St -p1003.1g-2000
+as a slightly stronger version of
+.Xr select 2 .
+The
+.Fa nfds , readfds , writefds ,
+and
+.Fa errorfds
+arguments are all identical to the analogous arguments of
+.Fn select .
+The
+.Fa timeout
+argument in
+.Fn pselect
+points to a
+.Vt "const struct timespec" ,
+rather than the (modifiable)
+.Vt "struct timeval"
+used by
+.Fn select ;
+as in
+.Fn select ,
+a null pointer may be passed to indicate that
+.Fn pselect
+should wait indefinitely.
+Finally,
+.Fa sigmask
+specifies a signal mask which is set while waiting for input.
+When
+.Fn pselect
+returns, the original signal mask is restored.
+.Pp
+See
+.Xr select 2
+for a more detailed discussion of the semantics of this interface, and
+for macros used to manipulate the
+.Vt "fd_set"
+data type.
+.Sh RETURN VALUES
+The
+.Fn pselect
+function returns the same values and under the same conditions as
+.Fn select .
+.Sh ERRORS
+The
+.Fn pselect
+function may fail for any of the reasons documented for
+.Xr select 2
+and (if a signal mask is provided)
+.Xr sigprocmask 2 .
+.Sh SEE ALSO
+.Xr kqueue 2 ,
+.Xr poll 2 ,
+.Xr select 2 ,
+.Xr sigprocmask 2
+.Sh STANDARDS
+The
+.Fn pselect
+function conforms to
+.St -p1003.1-2001 .
+.Sh HISTORY
+The
+.Fn pselect
+function first appeared in
+.Fx 5.0 .
+.Sh AUTHORS
+The
+.Fn pselect
+manual page was written by
+.An Garrett Wollman Aq wollman@FreeBSD.org .
index 7d12ba606f03add83bc16efb6e39c93ab995c01b..5831f968f0721d3219e242d7e408bc8e7958c34e 100644 (file)
@@ -1251,6 +1251,7 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
         */
        if (droptype == DTYPE_NODROP && qlen(q) >= maxqsize) {
                if (pkt->pkt_proto == IPPROTO_TCP &&
+                   qlen(q) < (maxqsize + (maxqsize >> 1)) &&
                    ((pkt->pkt_flags & PKTF_TCP_REXMT) ||
                    (sp->sfb_flags & SFBF_LAST_PKT_DROPPED))) {
                        /*
index 5576af7d7b0ed9f5e5682a9d0539d5ea54368673..6d89331b1b60b41bfe56f0a88f4e15c45c879c35 100644 (file)
@@ -1095,6 +1095,23 @@ dlil_alloc_local_stats(struct ifnet *ifp)
                ret = 0;
        }
 
+       if (ifp->if_ipv4_stat == NULL) {
+               MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
+                   sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
+               if (ifp->if_ipv4_stat == NULL) {
+                       ret = ENOMEM;
+                       goto end;
+               }
+       }
+
+       if (ifp->if_ipv6_stat == NULL) {
+               MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
+                   sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
+               if (ifp->if_ipv6_stat == NULL) {
+                       ret = ENOMEM;
+                       goto end;
+               }
+       }
 end:
        if (ret != 0) {
                if (ifp->if_tcp_stat != NULL) {
@@ -1109,6 +1126,14 @@ end:
                        zfree(dlif_udpstat_zone, *pbuf);
                        ifp->if_udp_stat = NULL;
                }
+               if (ifp->if_ipv4_stat != NULL) {
+                       FREE(ifp->if_ipv4_stat, M_TEMP);
+                       ifp->if_ipv4_stat = NULL;
+               }
+               if (ifp->if_ipv6_stat != NULL) {
+                       FREE(ifp->if_ipv6_stat, M_TEMP);
+                       ifp->if_ipv6_stat = NULL;
+               }
        }
 
        return (ret);
@@ -5093,6 +5118,7 @@ ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
 }
 
 extern int if_next_index(void);
+extern int tcp_ecn_outbound;
 
 errno_t
 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
@@ -5414,6 +5440,16 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
        } else {
                ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
        }
+
+       /*
+        * Enable ECN capability on this interface depending on the
+        * value of the ECN global setting
+        */
+       if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
+               ifp->if_eflags |= IFEF_ECN_ENABLE;
+               ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+       }
+
        ifnet_lock_done(ifp);
        ifnet_head_done();
 
@@ -5711,6 +5747,14 @@ ifnet_detach(ifnet_t ifp)
        if (ifp->if_udp_stat != NULL)
                bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
 
+       /* Reset ifnet IPv4 stats */
+       if (ifp->if_ipv4_stat != NULL)
+               bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
+
+       /* Reset ifnet IPv6 stats */
+       if (ifp->if_ipv6_stat != NULL)
+               bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
+
        /* Release memory held for interface link status report */
        if (ifp->if_link_status != NULL) {
                FREE(ifp->if_link_status, M_TEMP);
index d65efe3a1cca1de134c709519caba9b02b51e467..57d48d21c671d98c62d5eeed21edb39ef65aeff1 100644 (file)
@@ -1992,7 +1992,9 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
        case SIOCGIFINTERFACESTATE:             /* struct ifreq */
        case SIOCSIFPROBECONNECTIVITY:          /* struct ifreq */
        case SIOCGIFPROBECONNECTIVITY:          /* struct ifreq */
-       case SIOCGSTARTDELAY: {                 /* struct ifreq */
+       case SIOCGSTARTDELAY:                   /* struct ifreq */
+       case SIOCGECNMODE:                      /* struct ifreq */
+       case SIOCSECNMODE: {                    /* struct ifreq */
                struct ifreq ifr;
                bcopy(data, &ifr, sizeof (ifr));
                ifr.ifr_name[IFNAMSIZ - 1] = '\0';
@@ -2780,6 +2782,28 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p)
                else
                        ifr->ifr_probe_connectivity = 0;
                break;
+       case SIOCGECNMODE:
+               if ((ifp->if_eflags & (IFEF_ECN_ENABLE|IFEF_ECN_DISABLE)) ==
+                   IFEF_ECN_ENABLE)
+                       ifr->ifr_ecn_mode = IFRTYPE_ECN_ENABLE;
+               else if ((ifp->if_eflags & (IFEF_ECN_ENABLE|IFEF_ECN_DISABLE)) ==
+                   IFEF_ECN_DISABLE)
+                       ifr->ifr_ecn_mode = IFRTYPE_ECN_DISABLE;
+               else
+                       ifr->ifr_ecn_mode = IFRTYPE_ECN_DEFAULT;
+               break;
+       case SIOCSECNMODE:
+               if (ifr->ifr_ecn_mode == IFRTYPE_ECN_DEFAULT) {
+                       ifp->if_eflags &= ~(IFEF_ECN_ENABLE|IFEF_ECN_DISABLE);
+               } else if (ifr->ifr_ecn_mode == IFRTYPE_ECN_ENABLE) {
+                       ifp->if_eflags |= IFEF_ECN_ENABLE;
+                       ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+               } else if (ifr->ifr_ecn_mode == IFRTYPE_ECN_DISABLE) {
+                       ifp->if_eflags |= IFEF_ECN_DISABLE;
+                       ifp->if_eflags &= ~IFEF_ECN_ENABLE;
+               } else
+                       error = EINVAL;
+               break;
        default:
                VERIFY(0);
                /* NOTREACHED */
@@ -4303,6 +4327,10 @@ ifioctl_cassert(void)
        case SIOCGIFAGENTDATA64:
        case SIOCSIFINTERFACESTATE:
        case SIOCGIFINTERFACESTATE:
+       case SIOCSIFPROBECONNECTIVITY:
+       case SIOCGIFPROBECONNECTIVITY:
+       case SIOCGECNMODE:
+       case SIOCSECNMODE:
                ;
        }
 }
index 62afa9cd44033e4c0bbcdf55f733185c2be5fdf8..c2c99314bff9f55ce3aa17a0475e750082064b0a 100644 (file)
@@ -182,6 +182,8 @@ struct if_clonereq32 {
 #define        IFEF_NOACKPRI           0x00200000      /* No TCP ACK prioritization */
 #define        IFEF_AWDL_RESTRICTED    0x00400000      /* Restricted AWDL mode */
 #define        IFEF_2KCL               0x00800000      /* prefers 2K cluster (socket based tunnel) */
+#define        IFEF_ECN_ENABLE         0x01000000      /* use ECN for TCP connections on the interface */
+#define        IFEF_ECN_DISABLE        0x02000000      /* do not use ECN for TCP connections on the interface */
 #define        IFEF_SENDLIST           0x10000000      /* Supports tx packet lists */
 #define        IFEF_DIRECTLINK         0x20000000      /* point-to-point topology */
 #define        _IFEF_INUSE             0x40000000      /* deprecated */
@@ -486,6 +488,10 @@ struct     ifreq {
                } ifru_start_delay;
                struct if_interface_state       ifru_interface_state;
                u_int32_t ifru_probe_connectivity;
+               u_int32_t ifru_ecn_mode;
+#define        IFRTYPE_ECN_DEFAULT             0
+#define        IFRTYPE_ECN_ENABLE                      1
+#define        IFRTYPE_ECN_DISABLE                     2
 #endif /* PRIVATE */
        } ifr_ifru;
 #define        ifr_addr        ifr_ifru.ifru_addr      /* address */
@@ -528,6 +534,7 @@ struct      ifreq {
 #define        ifr_start_delay_timeout ifr_ifru.ifru_start_delay.timeout
 #define ifr_interface_state    ifr_ifru.ifru_interface_state
 #define        ifr_probe_connectivity  ifr_ifru.ifru_probe_connectivity
+#define        ifr_ecn_mode    ifr_ifru.ifru_ecn_mode
 #endif /* PRIVATE */
 };
 
index 29e253bc4f3c9f2c5a6bca799d8cb026dc70633e..fc7ce88fb6e9f53a5d090a0ebc2809e92e7b7883 100644 (file)
@@ -327,6 +327,39 @@ struct if_rxpoll_stats {
        u_int64_t       ifi_poll_interval_time; /* poll interval (nsec) */
 };
 
+struct if_tcp_ecn_perf_stat {
+       u_int64_t rtt_avg;
+       u_int64_t rtt_var;
+       u_int64_t oo_percent;
+       u_int64_t sack_episodes;
+       u_int64_t reorder_percent;
+       u_int64_t rxmit_percent;
+       u_int64_t rxmit_drop;
+};
+
+struct if_tcp_ecn_stat {
+       u_int64_t timestamp;
+       u_int64_t ecn_client_setup;
+       u_int64_t ecn_server_setup;
+       u_int64_t ecn_client_success;
+       u_int64_t ecn_server_success;
+       u_int64_t ecn_peer_nosupport;
+       u_int64_t ecn_syn_lost;
+       u_int64_t ecn_synack_lost;
+       u_int64_t ecn_recv_ce;
+       u_int64_t ecn_recv_ece;
+       u_int64_t ecn_conn_recv_ce;
+       u_int64_t ecn_conn_recv_ece;
+       u_int64_t ecn_conn_plnoce;
+       u_int64_t ecn_conn_plce;
+       u_int64_t ecn_conn_noplce;
+       u_int64_t ecn_fallback_synloss;
+       u_int64_t ecn_fallback_reorder;
+       u_int64_t ecn_fallback_ce;
+       struct if_tcp_ecn_perf_stat ecn_on;
+       struct if_tcp_ecn_perf_stat ecn_off;
+};
+
 /*
  * Interface link status report -- includes statistics related to
  * the link layer technology sent by the driver. The driver will monitor
@@ -908,6 +941,8 @@ struct ifnet {
        decl_lck_rw_data(, if_link_status_lock);
        struct if_link_status   *if_link_status;
        struct if_interface_state       if_interface_state;
+       struct if_tcp_ecn_stat *if_ipv4_stat;
+       struct if_tcp_ecn_stat *if_ipv6_stat;
 };
 
 #define        IF_TCP_STATINC(_ifp, _s) do {                                   \
@@ -1189,6 +1224,13 @@ struct ifmultiaddr {
        ((_ifp)->if_type == IFT_CELLULAR ||                             \
        (_ifp)->if_delegated.type == IFT_CELLULAR)
 
+/*
+ * Indicate whether or not the immediate interface, or the interface delegated
+ * by it, is an ETHERNET interface.
+ */
+#define        IFNET_IS_ETHERNET(_ifp)                                         \
+       ((_ifp)->if_family == IFNET_FAMILY_ETHERNET ||                  \
+       (_ifp)->if_delegated.family == IFNET_FAMILY_ETHERNET)
 /*
  * Indicate whether or not the immediate interface, or the interface delegated
  * by it, is a Wi-Fi interface (IFNET_SUBFAMILY_WIFI).  Delegated interface
index 650e809cd7ff0f1b1bd20ff860d12e112ed31bd3..e40268c30a510fec642b7ee5ea67bf4a51208e28 100644 (file)
@@ -731,6 +731,13 @@ necp_packet_get_tlv_at_offset(mbuf_t packet, int tlv_offset, u_int32_t buff_len,
                return (error);
        }
 
+       u_int32_t total_len = m_length2(packet, NULL);
+       if (total_len < (tlv_offset + sizeof(u_int8_t) + sizeof(length) + length)) {
+               NECPLOG(LOG_ERR, "Got a bad TLV, length (%u) + offset (%d) < total length (%u)",
+                               length, (tlv_offset + sizeof(u_int8_t) + sizeof(length)), total_len);
+               return (EINVAL);
+       }
+
        if (value_size != NULL) {
                *value_size = length;
        }
@@ -4857,7 +4864,10 @@ necp_match_policy(struct proc *p, struct necp_match_policy_args *uap, int32_t *r
                goto done;
        }
        // Copy parameters in
-       copyin(uap->parameters, parameters, uap->parameters_size);
+       error = copyin(uap->parameters, parameters, uap->parameters_size);
+       if (error) {
+               goto done;
+       }
 
        error = necp_application_find_policy_match_internal(parameters, uap->parameters_size, &returned_result);
        if (error) {
@@ -4865,7 +4875,10 @@ necp_match_policy(struct proc *p, struct necp_match_policy_args *uap, int32_t *r
        }
 
        // Copy return value back
-       copyout(&returned_result, uap->returned_result, sizeof(struct necp_aggregate_result));
+       error = copyout(&returned_result, uap->returned_result, sizeof(struct necp_aggregate_result));
+       if (error) {
+               goto done;
+       }
 done:
        if (parameters != NULL) {
                FREE(parameters, M_NECP);
index 8a295f887cc76b9980d610ba6dd1a7ff5de25763..9d42c7c6de847f31c010cc1954754d809c279e42 100644 (file)
@@ -91,7 +91,6 @@ static struct nstat_stats nstat_stats;
 SYSCTL_STRUCT(_net_stats, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
     &nstat_stats, nstat_stats, "");
 
-
 enum
 {
        NSTAT_FLAG_CLEANUP                              = (1 << 0),
@@ -155,6 +154,7 @@ static void         nstat_control_cleanup_source(nstat_control_state *state, nstat_src
 static bool            nstat_control_reporting_allowed(nstat_control_state *state, nstat_src *src);
 static boolean_t       nstat_control_begin_query(nstat_control_state *state, const nstat_msg_hdr *hdrp);
 static u_int16_t       nstat_control_end_query(nstat_control_state *state, nstat_src *last_src, boolean_t partial);
+static void            nstat_ifnet_report_ecn_stats(void);
 
 static u_int32_t       nstat_udp_watchers = 0;
 static u_int32_t       nstat_tcp_watchers = 0;
@@ -2101,6 +2101,77 @@ done:
        lck_rw_done(&ifp->if_link_status_lock);
 }
 
+static u_int64_t nstat_ifnet_last_report_time = 0;
+extern int tcp_report_stats_interval;
+
+void
+nstat_ifnet_report_ecn_stats(void)
+{
+       u_int64_t uptime, last_report_time;
+       struct nstat_sysinfo_data data;
+       struct nstat_sysinfo_ifnet_ecn_stats *st;
+       struct ifnet *ifp;
+
+       uptime = net_uptime();
+
+       if ((int)(uptime - nstat_ifnet_last_report_time) <
+           tcp_report_stats_interval)
+               return;
+
+       last_report_time = nstat_ifnet_last_report_time;
+       nstat_ifnet_last_report_time = uptime;
+       data.flags = NSTAT_SYSINFO_IFNET_ECN_STATS;
+       st = &data.u.ifnet_ecn_stats;
+
+       ifnet_head_lock_shared();
+       TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+               if (ifp->if_ipv4_stat == NULL || ifp->if_ipv6_stat == NULL)
+                       continue;
+
+               if ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) !=
+                   IFRF_ATTACHED)
+                       continue;
+
+               /* Limit reporting to Wifi, Ethernet and cellular. */
+               if (!(IFNET_IS_ETHERNET(ifp) || IFNET_IS_CELLULAR(ifp)))
+                       continue;
+
+               bzero(st, sizeof(*st));
+               if (IFNET_IS_CELLULAR(ifp)) {
+                       st->ifnet_type = NSTAT_IFNET_ECN_TYPE_CELLULAR;
+               } else if (IFNET_IS_WIFI(ifp)) {
+                       st->ifnet_type = NSTAT_IFNET_ECN_TYPE_WIFI;
+               } else {
+                       st->ifnet_type = NSTAT_IFNET_ECN_TYPE_ETHERNET;
+               }
+
+               /* skip if there was no update since last report */
+               if (ifp->if_ipv4_stat->timestamp <= 0 ||
+                   ifp->if_ipv4_stat->timestamp < last_report_time)
+                       goto v6;
+               st->ifnet_proto = NSTAT_IFNET_ECN_PROTO_IPV4;
+               bcopy(ifp->if_ipv4_stat, &st->ecn_stat,
+                   sizeof(st->ecn_stat));
+               nstat_sysinfo_send_data(&data);
+               bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
+
+v6:
+               /* skip if there was no update since last report */
+               if (ifp->if_ipv6_stat->timestamp <= 0 ||
+                   ifp->if_ipv6_stat->timestamp < last_report_time)
+                       continue;
+               st->ifnet_proto = NSTAT_IFNET_ECN_PROTO_IPV6;
+               bcopy(ifp->if_ipv6_stat, &st->ecn_stat,
+                   sizeof(st->ecn_stat));
+               nstat_sysinfo_send_data(&data);
+
+               /* Zero the stats in ifp */
+               bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
+       }
+       ifnet_head_done();
+
+}
+
 static errno_t
 nstat_ifnet_copy_descriptor(
        nstat_provider_cookie_t cookie,
@@ -2210,6 +2281,14 @@ nstat_sysinfo_send_data_internal(
                        nkeyvals = sizeof(struct nstat_sysinfo_tcp_stats) /
                            sizeof(u_int32_t);
                        break;
+               case NSTAT_SYSINFO_IFNET_ECN_STATS:
+                       nkeyvals = (sizeof(struct if_tcp_ecn_stat) /
+                           sizeof(u_int64_t));
+                       /* One less because we are not going to send timestamp */
+                       nkeyvals -= 1;
+                       /* Two more keys for ifnet type and proto */
+                       nkeyvals += 2;
+                       break;
                default:
                        return;
        }
@@ -2334,6 +2413,15 @@ nstat_sysinfo_send_data_internal(
                        nstat_set_keyval_scalar(&kv[i++],
                            NSTAT_SYSINFO_ECN_CONN_NOPL_CE,
                            data->u.tcp_stats.ecn_conn_nopl_ce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_FALLBACK_SYNLOSS,
+                           data->u.tcp_stats.ecn_fallback_synloss);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_FALLBACK_REORDER,
+                           data->u.tcp_stats.ecn_fallback_reorder);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_FALLBACK_CE,
+                           data->u.tcp_stats.ecn_fallback_ce);
                        nstat_set_keyval_scalar(&kv[i++],
                            NSTAT_SYSINFO_TFO_SYN_DATA_RCV,
                            data->u.tcp_stats.tfo_syn_data_rcv);
@@ -2368,6 +2456,110 @@ nstat_sysinfo_send_data_internal(
                        VERIFY(i == nkeyvals);
                        break;
                }
+               case NSTAT_SYSINFO_IFNET_ECN_STATS:
+               {
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_TYPE,
+                           data->u.ifnet_ecn_stats.ifnet_type);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_PROTO,
+                           data->u.ifnet_ecn_stats.ifnet_proto);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_CLIENT_SETUP,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_client_setup);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_SERVER_SETUP,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_server_setup);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_CLIENT_SUCCESS,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_client_success);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_SERVER_SUCCESS,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_server_success);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_PEER_NOSUPPORT,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_peer_nosupport);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_SYN_LOST,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_syn_lost);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_SYNACK_LOST,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_synack_lost);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_RECV_CE,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_recv_ce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_RECV_ECE,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_recv_ece);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_CONN_RECV_CE,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_conn_recv_ce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_CONN_RECV_ECE,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_conn_recv_ece);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_CONN_PLNOCE,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_conn_plnoce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_CONN_PLCE,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_conn_plce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_CONN_NOPLCE,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_conn_noplce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_FALLBACK_SYNLOSS,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_fallback_synloss);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_FALLBACK_REORDER,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_fallback_reorder);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_FALLBACK_CE,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_fallback_ce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_ON_RTT_AVG,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_on.rtt_avg);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_ON_RTT_VAR,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_on.rtt_var);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_ON_OOPERCENT,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_on.oo_percent);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_ON_SACK_EPISODE,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_on.sack_episodes);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_ON_REORDER_PERCENT,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_on.reorder_percent);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_ON_RXMIT_PERCENT,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_on.rxmit_percent);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_ON_RXMIT_DROP,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_on.rxmit_drop);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_OFF_RTT_AVG,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_off.rtt_avg);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_OFF_RTT_VAR,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_off.rtt_var);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_OFF_OOPERCENT,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_off.oo_percent);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_OFF_SACK_EPISODE,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_off.sack_episodes);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_OFF_REORDER_PERCENT,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_off.reorder_percent);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_OFF_RXMIT_PERCENT,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_off.rxmit_percent);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_IFNET_OFF_RXMIT_DROP,
+                           data->u.ifnet_ecn_stats.ecn_stat.ecn_off.rxmit_drop);
+                       VERIFY(i == nkeyvals);
+                       break;
+               }
        }
        
        if (syscnt != NULL)
@@ -2407,6 +2599,7 @@ nstat_sysinfo_generate_report(void)
 {
        mbuf_report_peak_usage();
        tcp_report_stats();
+       nstat_ifnet_report_ecn_stats();
 }
 
 #pragma mark -- Kernel Control Socket --
index a6bcec06b8c7fb902d3dee068875754522732568..1d479d3ab6d8bd5279b634a7a45676e81d253a0a 100644 (file)
@@ -136,6 +136,43 @@ enum
        ,NSTAT_SYSINFO_TFO_SYN_DATA_ACKED       = 41
        ,NSTAT_SYSINFO_TFO_SYN_LOSS             = 42
        ,NSTAT_SYSINFO_TFO_BLACKHOLE            = 43
+       ,NSTAT_SYSINFO_ECN_FALLBACK_SYNLOSS     = 44
+       ,NSTAT_SYSINFO_ECN_FALLBACK_REORDER     = 45
+       ,NSTAT_SYSINFO_ECN_FALLBACK_CE          = 46
+       ,NSTAT_SYSINFO_ECN_IFNET_TYPE           = 47
+       ,NSTAT_SYSINFO_ECN_IFNET_PROTO          = 48
+       ,NSTAT_SYSINFO_ECN_IFNET_CLIENT_SETUP   = 49
+       ,NSTAT_SYSINFO_ECN_IFNET_SERVER_SETUP   = 50
+       ,NSTAT_SYSINFO_ECN_IFNET_CLIENT_SUCCESS = 51
+       ,NSTAT_SYSINFO_ECN_IFNET_SERVER_SUCCESS = 52
+       ,NSTAT_SYSINFO_ECN_IFNET_PEER_NOSUPPORT = 53
+       ,NSTAT_SYSINFO_ECN_IFNET_SYN_LOST       = 54
+       ,NSTAT_SYSINFO_ECN_IFNET_SYNACK_LOST    = 55
+       ,NSTAT_SYSINFO_ECN_IFNET_RECV_CE        = 56
+       ,NSTAT_SYSINFO_ECN_IFNET_RECV_ECE       = 57
+       ,NSTAT_SYSINFO_ECN_IFNET_SENT_ECE       = 58
+       ,NSTAT_SYSINFO_ECN_IFNET_CONN_RECV_CE   = 59
+       ,NSTAT_SYSINFO_ECN_IFNET_CONN_RECV_ECE  = 60
+       ,NSTAT_SYSINFO_ECN_IFNET_CONN_PLNOCE    = 61
+       ,NSTAT_SYSINFO_ECN_IFNET_CONN_PLCE      = 62
+       ,NSTAT_SYSINFO_ECN_IFNET_CONN_NOPLCE    = 63
+       ,NSTAT_SYSINFO_ECN_IFNET_FALLBACK_SYNLOSS = 64
+       ,NSTAT_SYSINFO_ECN_IFNET_FALLBACK_REORDER = 65
+       ,NSTAT_SYSINFO_ECN_IFNET_FALLBACK_CE    = 66
+       ,NSTAT_SYSINFO_ECN_IFNET_ON_RTT_AVG     = 67
+       ,NSTAT_SYSINFO_ECN_IFNET_ON_RTT_VAR     = 68
+       ,NSTAT_SYSINFO_ECN_IFNET_ON_OOPERCENT   = 69
+       ,NSTAT_SYSINFO_ECN_IFNET_ON_SACK_EPISODE = 70
+       ,NSTAT_SYSINFO_ECN_IFNET_ON_REORDER_PERCENT = 71
+       ,NSTAT_SYSINFO_ECN_IFNET_ON_RXMIT_PERCENT = 72
+       ,NSTAT_SYSINFO_ECN_IFNET_ON_RXMIT_DROP  = 73
+       ,NSTAT_SYSINFO_ECN_IFNET_OFF_RTT_AVG    = 74
+       ,NSTAT_SYSINFO_ECN_IFNET_OFF_RTT_VAR    = 75
+       ,NSTAT_SYSINFO_ECN_IFNET_OFF_OOPERCENT  = 76
+       ,NSTAT_SYSINFO_ECN_IFNET_OFF_SACK_EPISODE = 77
+       ,NSTAT_SYSINFO_ECN_IFNET_OFF_REORDER_PERCENT = 78
+       ,NSTAT_SYSINFO_ECN_IFNET_OFF_RXMIT_PERCENT = 79
+       ,NSTAT_SYSINFO_ECN_IFNET_OFF_RXMIT_DROP = 80
 };
 
 #pragma mark -- Network Statistics Providers --
@@ -463,6 +500,7 @@ typedef struct nstat_sysinfo_add_param
 
 #define        NSTAT_SYSINFO_MBUF_STATS        0x0001
 #define        NSTAT_SYSINFO_TCP_STATS         0x0002  
+#define NSTAT_SYSINFO_IFNET_ECN_STATS  0x0003
 
 #pragma mark -- Network Statistics User Client --
 
@@ -703,6 +741,9 @@ typedef struct nstat_sysinfo_tcp_stats
        u_int32_t               ecn_conn_plnoce; /* Number of connections using ECN seen packet loss but never received CE */
        u_int32_t               ecn_conn_pl_ce; /* Number of connections using ECN seen packet loss and CE */
        u_int32_t               ecn_conn_nopl_ce; /* Number of connections using ECN with no packet loss but received CE */
+       u_int32_t               ecn_fallback_synloss; /* Number of times we did fall back due to SYN-Loss */
+       u_int32_t               ecn_fallback_reorder; /* Number of times we fallback because we detected the PAWS-issue */
+       u_int32_t               ecn_fallback_ce; /* Number of times we fallback because we received too many CEs */
        u_int32_t               tfo_syn_data_rcv;       /* Number of SYN+data received with valid cookie */
        u_int32_t               tfo_cookie_req_rcv;/* Number of TFO cookie-requests received */
        u_int32_t               tfo_cookie_sent;        /* Number of TFO-cookies offered to the client */
@@ -715,12 +756,30 @@ typedef struct nstat_sysinfo_tcp_stats
        u_int32_t               tfo_blackhole;  /* Number of times SYN+TFO has been lost and we fallback */
 } nstat_sysinfo_tcp_stats;
 
+enum {
+       NSTAT_IFNET_ECN_PROTO_IPV4 = 1
+       ,NSTAT_IFNET_ECN_PROTO_IPV6
+};
+
+enum {
+       NSTAT_IFNET_ECN_TYPE_CELLULAR = 1
+       ,NSTAT_IFNET_ECN_TYPE_WIFI
+       ,NSTAT_IFNET_ECN_TYPE_ETHERNET
+};
+
+typedef struct nstat_sysinfo_ifnet_ecn_stats {
+       u_int32_t                       ifnet_proto;
+       u_int32_t                       ifnet_type;
+       struct if_tcp_ecn_stat          ecn_stat;
+} nstat_sysinfo_ifnet_ecn_stats;
+
 typedef struct nstat_sysinfo_data
 {
        u_int32_t               flags;
        union {
                nstat_sysinfo_mbuf_stats mb_stats;
                nstat_sysinfo_tcp_stats tcp_stats;
+               nstat_sysinfo_ifnet_ecn_stats ifnet_ecn_stats;
        } u;
 } nstat_sysinfo_data;
 
index 9dec8f7604d3a1eb1700026e7ff082fe9436b275..75b52b626036c1400ef278c5fcadc68bc3b953d4 100644 (file)
@@ -604,24 +604,23 @@ pfi_update_status(const char *name, struct pf_status *pfs)
        if (p == NULL)
                return;
 
-       if (pfs) {
+       if (pfs != NULL) {
                bzero(pfs->pcounters, sizeof (pfs->pcounters));
                bzero(pfs->bcounters, sizeof (pfs->bcounters));
-       }
-       /* just clear statistics */
-       if (pfs == NULL) {
+               for (i = 0; i < 2; i++)
+                       for (j = 0; j < 2; j++)
+                               for (k = 0; k < 2; k++) {
+                                       pfs->pcounters[i][j][k] +=
+                                               p->pfik_packets[i][j][k];
+                                       pfs->bcounters[i][j] +=
+                                               p->pfik_bytes[i][j][k];
+                               }
+       } else {
+               /* just clear statistics */
                bzero(p->pfik_packets, sizeof (p->pfik_packets));
                bzero(p->pfik_bytes, sizeof (p->pfik_bytes));
                p->pfik_tzero = pf_calendar_time_second();
        }
-       for (i = 0; i < 2; i++)
-               for (j = 0; j < 2; j++)
-                       for (k = 0; k < 2; k++) {
-                               pfs->pcounters[i][j][k] +=
-                                   p->pfik_packets[i][j][k];
-                               pfs->bcounters[i][j] +=
-                                   p->pfik_bytes[i][j][k];
-                       }
 }
 
 int
index df11307022f9300888e003242016e4a81237d14d..0cd7a22874af3474c83ffa75c1c2c14518c92e5b 100644 (file)
@@ -239,8 +239,9 @@ in_gif_input(m, off)
        struct ifnet *gifp = NULL;
        struct ip *ip;
        int af, proto;
-       u_int8_t otos;
+       u_int8_t otos, old_tos;
        int egress_success = 0;
+       int sum;
 
        ip = mtod(m, struct ip *);
        proto = ip->ip_p;
@@ -268,9 +269,17 @@ in_gif_input(m, off)
                                return;
                }
                ip = mtod(m, struct ip *);
-               if (gifp->if_flags & IFF_LINK1)
+               if (gifp->if_flags & IFF_LINK1) {
+                       old_tos = ip->ip_tos;
                        egress_success = ip_ecn_egress(ECN_NORMAL, &otos, &ip->ip_tos);
-               else
+                       if (old_tos != ip->ip_tos) {
+                           sum = ~ntohs(ip->ip_sum) & 0xffff;
+                           sum += (~otos & 0xffff) + ip->ip_tos;
+                           sum = (sum >> 16) + (sum & 0xffff);
+                           sum += (sum >> 16);  /* add carry */
+                           ip->ip_sum = htons(~sum & 0xffff);
+                       }
+               } else
                        egress_success = ip_ecn_egress(ECN_NOCARE, &otos, &ip->ip_tos);
                break;
            }
index 0cbd238ccdc75ea41cc9d45fbf00d508c50cf406..dce4177d444c3a96395896e4f6ac2ab968954123 100644 (file)
@@ -714,11 +714,12 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                return (EINVAL);
        if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
                wild = 1;
-       socket_unlock(so, 0); /* keep reference on socket */
-       lck_rw_lock_exclusive(pcbinfo->ipi_lock);
 
        bzero(&laddr, sizeof(laddr));
 
+       socket_unlock(so, 0); /* keep reference on socket */
+       lck_rw_lock_exclusive(pcbinfo->ipi_lock);
+
        if (nam != NULL) {
 
                if (nam->sa_len != sizeof (struct sockaddr_in)) {
@@ -944,6 +945,17 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                }
        }
        socket_lock(so, 0);
+
+       /*
+        * We unlocked socket's protocol lock for a long time.
+        * The socket might have been dropped/defuncted.
+        * Checking if world has changed since.
+        */
+       if (inp->inp_state == INPCB_STATE_DEAD) {
+               lck_rw_done(pcbinfo->ipi_lock);
+               return (ECONNABORTED);
+       }
+
        if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
                lck_rw_done(pcbinfo->ipi_lock);
                return (EINVAL);
@@ -2039,7 +2051,13 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
 }
 
 /*
- * Insert PCB onto various hash lists.
+ * @brief      Insert PCB onto various hash lists.
+ *
+ * @param      inp Pointer to internet protocol control block
+ * @param      locked  Implies if ipi_lock (protecting pcb list)
+ *             is already locked or not.
+ *
+ * @return     int error on failure and 0 on success
  */
 int
 in_pcbinshash(struct inpcb *inp, int locked)
@@ -2059,17 +2077,23 @@ in_pcbinshash(struct inpcb *inp, int locked)
                        socket_unlock(inp->inp_socket, 0);
                        lck_rw_lock_exclusive(pcbinfo->ipi_lock);
                        socket_lock(inp->inp_socket, 0);
-                       if (inp->inp_state == INPCB_STATE_DEAD) {
-                               /*
-                                * The socket got dropped when
-                                * it was unlocked
-                                */
-                               lck_rw_done(pcbinfo->ipi_lock);
-                               return (ECONNABORTED);
-                       }
                }
        }
 
+       /*
+        * This routine or its caller may have given up
+        * socket's protocol lock briefly.
+        * During that time the socket may have been dropped.
+        * Safe-guarding against that.
+        */
+       if (inp->inp_state == INPCB_STATE_DEAD) {
+               if (!locked) {
+                       lck_rw_done(pcbinfo->ipi_lock);
+               }
+               return (ECONNABORTED);
+       }
+
+
 #if INET6
        if (inp->inp_vflag & INP_IPV6)
                hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
@@ -2093,8 +2117,6 @@ in_pcbinshash(struct inpcb *inp, int locked)
                        break;
        }
 
-       VERIFY(inp->inp_state != INPCB_STATE_DEAD);
-
        /*
         * If none exists, malloc one and tack it on.
         */
index 7ce89307aa027fb252aec700cf74af24f192fedd..8616581768fabce45d9833b518530cabe019ec84 100644 (file)
@@ -498,6 +498,15 @@ struct     xinpgen {
 #if IPSEC
 #define        in6p_sp         inp_sp
 #endif /* IPSEC */
+#define INP_INC_IFNET_STAT(_inp_, _stat_) { \
+       if ((_inp_)->inp_last_outifp != NULL) { \
+               if ((_inp_)->inp_vflag & INP_IPV6) { \
+                       (_inp_)->inp_last_outifp->if_ipv6_stat->_stat_++;\
+               } else { \
+                       (_inp_)->inp_last_outifp->if_ipv4_stat->_stat_++;\
+               }\
+       }\
+}
 
 struct inpcbport {
        LIST_ENTRY(inpcbport) phd_hash;
index 3e0facc1c74cd7863b6142a02a5e6a4efa1b0a03..2a00f895f21969270f3904977bd685af7446f273 100644 (file)
@@ -464,6 +464,13 @@ inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags,
                if (SOCK_PROTO(inp->inp_socket) == IPPROTO_TCP) {
                        struct  tcpcb *tp = sototcpcb(inp->inp_socket);
 
+                       /*
+                        * Workaround race where inp_ppcb is NULL during
+                        * socket initialization
+                        */
+                       if (tp == NULL)
+                               continue;
+
                        switch (tp->t_state) {
                                case TCPS_CLOSED:
                                        continue;
index 38c7900267e75b4b97ad9dd47f736b0527a0d837..70ce0cfb8576bf363bf2459afd5335cec1d71b68 100644 (file)
@@ -117,13 +117,9 @@ ip_ecn_egress(mode, outer, inner)
        /* Process ECN for both normal and compatibility modes */
        case ECN_NORMAL:
        case ECN_COMPATIBILITY:
-               if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
-                       if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) {
-                               /* Drop */
-                               return (0);
-                       } else {
-                               *inner |= IPTOS_ECN_CE;
-                       }
+               if (((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) &&
+                   ((*inner & IPTOS_ECN_MASK) != IPTOS_ECN_NOTECT)) {
+                       *inner |= IPTOS_ECN_CE;
                } else if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 &&
                                   (*inner & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0) {
                        *inner = *outer;
index 134a615355dde9a55db2005336b7f747cf385ec0..9dcb06620cc4bef6e74bfc1b26269f59cf05b140 100644 (file)
@@ -239,10 +239,15 @@ struct tcphdr {
  * Enable message delivery on a socket, this feature is currently unsupported and
  * is subjected to change in future.
  */
-#define        TCP_ENABLE_MSGS 0x206
+#define        TCP_ENABLE_MSGS                 0x206
 #define        TCP_ADAPTIVE_WRITE_TIMEOUT      0x207   /* Write timeout used as a multiple of RTT */
-#define        TCP_NOTIMEWAIT          0x208   /* Avoid going into time-wait */
+#define        TCP_NOTIMEWAIT                  0x208   /* Avoid going into time-wait */
 #define        TCP_DISABLE_BLACKHOLE_DETECTION 0x209   /* disable PMTU blackhole detection */
+#define        TCP_ECN_MODE                    0x210   /* fine grain control for A/B testing */
+
+#define        ECN_MODE_DEFAULT        0x0     /* per interface or system wide default */
+#define        ECN_MODE_ENABLE         0x1     /* force enable ECN on connection */
+#define        ECN_MODE_DISABLE        0x2     /* force disable ECN on connection */
 
 /*
  * The TCP_INFO socket option is a private API and is subject to change
@@ -339,6 +344,26 @@ struct tcp_info {
                tcpi_tfo_cookie_req_rcv:1, /* Server received cookie-request */
                tcpi_tfo_cookie_sent:1, /* Server announced cookie */
                tcpi_tfo_cookie_invalid:1; /* Server received an invalid cookie */
+
+       u_int16_t       tcpi_ecn_client_setup:1,        /* Attempted ECN setup from client side */
+                       tcpi_ecn_server_setup:1,        /* Attempted ECN setup from server side */
+                       tcpi_ecn_success:1,             /* peer negotiated ECN */
+                       tcpi_ecn_lost_syn:1,            /* Lost SYN with ECN setup */
+                       tcpi_ecn_lost_synack:1,         /* Lost SYN-ACK with ECN setup */
+                       tcpi_local_peer:1,              /* Local to the host or the subnet */
+                       tcpi_if_cell:1,                 /* Interface is cellular */
+                       tcpi_if_wifi:1;                 /* Interface is WiFi */
+
+       u_int32_t       tcpi_ecn_recv_ce;       /* Packets received with CE */
+       u_int32_t       tcpi_ecn_recv_cwr;      /* Packets received with CWR */
+
+       u_int32_t       tcpi_rcvoopack;         /* out-of-order packets received */
+       u_int32_t       tcpi_pawsdrop;          /* segments dropped due to PAWS */
+       u_int32_t       tcpi_sack_recovery_episode; /* SACK recovery episodes */
+       u_int32_t       tcpi_reordered_pkts;    /* packets reordered */
+       u_int32_t       tcpi_dsack_sent;        /* Sent DSACK notification */
+       u_int32_t       tcpi_dsack_recvd;       /* Received a valid DSACK option */
+       u_int32_t       tcpi_flowhash;          /* Unique id for the connection */
 };
 
 struct tcp_measure_bw_burst {
index cb3b86d0401d6f63e0c750220c647c12ef3f57df..b872c7d321c0854a58ca46487ec9bf7769057551 100644 (file)
@@ -56,18 +56,21 @@ struct tcp_heuristic {
 
        struct tcp_heuristic_key        th_key;
 
-       /*
-        * If tfo_cookie_loss is changed to a smaller type, it might be worth
-        * checking for integer-overflow in tcp_cache_tfo_inc_loss
-        */
-       u_int32_t       th_tfo_cookie_loss; /* The number of times a SYN+cookie has been lost */
+       char            th_val_start[0]; /* Marker for memsetting to 0 */
+
+       u_int8_t        th_tfo_cookie_loss; /* The number of times a SYN+cookie has been lost */
+       u_int8_t        th_ecn_loss; /* The number of times a SYN+ecn has been lost */
+       u_int8_t        th_ecn_aggressive; /* The number of times we did an aggressive fallback */
        u_int32_t       th_tfo_fallback_trials; /* Number of times we did not try out TFO due to SYN-loss */
        u_int32_t       th_tfo_cookie_backoff; /* Time until when we should not try out TFO */
+       u_int32_t       th_ecn_backoff; /* Time until when we should not try out ECN */
 
-       u_int8_t        th_tfo_in_backoff:1, /* Are we doing TFO due to the backoff timer? */
-                       th_tfo_aggressive_fallback:1, /* Agressive fallback due to nasty middlebox */
+       u_int8_t        th_tfo_in_backoff:1, /* Are we avoiding TFO due to the backoff timer? */
+                       th_tfo_aggressive_fallback:1, /* Aggressive fallback due to nasty middlebox */
                        th_tfo_snd_middlebox_supp:1, /* We are sure that the network supports TFO in upstream direction */
                        th_tfo_rcv_middlebox_supp:1; /* We are sure that the network supports TFO in downstream direction*/
+
+       char            th_val_end[0]; /* Marker for memsetting to 0 */
 };
 
 struct tcp_heuristics_head {
@@ -131,8 +134,9 @@ static lck_attr_t   *tcp_heuristic_mtx_attr;
 static lck_grp_t       *tcp_heuristic_mtx_grp;
 static lck_grp_attr_t  *tcp_heuristic_mtx_grp_attr;
 
-/* Number of SYN-losses we accept */
-#define TFO_MAX_COOKIE_LOSS    2
+int    tcp_ecn_timeout = 60;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tcp_ecn_timeout, 0, "Initial minutes to wait before re-trying ECN");
 
 /*
  * Round up to next higher power-of 2.  See "Bit Twiddling Hacks".
@@ -446,13 +450,8 @@ static struct tcp_heuristic *tcp_getheuristic_with_lock(struct tcpcb *tp,
                        tpheur = oldest_heur;
 
                        /* We recycle - set everything to 0 */
-                       tpheur->th_tfo_cookie_loss = 0;
-                       tpheur->th_tfo_fallback_trials = 0;
-                       tpheur->th_tfo_cookie_backoff = 0;
-                       tpheur->th_tfo_in_backoff = 0;
-                       tpheur->th_tfo_aggressive_fallback = 0;
-                       tpheur->th_tfo_snd_middlebox_supp = 0;
-                       tpheur->th_tfo_rcv_middlebox_supp = 0;
+                       bzero(tpheur->th_val_start,
+                             tpheur->th_val_end - tpheur->th_val_start);
                } else {
                        /* Create a new heuristic and add it to the list */
                        tpheur = _MALLOC(sizeof(struct tcp_heuristic), M_TEMP,
@@ -463,6 +462,13 @@ static struct tcp_heuristic *tcp_getheuristic_with_lock(struct tcpcb *tp,
                        SLIST_INSERT_HEAD(&head->tcp_heuristics, tpheur, list);
                }
 
+               /*
+                * Set to tcp_now, to make sure it won't be > than tcp_now in the
+                * near future.
+                */
+               tpheur->th_ecn_backoff = tcp_now;
+               tpheur->th_tfo_cookie_backoff = tcp_now;
+
                memcpy(&tpheur->th_key, &key, sizeof(key));
        }
 
@@ -523,7 +529,7 @@ void tcp_heuristic_tfo_snd_good(struct tcpcb *tp)
        tp->t_tfo_flags |= TFO_F_NO_SNDPROBING;
 }
 
-void tcp_heuristic_tfo_inc_loss(struct tcpcb *tp)
+void tcp_heuristic_inc_loss(struct tcpcb *tp, int tfo, int ecn)
 {
        struct tcp_heuristics_head *head;
        struct tcp_heuristic *tpheur;
@@ -532,8 +538,20 @@ void tcp_heuristic_tfo_inc_loss(struct tcpcb *tp)
        if (tpheur == NULL)
                return;
 
-       /* Potential integer overflow, but tfo_cookie_loss is 32-bits */
-       tpheur->th_tfo_cookie_loss++;
+       /* Limit to 9 to prevent integer-overflow during exponential backoff */
+       if (tfo && tpheur->th_tfo_cookie_loss < 9)
+               tpheur->th_tfo_cookie_loss++;
+
+       if (ecn && tpheur->th_ecn_loss < 9) {
+               tpheur->th_ecn_loss++;
+               if (tpheur->th_ecn_loss >= ECN_MAX_SYN_LOSS) {
+                       tcpstat.tcps_ecn_fallback_synloss++;
+                       INP_INC_IFNET_STAT(tp->t_inpcb, ecn_fallback_synloss);
+                       tpheur->th_ecn_backoff = tcp_now +
+                           ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ)
+                           << (tpheur->th_ecn_loss - ECN_MAX_SYN_LOSS));
+               }
+       }
 
        tcp_heuristic_unlock(head);
 }
@@ -552,7 +570,30 @@ void tcp_heuristic_tfo_middlebox(struct tcpcb *tp)
        tcp_heuristic_unlock(head);
 }
 
-void tcp_heuristic_tfo_reset_loss(struct tcpcb *tp)
+void tcp_heuristic_ecn_aggressive(struct tcpcb *tp)
+{
+       struct tcp_heuristics_head *head;
+       struct tcp_heuristic *tpheur;
+
+       tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
+       if (tpheur == NULL)
+               return;
+
+       /* Must be done before, otherwise we will start off with expo-backoff */
+       tpheur->th_ecn_backoff = tcp_now +
+           ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) << (tpheur->th_ecn_aggressive));
+
+       /*
+        * Ugly way to prevent integer overflow... limit to 9 to prevent an
+        * overflow during exp. backoff.
+        */
+       if (tpheur->th_ecn_aggressive < 9)
+               tpheur->th_ecn_aggressive++;
+
+       tcp_heuristic_unlock(head);
+}
+
+void tcp_heuristic_reset_loss(struct tcpcb *tp, int tfo, int ecn)
 {
        struct tcp_heuristics_head *head;
        struct tcp_heuristic *tpheur;
@@ -566,8 +607,11 @@ void tcp_heuristic_tfo_reset_loss(struct tcpcb *tp)
        if (tpheur == NULL)
                return;
 
-       tpheur->th_tfo_cookie_loss = 0;
-       tpheur->th_tfo_aggressive_fallback = 0;
+       if (tfo)
+               tpheur->th_tfo_cookie_loss = 0;
+
+       if (ecn)
+               tpheur->th_ecn_loss = 0;
 
        tcp_heuristic_unlock(head);
 }
@@ -634,6 +678,25 @@ boolean_t tcp_heuristic_do_tfo(struct tcpcb *tp)
        return (true);
 }
 
+boolean_t tcp_heuristic_do_ecn(struct tcpcb *tp)
+{
+       struct tcp_heuristics_head *head;
+       struct tcp_heuristic *tpheur;
+       boolean_t ret = true;
+
+       /* Get the tcp-heuristic. */
+       tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
+       if (tpheur == NULL)
+               return ret;
+
+       if (TSTMP_GT(tpheur->th_ecn_backoff, tcp_now))
+               ret = false;
+
+       tcp_heuristic_unlock(head);
+
+       return (ret);
+}
+
 static void sysctl_cleartfocache(void)
 {
        int i;
index 601aec807c4d66f0f61379fe2a5d3986d3b8a63d..4408fd5ffb3835cbd62b17e07a712d5c12831962 100644 (file)
 #include <netinet/tcp_var.h>
 #include <netinet/in.h>
 
+/* Number of SYN-losses we accept */
+#define        TFO_MAX_COOKIE_LOSS     2
+#define        ECN_MAX_SYN_LOSS        2
+
+#define ECN_MIN_CE_PROBES      10 /* Probes are basically the number of incoming packets */
+#define ECN_MAX_CE_RATIO       7 /* Ratio is the maximum number of CE-packets we accept per incoming "probe" */
+
 extern void tcp_cache_set_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t len);
 extern int tcp_cache_get_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t *len);
 extern unsigned int tcp_cache_get_cookie_len(struct tcpcb *tp);
 
-extern void tcp_heuristic_tfo_inc_loss(struct tcpcb *tp);
+extern void tcp_heuristic_inc_loss(struct tcpcb *tp, int tfo, int ecn);
 extern void tcp_heuristic_tfo_snd_good(struct tcpcb *tp);
 extern void tcp_heuristic_tfo_rcv_good(struct tcpcb *tp);
 extern void tcp_heuristic_tfo_middlebox(struct tcpcb *tp);
-extern void tcp_heuristic_tfo_reset_loss(struct tcpcb *tp);
+extern void tcp_heuristic_ecn_aggressive(struct tcpcb *tp);
+extern void tcp_heuristic_reset_loss(struct tcpcb *tp, int tfo, int ecn);
 extern void tcp_heuristic_tfo_success(struct tcpcb *tp);
 extern boolean_t tcp_heuristic_do_tfo(struct tcpcb *tp);
+extern boolean_t tcp_heuristic_do_ecn(struct tcpcb *tp);
 
 extern void tcp_cache_init(void);
 
index 8f2a92cc8a4b197c7365a4e9a22fbf6c26738e03..1d65c4355d014eef26fc05f41964f588c56518e5 100644 (file)
@@ -707,6 +707,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m,
                        th->th_seq += i;
                }
        }
+       tp->t_rcvoopack++;
        tcpstat.tcps_rcvoopack++;
        tcpstat.tcps_rcvoobyte += *tlenp;
        if (nstat_collect) {
@@ -1649,7 +1650,7 @@ tcp_tfo_synack(tp, to)
 
                /*
                 * If this happens, things have gone terribly wrong. len should
-                * have been check in tcp_dooptions.
+                * have been checked in tcp_dooptions.
                 */
                VERIFY(len <= TFO_COOKIE_LEN_MAX);
 
@@ -1668,9 +1669,9 @@ tcp_tfo_synack(tp, to)
                 * backing of TFO-cookie requests.
                 */
                if (tp->t_tfo_flags & TFO_F_SYN_LOSS)
-                       tcp_heuristic_tfo_inc_loss(tp);
+                       tcp_heuristic_inc_loss(tp, 1, 0);
                else
-                       tcp_heuristic_tfo_reset_loss(tp);
+                       tcp_heuristic_reset_loss(tp, 1, 0);
        }
 }
 
@@ -2546,18 +2547,21 @@ findpcb:
            TCP_ECN_ENABLED(tp) && tlen > 0 &&
            SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
            SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
+               tp->t_ecn_recv_ce++;
                tcpstat.tcps_ecn_recv_ce++;
+               INP_INC_IFNET_STAT(inp, ecn_recv_ce);
                /* Mark this connection as it received CE from network */
                tp->ecn_flags |= TE_RECV_ECN_CE;
                tp->ecn_flags |= TE_SENDECE;
        }
-       
+
        /*
         * Clear TE_SENDECE if TH_CWR is set. This is harmless, so we don't
         * bother doing extensive checks for state and whatnot.
         */
        if (thflags & TH_CWR) {
                tp->ecn_flags &= ~TE_SENDECE;
+               tp->t_ecn_recv_cwr++;
        }
 
        /* 
@@ -2573,6 +2577,30 @@ findpcb:
                CLEAR_IAJ_STATE(tp);
        }
 
+       if (ip_ecn == IPTOS_ECN_CE && tp->t_state == TCPS_ESTABLISHED &&
+           !TCP_ECN_ENABLED(tp) && !(tp->ecn_flags & TE_CEHEURI_SET)) {
+               tcpstat.tcps_ecn_fallback_ce++;
+               tcp_heuristic_ecn_aggressive(tp);
+               tp->ecn_flags |= TE_CEHEURI_SET;
+       }
+
+       if (tp->t_state == TCPS_ESTABLISHED && TCP_ECN_ENABLED(tp) &&
+           ip_ecn == IPTOS_ECN_CE && !(tp->ecn_flags & TE_CEHEURI_SET)) {
+               if (inp->inp_stat->rxpackets < ECN_MIN_CE_PROBES) {
+                       tp->t_ecn_recv_ce_pkt++;
+               } else if (tp->t_ecn_recv_ce_pkt > ECN_MAX_CE_RATIO) {
+                       tcpstat.tcps_ecn_fallback_ce++;
+                       tcp_heuristic_ecn_aggressive(tp);
+                       tp->ecn_flags |= TE_CEHEURI_SET;
+                       INP_INC_IFNET_STAT(inp,ecn_fallback_ce);
+               } else {
+                       /* We tracked the first ECN_MIN_CE_PROBES segments, we
+                        * now know that the path is good.
+                        */
+                       tp->ecn_flags |= TE_CEHEURI_SET;
+               }
+       }
+
        /* 
         * Try to determine if we are receiving a packet after a long time.
         * Use our own approximation of idletime to roughly measure remote 
@@ -2711,7 +2739,7 @@ findpcb:
         * be TH_NEEDSYN.
         */
        if (tp->t_state == TCPS_ESTABLISHED &&
-           (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK|TH_ECE)) == TH_ACK &&
+           (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK|TH_ECE|TH_CWR)) == TH_ACK &&
            ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
            ((to.to_flags & TOF_TS) == 0 ||
             TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
@@ -2731,11 +2759,6 @@ findpcb:
                        tp->ts_recent = to.to_tsval;
                }
 
-               /* Force acknowledgment if we received a FIN */
-
-               if (thflags & TH_FIN)
-                       tp->t_flags |= TF_ACKNOW;
-
                if (tlen == 0) {
                        if (SEQ_GT(th->th_ack, tp->snd_una) &&
                            SEQ_LEQ(th->th_ack, tp->snd_max) &&
@@ -3189,12 +3212,20 @@ findpcb:
                        if ((thflags & (TH_ECE | TH_CWR)) == (TH_ECE)) {
                                /* ECN-setup SYN-ACK */
                                tp->ecn_flags |= TE_SETUPRECEIVED;
-                               if (TCP_ECN_ENABLED(tp))
+                               if (TCP_ECN_ENABLED(tp)) {
+                                       tcp_heuristic_reset_loss(tp, 0, 1);
                                        tcpstat.tcps_ecn_client_success++;
+                               }
                        } else {
                                if (tp->ecn_flags & TE_SETUPSENT &&
-                                   tp->t_rxtshift == 0)
+                                   tp->t_rxtshift == 0) {
+                                       tcp_heuristic_reset_loss(tp, 0, 1);
                                        tcpstat.tcps_ecn_not_supported++;
+                               }
+                               if (tp->ecn_flags & TE_SETUPSENT &&
+                                   tp->t_rxtshift > 0)
+                                       tcp_heuristic_inc_loss(tp, 0, 1);
+
                                /* non-ECN-setup SYN-ACK */
                                tp->ecn_flags &= ~TE_SENDIPECT;
                        }
@@ -3506,7 +3537,24 @@ trimthenstep6:
                } else {
                        tcpstat.tcps_rcvduppack++;
                        tcpstat.tcps_rcvdupbyte += tlen;
+                       tp->t_pawsdrop++;
                        tcpstat.tcps_pawsdrop++;
+
+                       /*
+                        * PAWS-drop when ECN is being used? That indicates
+                        * that ECT-marked packets take a different path, with
+                        * different congestion-characteristics.
+                        *
+                        * Only fallback when we did send less than 2GB as PAWS
+                        * really has no reason to kick in earlier.
+                        */
+                       if (TCP_ECN_ENABLED(tp) &&
+                           inp->inp_stat->rxbytes < 2147483648) {
+                               INP_INC_IFNET_STAT(inp, ecn_fallback_reorder);
+                               tcpstat.tcps_ecn_fallback_reorder++;
+                               tcp_heuristic_ecn_aggressive(tp);
+                       }
+
                        if (nstat_collect) {
                                nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 
                                        1, tlen, NSTAT_RX_FLAG_DUPLICATE);
@@ -4139,6 +4187,7 @@ process_dupack:
 
                                        if (SACK_ENABLED(tp)) {
                                                tcpstat.tcps_sack_recovery_episode++;
+                                               tp->t_sack_recovery_episode++;
                                                tp->sack_newdata = tp->snd_nxt;
                                                tp->snd_cwnd = tp->t_maxseg;
                                                tp->t_flagsext &=
@@ -4331,6 +4380,7 @@ process_ACK:
                                 * ECE atleast once
                                 */
                                tp->ecn_flags |= TE_RECV_ECN_ECE;
+                               INP_INC_IFNET_STAT(inp, ecn_recv_ece);
                                tcpstat.tcps_ecn_recv_ece++;
                                tcp_ccdbg_trace(tp, th, TCP_CC_ECN_RCVD);
                        }
@@ -4736,8 +4786,7 @@ dodata:
                                        tp->t_flags |= TF_DELACK;
                                        tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack);
                                }
-                       }
-                       else {
+                       } else {
                                tp->t_flags |= TF_ACKNOW;
                        }
                        tp->rcv_nxt++;
index e348fadde93e7ef07cb1d2e952c14a39d704f06f..86d4a71f308eabae174c7631e63647af1be5dab4 100644 (file)
@@ -154,15 +154,75 @@ int       tcp_do_tso = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW | CTLFLAG_LOCKED,
        &tcp_do_tso, 0, "Enable TCP Segmentation Offload");
 
+static int
+sysctl_change_ecn_setting SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int i, err = 0, changed = 0;
+       struct ifnet *ifp;
+
+       err = sysctl_io_number(req, tcp_ecn_outbound, sizeof(int32_t),
+           &i, &changed);
+       if (err != 0 || req->newptr == USER_ADDR_NULL)
+               return(err);
+
+       if (changed) {
+               if ((tcp_ecn_outbound == 0 || tcp_ecn_outbound == 1) &&
+                   (i == 0 || i == 1)) {
+                       tcp_ecn_outbound = i;
+                       return(err);
+               }
+               if (tcp_ecn_outbound == 2 && (i == 0 || i == 1)) {
+                       /*
+                        * Reset ECN enable flags on non-cellular
+                        * interfaces so that the system default will take
+                        * over
+                        */
+                       ifnet_head_lock_shared();
+                       TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+                               if (!IFNET_IS_CELLULAR(ifp)) {
+                                       ifnet_lock_exclusive(ifp);
+                                       ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+                                       ifp->if_eflags &= ~IFEF_ECN_ENABLE;
+                                       ifnet_lock_done(ifp);
+                               }
+                       }
+                       ifnet_head_done();
+               } else {
+                       /*
+                        * Set ECN enable flags on non-cellular
+                        * interfaces
+                        */
+                       ifnet_head_lock_shared();
+                       TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+                               if (!IFNET_IS_CELLULAR(ifp)) {
+                                       ifnet_lock_exclusive(ifp);
+                                       ifp->if_eflags |= IFEF_ECN_ENABLE;
+                                       ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+                                       ifnet_lock_done(ifp);
+                               }
+                       }
+                       ifnet_head_done();
+               }
+               tcp_ecn_outbound = i;
+       }
+       /* Change the other one too as the work is done */
+       if (i == 2 || tcp_ecn_inbound == 2)
+               tcp_ecn_inbound = i;
+       return (err);
+}
+
 int     tcp_ecn_outbound = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_initiate_out,
-       CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_outbound, 0,
-       "Initiate ECN for outbound connections");
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, ecn_initiate_out,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_outbound, 0,
+    sysctl_change_ecn_setting, "IU",
+    "Initiate ECN for outbound connections");
 
 int     tcp_ecn_inbound = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_negotiate_in,
-       CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_inbound, 0,
-       "Allow ECN negotiation for inbound connections");
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, ecn_negotiate_in,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_inbound, 0,
+    sysctl_change_ecn_setting, "IU",
+    "Initiate ECN for inbound connections");
 
 int    tcp_packet_chaining = 50;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, packetchain,
@@ -379,6 +439,56 @@ tcp_send_ecn_flags_on_syn(struct tcpcb *tp, struct socket *so)
            (tp->t_flagsext & TF_FASTOPEN)));
 }
 
+void
+tcp_set_ecn(struct tcpcb *tp, struct ifnet *ifp)
+{
+       boolean_t inbound;
+
+       /*
+        * Socket option has precedence
+        */
+       if (tp->ecn_flags & TE_ECN_MODE_ENABLE) {
+               tp->ecn_flags |= TE_ENABLE_ECN;
+               goto check_heuristic;
+       }
+
+       if (tp->ecn_flags & TE_ECN_MODE_DISABLE) {
+               tp->ecn_flags &= ~TE_ENABLE_ECN;
+               return;
+       }
+       /*
+        * Per interface setting comes next
+        */
+       if (ifp != NULL) {
+               if (ifp->if_eflags & IFEF_ECN_ENABLE) {
+                       tp->ecn_flags |= TE_ENABLE_ECN;
+                       goto check_heuristic;
+               }
+
+               if (ifp->if_eflags & IFEF_ECN_DISABLE) {
+                       tp->ecn_flags &= ~TE_ENABLE_ECN;
+                       return;
+               }
+       }
+       /*
+        * System wide settings come last
+        */
+       inbound = (tp->t_inpcb->inp_socket->so_head != NULL);
+       if ((inbound && tcp_ecn_inbound == 1) ||
+           (!inbound && tcp_ecn_outbound == 1)) {
+               tp->ecn_flags |= TE_ENABLE_ECN;
+               goto check_heuristic;
+       } else {
+               tp->ecn_flags &= ~TE_ENABLE_ECN;
+       }
+
+       return;
+
+check_heuristic:
+       if (!tcp_heuristic_do_ecn(tp))
+               tp->ecn_flags &= ~TE_ENABLE_ECN;
+}
+
 /*
  * Tcp output routine: figure out what should be sent and send it.
  *
@@ -609,8 +719,8 @@ again:
                if ((ifp = rt->rt_ifp) != NULL) {
                        somultipages(so, (ifp->if_hwassist & IFNET_MULTIPAGES));
                        tcp_set_tso(tp, ifp);
-                       soif2kcl(so,
-                           (ifp->if_eflags & IFEF_2KCL));
+                       soif2kcl(so, (ifp->if_eflags & IFEF_2KCL));
+                       tcp_set_ecn(tp, ifp);
                }
                if (rt->rt_flags & RTF_UP)
                        RT_GENID_SYNC(rt);
@@ -1493,6 +1603,7 @@ send:
                                *lp++ = htonl(tp->t_dsack_lseq);
                                *lp++ = htonl(tp->t_dsack_rseq);
                                tcpstat.tcps_dsack_sent++;
+                               tp->t_dsack_sent++;
                                nsack--;
                        }
                        VERIFY(nsack == 0 || tp->rcv_numsacks >= nsack);
@@ -1533,8 +1644,8 @@ send:
         *
         * For a SYN-ACK, send an ECN setup SYN-ACK
         */
-       if ((tcp_ecn_inbound || (tp->t_flags & TF_ENABLE_ECN))
-           && (flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
+       if ((flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK) &&
+           (tp->ecn_flags & TE_ENABLE_ECN)) {
                if (tp->ecn_flags & TE_SETUPRECEIVED) {
                        if (tcp_send_ecn_flags_on_syn(tp, so)) {
                                /*
@@ -1568,6 +1679,7 @@ send:
                                if (tp->ecn_flags & TE_SETUPSENT) {
                                        tcpstat.tcps_ecn_lost_synack++;
                                        tcpstat.tcps_ecn_server_success--;
+                                       tp->ecn_flags |= TE_LOST_SYNACK;
                                }
 
                                tp->ecn_flags &=
@@ -1575,8 +1687,8 @@ send:
                                    TE_SENDCWR);
                        }
                }
-       } else if ((tcp_ecn_outbound || (tp->t_flags & TF_ENABLE_ECN))
-           && (flags & (TH_SYN | TH_ACK)) == TH_SYN) {
+       } else if ((flags & (TH_SYN | TH_ACK)) == TH_SYN &&
+           (tp->ecn_flags & TE_ENABLE_ECN)) {
                if (tcp_send_ecn_flags_on_syn(tp, so)) {
                        /*
                         * Setting TH_ECE and TH_CWR makes this an
@@ -1584,6 +1696,7 @@ send:
                         */
                        flags |= (TH_ECE | TH_CWR);
                        tcpstat.tcps_ecn_client_setup++;
+                       tp->ecn_flags |= TE_CLIENT_SETUP;
 
                        /*
                         * Record that we sent the ECN-setup and default to
@@ -1596,8 +1709,10 @@ send:
                         * Fall back to non-ECN and clear flag indicating
                         * we should send data with IP ECT set.
                         */
-                       if (tp->ecn_flags & TE_SETUPSENT)
+                       if (tp->ecn_flags & TE_SETUPSENT) {
                                tcpstat.tcps_ecn_lost_syn++;
+                               tp->ecn_flags |= TE_LOST_SYN;
+                       }
                        tp->ecn_flags &= ~TE_SENDIPECT;
                }
        }
@@ -1971,7 +2086,9 @@ send:
                                        tcp_rxtseg_insert(tp, tp->snd_nxt,
                                            (tp->snd_nxt + len - 1));
                                }
-                               m->m_pkthdr.pkt_flags |= PKTF_TCP_REXMT;
+                               if (len > 0)
+                                       m->m_pkthdr.pkt_flags |=
+                                           PKTF_TCP_REXMT;
                        }
                } else {
                        th->th_seq = htonl(tp->snd_max);
@@ -1981,7 +2098,8 @@ send:
                tcp_rxtseg_insert(tp, p->rxmit, (p->rxmit + len - 1));
                p->rxmit += len;
                tp->sackhint.sack_bytes_rexmit += len;
-               m->m_pkthdr.pkt_flags |= PKTF_TCP_REXMT;
+               if (len > 0)
+                       m->m_pkthdr.pkt_flags |= PKTF_TCP_REXMT;
        }
        th->th_ack = htonl(tp->rcv_nxt);
        tp->last_ack_sent = tp->rcv_nxt;
index 7d8b715ed65db24ea46544dfa1f7fdee9f806a0a..5d0bf9130056885aa63d7aef0128ea98bbd047b5 100644 (file)
@@ -377,6 +377,7 @@ tcp_sack_detect_reordering(struct tcpcb *tp, struct sackhole *s,
                }
 
                tcpstat.tcps_reordered_pkts++;
+               tp->t_reordered_pkts++;
 
                VERIFY(SEQ_GEQ(snd_fack, s->rxmit));
 
@@ -890,6 +891,7 @@ tcp_sack_process_dsack(struct tcpcb *tp, struct tcpopt *to,
        to->to_nsacks--;
        to->to_sacks += TCPOLEN_SACK;
        tcpstat.tcps_dsack_recvd++;
+       tp->t_dsack_recvd++;
 
        /* ignore DSACK option, if DSACK is disabled */
        if (tp->t_flagsext & TF_DISABLE_DSACK)
index 6fafa0f5f853b481cabd900abf4f5ac82c9e3767..65a171fed3df0d3e01c65efda38d85265d40217a 100644 (file)
@@ -1134,6 +1134,89 @@ tcp_getrt_rtt(struct tcpcb *tp, struct rtentry *rt)
        }
 }
 
+static inline void
+tcp_update_ecn_perf_stats(struct tcpcb *tp,
+    struct if_tcp_ecn_perf_stat *stat)
+{
+       u_int64_t curval, oldval;
+       struct inpcb *inp = tp->t_inpcb;
+
+       /* Average RTT */
+       curval = (tp->t_srtt >> TCP_RTT_SHIFT);
+       if (curval > 0 && tp->t_rttupdated >= 16) {
+               if (stat->rtt_avg == 0) {
+                       stat->rtt_avg = curval;
+               } else {
+                       oldval = stat->rtt_avg;
+                       stat->rtt_avg =
+                           ((oldval << 4) - oldval + curval) >> 4;
+               }
+       }
+
+       /* RTT variance */
+       curval = tp->t_rttvar >> TCP_RTTVAR_SHIFT;
+       if (curval > 0 && tp->t_rttupdated >= 16) {
+               if (stat->rtt_var == 0) {
+                       stat->rtt_var = curval;
+               } else {
+                       oldval = stat->rtt_var;
+                       stat->rtt_var =
+                           ((oldval << 4) - oldval + curval) >> 4;
+               }
+       }
+
+       /* Percentage of Out-of-order packets, shift by 10 for precision */
+       curval = (tp->t_rcvoopack << 10);
+       if (inp->inp_stat != NULL && inp->inp_stat->rxpackets > 0 &&
+           curval > 0) {
+               /* Compute percentage */
+               curval = (curval * 100)/inp->inp_stat->rxpackets;
+               if (stat->oo_percent == 0) {
+                       stat->oo_percent = curval;
+               } else {
+                       oldval = stat->oo_percent;
+                       stat->oo_percent =
+                           ((oldval << 4) - oldval + curval) >> 4;
+               }
+       }
+
+       /* Total number of SACK recovery episodes */
+       stat->sack_episodes += tp->t_sack_recovery_episode;
+
+       /* Percentage of reordered packets, shift by 10 for precision */
+       curval = tp->t_reordered_pkts + tp->t_pawsdrop + tp->t_dsack_sent +
+           tp->t_dsack_recvd;
+       curval = curval << 10;
+       if (inp->inp_stat != NULL && (inp->inp_stat->rxpackets > 0 ||
+           inp->inp_stat->txpackets > 0) && curval > 0) {
+               /* Compute percentage */
+               curval = (curval * 100) /
+                   (inp->inp_stat->rxpackets + inp->inp_stat->txpackets);
+               if (stat->reorder_percent == 0) {
+                       stat->reorder_percent = curval;
+               } else {
+                       oldval = stat->reorder_percent;
+                       stat->reorder_percent =
+                           ((oldval << 4) - oldval + curval) >> 4;
+               }
+       }
+
+       /* Percentage of retransmit bytes, shift by 10 for precision */
+       curval = tp->t_stat.txretransmitbytes << 10;
+       if (inp->inp_stat != NULL && inp->inp_stat->txbytes > 0
+           && curval > 0) {
+               curval = (curval * 100) / inp->inp_stat->txbytes;
+               if (stat->rxmit_percent == 0) {
+                       stat->rxmit_percent = curval;
+               } else {
+                       oldval = stat->rxmit_percent;
+                       stat->rxmit_percent =
+                           ((oldval << 4) - oldval + curval) >> 4;
+               }
+       }
+       return;
+}
+
 /*
  * Close a TCP control block:
  *     discard all space held by the tcp
@@ -1316,22 +1399,95 @@ no_valid_rt:
 
        /* free the reassembly queue, if any */
        (void) tcp_freeq(tp);
+
+       /* Collect ECN related statistics */
+       if (tp->ecn_flags & TE_SETUPSENT) {
+               if (tp->ecn_flags & TE_CLIENT_SETUP) {
+                       INP_INC_IFNET_STAT(inp, ecn_client_setup);
+                       if (TCP_ECN_ENABLED(tp)) {
+                               INP_INC_IFNET_STAT(inp,
+                                   ecn_client_success);
+                       } else if (tp->ecn_flags & TE_LOST_SYN) {
+                               INP_INC_IFNET_STAT(inp, ecn_syn_lost);
+                       } else {
+                               INP_INC_IFNET_STAT(inp,
+                                   ecn_peer_nosupport);
+                       }
+               } else {
+                       INP_INC_IFNET_STAT(inp, ecn_server_setup);
+                       if (TCP_ECN_ENABLED(tp)) {
+                               INP_INC_IFNET_STAT(inp,
+                                   ecn_server_success);
+                       } else if (tp->ecn_flags & TE_LOST_SYNACK) {
+                               INP_INC_IFNET_STAT(inp,
+                                   ecn_synack_lost);
+                       } else {
+                               INP_INC_IFNET_STAT(inp,
+                                   ecn_peer_nosupport);
+                       }
+               }
+       }
        if (TCP_ECN_ENABLED(tp)) {
-               if (tp->ecn_flags & TE_RECV_ECN_CE)
+               if (tp->ecn_flags & TE_RECV_ECN_CE) {
                        tcpstat.tcps_ecn_conn_recv_ce++;
-               if (tp->ecn_flags & TE_RECV_ECN_ECE)
+                       INP_INC_IFNET_STAT(inp, ecn_conn_recv_ce);
+               }
+               if (tp->ecn_flags & TE_RECV_ECN_ECE) {
                        tcpstat.tcps_ecn_conn_recv_ece++;
+                       INP_INC_IFNET_STAT(inp, ecn_conn_recv_ece);
+               }
                if (tp->ecn_flags & (TE_RECV_ECN_CE | TE_RECV_ECN_ECE)) {
                        if (tp->t_stat.txretransmitbytes > 0 ||
-                           tp->t_stat.rxoutoforderbytes > 0)
+                           tp->t_stat.rxoutoforderbytes > 0) {
                                tcpstat.tcps_ecn_conn_pl_ce++;
-                       else
+                               INP_INC_IFNET_STAT(inp, ecn_conn_plce);
+                       } else {
                                tcpstat.tcps_ecn_conn_nopl_ce++;
+                               INP_INC_IFNET_STAT(inp, ecn_conn_noplce);
+                       }
                } else {
                        if (tp->t_stat.txretransmitbytes > 0 ||
-                           tp->t_stat.rxoutoforderbytes > 0)
+                           tp->t_stat.rxoutoforderbytes > 0) {
                                tcpstat.tcps_ecn_conn_plnoce++;
+                               INP_INC_IFNET_STAT(inp, ecn_conn_plnoce);
+                       }
+               }
+
+       }
+
+       /* Aggregate performance stats */
+       if (inp->inp_last_outifp != NULL) {
+               struct ifnet *ifp = inp->inp_last_outifp;
+               ifnet_lock_shared(ifp);
+               if ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
+                   IFRF_ATTACHED) {
+                       if (inp->inp_vflag & INP_IPV6) {
+                               if (TCP_ECN_ENABLED(tp)) {
+                                       ifp->if_ipv6_stat->timestamp
+                                           = net_uptime();
+                                       tcp_update_ecn_perf_stats(tp,
+                                           &ifp->if_ipv6_stat->ecn_on);
+                               } else {
+                                       ifp->if_ipv6_stat->timestamp
+                                           = net_uptime();
+                                       tcp_update_ecn_perf_stats(tp,
+                                           &ifp->if_ipv6_stat->ecn_off);
+                               }
+                       } else {
+                               if (TCP_ECN_ENABLED(tp)) {
+                                       ifp->if_ipv4_stat->timestamp
+                                           = net_uptime();
+                                       tcp_update_ecn_perf_stats(tp,
+                                           &ifp->if_ipv4_stat->ecn_on);
+                               } else {
+                                       ifp->if_ipv4_stat->timestamp
+                                           = net_uptime();
+                                       tcp_update_ecn_perf_stats(tp,
+                                           &ifp->if_ipv4_stat->ecn_off);
+                               }
+                       }
                }
+               ifnet_lock_done(ifp);
        }
 
        tcp_free_sackholes(tp);
@@ -2451,6 +2607,7 @@ tcp_rtlookup(inp, input_ifscope)
                tcp_set_tso(tp, rt->rt_ifp);
                soif2kcl(inp->inp_socket,
                    (rt->rt_ifp->if_eflags & IFEF_2KCL));
+               tcp_set_ecn(tp, rt->rt_ifp);
        }
 
        /* Note if the peer is local */
@@ -2557,6 +2714,7 @@ tcp_rtlookup6(inp, input_ifscope)
                tcp_set_tso(tp, rt->rt_ifp);
                soif2kcl(inp->inp_socket,
                    (rt->rt_ifp->if_eflags & IFEF_2KCL));
+               tcp_set_ecn(tp, rt->rt_ifp);
        }
 
        /* Note if the peer is local */
index 046163f7bb9df624b0f35301c1c14012a14a693f..0ffb340d0400845510d56e48104aea566f7cfd46 100644 (file)
@@ -327,6 +327,9 @@ struct tcp_last_report_stats {
        u_int32_t       tcps_ecn_conn_plnoce;
        u_int32_t       tcps_ecn_conn_pl_ce;
        u_int32_t       tcps_ecn_conn_nopl_ce;
+       u_int32_t       tcps_ecn_fallback_synloss;
+       u_int32_t       tcps_ecn_fallback_reorder;
+       u_int32_t       tcps_ecn_fallback_ce;
 
        /* TFO-related statistics */
        u_int32_t       tcps_tfo_syn_data_rcv;
@@ -779,6 +782,15 @@ tcp_timers(tp, timer)
                        } else {
                                tcpstat.tcps_timeoutdrop++;
                        }
+                       if (tp->t_rxtshift >= TCP_MAXRXTSHIFT) {
+                               if (TCP_ECN_ENABLED(tp)) {
+                                       INP_INC_IFNET_STAT(tp->t_inpcb,
+                                           ecn_on.rxmit_drop);
+                               } else {
+                                       INP_INC_IFNET_STAT(tp->t_inpcb,
+                                           ecn_off.rxmit_drop);
+                               }
+                       }
                        tp->t_rxtshift = TCP_MAXRXTSHIFT;
                        postevent(so, 0, EV_TIMEOUT);                   
                        soevent(so, 
@@ -1341,6 +1353,7 @@ fc_output:
 
                tp->t_timer[TCPT_REXMT] = 0;
                tcpstat.tcps_sack_recovery_episode++;
+               tp->t_sack_recovery_episode++;
                tp->sack_newdata = tp->snd_nxt;
                tp->snd_cwnd = tp->t_maxseg;
                tcp_ccdbg_trace(tp, NULL, TCP_CC_ENTER_FASTRECOVERY);
@@ -2046,6 +2059,12 @@ tcp_report_stats(void)
            &prev.tcps_ecn_conn_pl_ce, &stat.ecn_conn_pl_ce);
        tcp_cumulative_stat(tcpstat.tcps_ecn_conn_nopl_ce,
            &prev.tcps_ecn_conn_nopl_ce, &stat.ecn_conn_nopl_ce);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_fallback_synloss,
+           &prev.tcps_ecn_fallback_synloss, &stat.ecn_fallback_synloss);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_fallback_reorder,
+           &prev.tcps_ecn_fallback_reorder, &stat.ecn_fallback_reorder);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_fallback_ce,
+           &prev.tcps_ecn_fallback_ce, &stat.ecn_fallback_ce);
        tcp_cumulative_stat(tcpstat.tcps_tfo_syn_data_rcv,
            &prev.tcps_tfo_syn_data_rcv, &stat.tfo_syn_data_rcv);
        tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_req_rcv,
index 96b17ba5afc36dcc5d686091895d01c405f08f98..bfc86e99493eafe2848062fe90fcabe18d49b3af 100644 (file)
@@ -671,6 +671,9 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
                } else {
                        error = ENETDOWN;
                }
+
+               /* Disable PRECONNECT_DATA, as we don't need to send a SYN anymore. */
+               so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
                return error;
        }
 #endif /* FLOW_DIVERT */
@@ -1558,7 +1561,8 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
        bzero(ti, sizeof(*ti));
 
        ti->tcpi_state = tp->t_state;
-       
+       ti->tcpi_flowhash = inp->inp_flowhash;
+
        if (tp->t_state > TCPS_LISTEN) {
                if (TSTMP_SUPPORTED(tp))
                        ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
@@ -1569,6 +1573,8 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
                        ti->tcpi_snd_wscale = tp->snd_scale;
                        ti->tcpi_rcv_wscale = tp->rcv_scale;
                }
+               if (TCP_ECN_ENABLED(tp))
+                       ti->tcpi_options |= TCPI_OPT_ECN;
 
                /* Are we in retranmission episode */
                if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0)
@@ -1643,6 +1649,31 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
                ti->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
                ti->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
                ti->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
+
+               ti->tcpi_ecn_client_setup = !!(tp->ecn_flags & TE_SETUPSENT);
+               ti->tcpi_ecn_server_setup = !!(tp->ecn_flags & TE_SETUPRECEIVED);
+               ti->tcpi_ecn_success = (tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON ? 1 : 0;
+               ti->tcpi_ecn_lost_syn = !!(tp->ecn_flags & TE_LOST_SYN);
+               ti->tcpi_ecn_lost_synack = !!(tp->ecn_flags & TE_LOST_SYNACK);
+
+               ti->tcpi_local_peer = !!(tp->t_flags & TF_LOCAL);
+
+               if (tp->t_inpcb->inp_last_outifp != NULL) {
+                       if (IFNET_IS_CELLULAR(tp->t_inpcb->inp_last_outifp))
+                               ti->tcpi_if_cell = 1;
+                       else if (IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp))
+                               ti->tcpi_if_wifi = 1;
+               }
+
+               ti->tcpi_ecn_recv_ce = tp->t_ecn_recv_ce;
+               ti->tcpi_ecn_recv_cwr = tp->t_ecn_recv_cwr;
+
+               ti->tcpi_rcvoopack = tp->t_rcvoopack;
+               ti->tcpi_pawsdrop = tp->t_pawsdrop;
+               ti->tcpi_sack_recovery_episode = tp->t_sack_recovery_episode;
+               ti->tcpi_reordered_pkts = tp->t_reordered_pkts;
+               ti->tcpi_dsack_sent = tp->t_dsack_sent;
+               ti->tcpi_dsack_recvd = tp->t_dsack_recvd;
        }
 }
 
@@ -1913,7 +1944,6 @@ tcp_ctloutput(so, sopt)
                case TCP_NODELAY:
                case TCP_NOOPT:
                case TCP_NOPUSH:
-               case TCP_ENABLE_ECN:
                        error = sooptcopyin(sopt, &optval, sizeof optval,
                                            sizeof optval);
                        if (error)
@@ -1929,9 +1959,6 @@ tcp_ctloutput(so, sopt)
                        case TCP_NOPUSH:
                                opt = TF_NOPUSH;
                                break;
-                       case TCP_ENABLE_ECN:
-                               opt = TF_ENABLE_ECN;
-                               break;
                        default:
                                opt = 0; /* dead code to fool gcc */
                                break;
@@ -2260,6 +2287,36 @@ tcp_ctloutput(so, sopt)
                        else
                                tcp_disable_tfo(tp);
                        break;
+               case TCP_ENABLE_ECN:
+                       error = sooptcopyin(sopt, &optval, sizeof optval,
+                                           sizeof optval);
+                       if (error)
+                               break;
+                       if (optval) {
+                               tp->ecn_flags |= TE_ECN_MODE_ENABLE;
+                               tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
+                       } else {
+                               tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
+                       }
+                       break;
+               case TCP_ECN_MODE:
+                       error = sooptcopyin(sopt, &optval, sizeof optval,
+                                           sizeof optval);
+                       if (error)
+                               break;
+                       if (optval == ECN_MODE_DEFAULT) {
+                               tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
+                               tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
+                       } else if (optval == ECN_MODE_ENABLE) {
+                               tp->ecn_flags |= TE_ECN_MODE_ENABLE;
+                               tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
+                       } else if (optval == ECN_MODE_DISABLE) {
+                               tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
+                               tp->ecn_flags |= TE_ECN_MODE_DISABLE;
+                       } else {
+                               error = EINVAL;
+                       }
+                       break;
                case SO_FLUSH:
                        if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
                            sizeof (optval))) != 0)
@@ -2312,7 +2369,15 @@ tcp_ctloutput(so, sopt)
                        optval = tp->t_flags & TF_NOPUSH;
                        break;
                case TCP_ENABLE_ECN:
-                       optval = (tp->t_flags & TF_ENABLE_ECN) ? 1 : 0; 
+                       optval = (tp->ecn_flags & TE_ECN_MODE_ENABLE) ? 1 : 0;
+                       break;
+               case TCP_ECN_MODE:
+                       if (tp->ecn_flags & TE_ECN_MODE_ENABLE)
+                               optval = ECN_MODE_ENABLE;
+                       else if (tp->ecn_flags & TE_ECN_MODE_DISABLE)
+                               optval = ECN_MODE_DISABLE;
+                       else
+                               optval = ECN_MODE_DEFAULT;
                        break;
                case TCP_CONNECTIONTIMEOUT:
                        optval = tp->t_keepinit / TCP_RETRANSHZ;
index 1ec0559eeaf32ee6c9b577f05a6a93cc1fe10789..26f5b49d089eb1ccd6a494e01e9675ad7a69e14d 100644 (file)
@@ -280,7 +280,6 @@ struct tcpcb {
 #define        TF_WASFRECOVERY 0x400000        /* was in NewReno Fast Recovery */
 #define        TF_SIGNATURE    0x800000        /* require MD5 digests (RFC2385) */
 #define        TF_MAXSEGSNT    0x1000000       /* last segment sent was a full segment */
-#define        TF_ENABLE_ECN   0x2000000       /* Enable ECN */
 #define TF_PMTUD       0x4000000       /* Perform Path MTU Discovery for this connection */
 #define        TF_CLOSING      0x8000000       /* pending tcp close */
 #define TF_TSO         0x10000000      /* TCP Segment Offloading is enable on this connection */
@@ -382,16 +381,29 @@ struct tcpcb {
        u_int32_t       rcv_by_unackwin; /* bytes seen during the last ack-stretching win */
        u_int32_t       rcv_nostrack_ts; /* timestamp when stretch ack was disabled automatically */
        u_int16_t       rcv_waitforss;  /* wait for packets during slow-start */
-       u_int16_t               ecn_flags;
-#define TE_SETUPSENT           0x01    /* Indicate we have sent ECN-SETUP SYN or SYN-ACK */
-#define TE_SETUPRECEIVED       0x02    /* Indicate we have received ECN-SETUP SYN or SYN-ACK */
-#define TE_SENDIPECT           0x04    /* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */
-#define TE_SENDCWR             0x08    /* Indicate that the next non-retransmit should have the TCP CWR flag set */
-#define TE_SENDECE             0x10    /* Indicate that the next packet should have the TCP ECE flag set */
-#define TE_INRECOVERY          0x20    /* connection entered recovery after receiving ECE */
-#define TE_RECV_ECN_CE         0x40    /* Received IPTOS_ECN_CE marking atleast once */
-#define TE_RECV_ECN_ECE        0x80    /* Received ECE marking atleast once */
-#define TE_ECN_ON              (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection) */
+
+/* ECN stats */
+       u_int16_t       ecn_flags;
+#define        TE_SETUPSENT            0x0001  /* Indicate we have sent ECN-SETUP SYN or SYN-ACK */
+#define        TE_SETUPRECEIVED        0x0002  /* Indicate we have received ECN-SETUP SYN or SYN-ACK */
+#define        TE_SENDIPECT            0x0004  /* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */
+#define        TE_SENDCWR              0x0008  /* Indicate that the next non-retransmit should have the TCP CWR flag set */
+#define        TE_SENDECE              0x0010  /* Indicate that the next packet should have the TCP ECE flag set */
+#define        TE_INRECOVERY           0x0020  /* connection entered recovery after receiving ECE */
+#define        TE_RECV_ECN_CE          0x0040  /* Received IPTOS_ECN_CE marking at least once */
+#define        TE_RECV_ECN_ECE         0x0080  /* Received ECE marking at least once */
+#define        TE_LOST_SYN             0x0100  /* Lost SYN with ECN setup */
+#define        TE_LOST_SYNACK          0x0200  /* Lost SYN-ACK with ECN setup */
+#define        TE_ECN_MODE_ENABLE      0x0400  /* Option ECN mode set to enable */
+#define        TE_ECN_MODE_DISABLE     0x0800  /* Option ECN mode set to disable */
+#define        TE_ENABLE_ECN           0x1000  /* Enable negotiation of ECN */
+#define        TE_ECN_ON               (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection */
+#define        TE_CEHEURI_SET          0x2000 /* We did our CE-probing at the beginning */
+#define        TE_CLIENT_SETUP         0x4000  /* setup from client side */
+
+       u_int32_t       t_ecn_recv_ce;  /* Received CE from the network */
+       u_int32_t       t_ecn_recv_cwr; /* Packets received with CWR */
+       u_int8_t        t_ecn_recv_ce_pkt; /* Received packet with CE-bit set (independent from last_ack_sent) */
 
 /* state for bad retransmit recovery */
        u_int32_t       snd_cwnd_prev;  /* cwnd prior to retransmit */
@@ -445,7 +457,7 @@ struct tcpcb {
        uint32_t        rtt_count;              /* Number of RTT samples in recent base history */
        uint32_t        bg_ssthresh;            /* Slow start threshold until delay increases */
        uint32_t        t_flagsext;             /* Another field to accommodate more flags */
-#define TF_RXTFINDROP  0x1                     /* Drop conn after retransmitting FIN 3 times */
+#define TF_RXTFINDROP          0x1                     /* Drop conn after retransmitting FIN 3 times */
 #define TF_RCVUNACK_WAITSS     0x2             /* set when the receiver should not stretch acks */
 #define TF_BWMEAS_INPROGRESS   0x4             /* Indicate BW meas is happening */
 #define TF_MEASURESNDBW                0x8             /* Measure send bw on this connection */
@@ -587,6 +599,13 @@ struct tcpcb {
 #define        TFO_PROBE_PROBING       1 /* Sending out TCP-keepalives waiting for reply */
 #define        TFO_PROBE_WAIT_DATA     2 /* Received reply, waiting for data */
        u_int8_t                t_tfo_probe_state;
+       
+       u_int32_t       t_rcvoopack;            /* out-of-order packets received */
+       u_int32_t       t_pawsdrop;             /* segments dropped due to PAWS */
+       u_int32_t       t_sack_recovery_episode; /* SACK recovery episodes */
+       u_int32_t       t_reordered_pkts;       /* packets reordered */
+       u_int32_t       t_dsack_sent;           /* Sent DSACK notification */
+       u_int32_t       t_dsack_recvd;          /* Received a valid DSACK option */
 };
 
 #define IN_FASTRECOVERY(tp)    (tp->t_flags & TF_FASTRECOVERY)
@@ -1107,6 +1126,9 @@ struct    tcpstat {
        u_int32_t       tcps_ecn_conn_plnoce;   /* Number of connections that received no CE and sufferred packet loss */
        u_int32_t       tcps_ecn_conn_pl_ce;    /* Number of connections that received CE and sufferred packet loss */
        u_int32_t       tcps_ecn_conn_nopl_ce;  /* Number of connections that received CE and sufferred no packet loss */
+       u_int32_t       tcps_ecn_fallback_synloss; /* Number of times we did fall back due to SYN-Loss */
+       u_int32_t       tcps_ecn_fallback_reorder; /* Number of times we fell back because we detected the PAWS-issue */
+       u_int32_t       tcps_ecn_fallback_ce;   /* Number of times we fell back because we received too many CEs */
 
        /* TFO-related statistics */
        u_int32_t       tcps_tfo_syn_data_rcv;  /* Received a SYN+data with valid cookie */
@@ -1121,6 +1143,7 @@ struct    tcpstat {
        u_int32_t       tcps_tfo_blackhole;     /* TFO got blackholed by a middlebox. */
 };
 
+
 struct tcpstat_local {
        u_int64_t badformat;
        u_int64_t unspecv6;
@@ -1467,6 +1490,7 @@ void       tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
 void    tcp_free_sackholes(struct tcpcb *tp);
 int32_t         tcp_sbspace(struct tcpcb *tp);
 void    tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp);
+void     tcp_set_ecn(struct tcpcb *tp, struct ifnet *ifp);
 void    tcp_reset_stretch_ack(struct tcpcb *tp);
 extern void tcp_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *);
 uint32_t tcp_count_opportunistic(unsigned int ifindex, u_int32_t flags);
index 28f53d5cc3d236141488a08423bb5acdc6e42bd7..bfd976bb7d3c4ecc4c887a53c5d24c25572b7268 100644 (file)
@@ -412,8 +412,9 @@ ah4_input(struct mbuf *m, int off)
                 * XXX more sanity checks
                 * XXX relationship with gif?
                 */
-               u_int8_t tos;
-               
+               u_int8_t tos, otos;
+               int sum;
+
                if (ifamily == AF_INET6) {
                        ipseclog((LOG_NOTICE, "ipsec tunnel protocol mismatch "
                            "in IPv4 AH input: %s\n", ipsec_logsastr(sav)));
@@ -429,11 +430,21 @@ ah4_input(struct mbuf *m, int off)
                        }
                }
                ip = mtod(m, struct ip *);
+               otos = ip->ip_tos;
                /* ECN consideration. */
                if (ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos) == 0) {
                        IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
                        goto fail;
                }
+
+                if (otos != ip->ip_tos) {
+                   sum = ~ntohs(ip->ip_sum) & 0xffff;
+                   sum += (~otos & 0xffff) + ip->ip_tos;
+                   sum = (sum >> 16) + (sum & 0xffff);
+                   sum += (sum >> 16);  /* add carry */
+                   ip->ip_sum = htons(~sum & 0xffff);
+               } 
+
                if (!key_checktunnelsanity(sav, AF_INET,
                            (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) {
                        ipseclog((LOG_NOTICE, "ipsec tunnel address mismatch "
index 277e6963ed5bae230ed785569387999cedbc438d..1718e87bd936f7b1a2543d5ade6b6f996dcee664 100644 (file)
@@ -521,7 +521,8 @@ noreplaycheck:
                 * XXX more sanity checks
                 * XXX relationship with gif?
                 */
-               u_int8_t tos;
+               u_int8_t tos, otos;
+               int sum;
 
                tos = ip->ip_tos;
                m_adj(m, off + esplen + ivlen);
@@ -537,10 +538,21 @@ noreplaycheck:
                        }
                        ip = mtod(m, struct ip *);
                        /* ECN consideration. */
+
+                       otos = ip->ip_tos;
                        if (ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos) == 0) {
                                IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
                                goto bad;
                        }
+
+                       if (otos != ip->ip_tos) {
+                           sum = ~ntohs(ip->ip_sum) & 0xffff;
+                           sum += (~otos & 0xffff) + ip->ip_tos;
+                           sum = (sum >> 16) + (sum & 0xffff);
+                           sum += (sum >> 16);  /* add carry */
+                           ip->ip_sum = htons(~sum & 0xffff);
+                       }
+
                        if (!key_checktunnelsanity(sav, AF_INET,
                            (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) {
                                ipseclog((LOG_ERR, "ipsec tunnel address mismatch "
@@ -1187,12 +1199,26 @@ noreplaycheck:
                                        goto bad;
                                }
                        }
+
+                       u_int8_t otos;
+                       int sum;
+
                        ip = mtod(m, struct ip *);
+                       otos = ip->ip_tos;
                        /* ECN consideration. */
                        if (ip46_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip->ip_tos) == 0) {
                                IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
                                goto bad;
                        }
+
+                       if (otos != ip->ip_tos) {
+                           sum = ~ntohs(ip->ip_sum) & 0xffff;
+                           sum += (~otos & 0xffff) + ip->ip_tos;
+                           sum = (sum >> 16) + (sum & 0xffff);
+                           sum += (sum >> 16);  /* add carry */
+                           ip->ip_sum = htons(~sum & 0xffff);
+                       }
+
                        if (!key_checktunnelsanity(sav, AF_INET,
                            (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) {
                                ipseclog((LOG_ERR, "ipsec tunnel address mismatch "
index 7058b39768ceb08b6e98c873c8d4e9783f791ea5..54840b69e897e15b26ec5f47ee12ee27987d6328 100644 (file)
@@ -265,7 +265,9 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
        case IPPROTO_IPV4:
            {
                struct ip *ip;
-               u_int8_t otos8;
+               u_int8_t otos8, old_tos;
+               int sum;
+
                af = AF_INET;
                otos8 = (ntohl(otos) >> 20) & 0xff;
                if (mbuf_len(m) < sizeof (*ip)) {
@@ -274,9 +276,17 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
                                return (IPPROTO_DONE);
                }
                ip = mtod(m, struct ip *);
-               if (gifp->if_flags & IFF_LINK1)
+               if (gifp->if_flags & IFF_LINK1) {
+                       old_tos = ip->ip_tos;
                        egress_success = ip_ecn_egress(ECN_NORMAL, &otos8, &ip->ip_tos);
-               else
+                       if (old_tos != ip->ip_tos) {
+                           sum = ~ntohs(ip->ip_sum) & 0xffff;
+                           sum += (~old_tos & 0xffff) + ip->ip_tos;
+                           sum = (sum >> 16) + (sum & 0xffff);
+                           sum += (sum >> 16);  /* add carry */
+                           ip->ip_sum = htons(~sum & 0xffff);
+                       }
+               } else
                        egress_success = ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
                break;
            }
index 660f8da4f03200dd5f4bdd1e611015adfa52655e..35888ba180429e3e0591e867392d1d714edcd25e 100644 (file)
@@ -200,6 +200,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                return (EINVAL);
        if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
                wild = 1;
+
        socket_unlock(so, 0); /* keep reference */
        lck_rw_lock_exclusive(pcbinfo->ipi_lock);
 
@@ -367,6 +368,16 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
        }
 
        socket_lock(so, 0);
+       /*
+        * We unlocked socket's protocol lock for a long time.
+        * The socket might have been dropped/defuncted.
+        * Checking if world has changed since.
+        */
+       if (inp->inp_state == INPCB_STATE_DEAD) {
+               lck_rw_done(pcbinfo->ipi_lock);
+               return (ECONNABORTED);
+       }
+
        /* check if the socket got bound when the lock was released */
        if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
                lck_rw_done(pcbinfo->ipi_lock);
index 7767822d7403cfce884d6b46752e7d831697f485..0bdaaa7e2ed49ca2b61d30b8cb3f48db75ee6c5d 100644 (file)
@@ -844,8 +844,6 @@ skip_ipsec:
                                    (mbuf_t *)&m, ippo);
                                if (result == EJUSTRETURN) {
                                        ipf_unref();
-                                       if (m != NULL)
-                                               m_freem(m);
                                        m = NULL;
                                        goto evaluateloop;
                                }
index 8fe0d4d9ee86679db650a85012fcf0e92d3a2ca3..ce0dfdf216dd4e1ccad1d1fc3584d4beccb7a3e2 100644 (file)
@@ -2692,11 +2692,12 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 
        case SIOCSPFXFLUSH_IN6: {       /* struct in6_ifreq */
                /* flush all the prefix advertised by routers */
-               struct nd_prefix *next;
+               struct nd_prefix *next = NULL;
 
                lck_mtx_lock(nd6_mutex);
                for (pr = nd_prefix.lh_first; pr; pr = next) {
-                       struct in6_ifaddr *ia;
+                       struct in6_ifaddr *ia = NULL;
+                       bool iterate_pfxlist_again = false;
 
                        next = pr->ndpr_next;
 
@@ -2741,9 +2742,8 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
                                         * The same applies for the prefix list.
                                         */
                                        ia = in6_ifaddrs;
-                                       next = nd_prefix.lh_first;
+                                       iterate_pfxlist_again = true;
                                        continue;
-
                                }
                                IFA_UNLOCK(&ia->ia_ifa);
                                ia = ia->ia_next;
@@ -2753,17 +2753,10 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
                        prelist_remove(pr);
                        NDPR_UNLOCK(pr);
                        pfxlist_onlink_check();
-                       /*
-                        * If we were trying to restart this loop
-                        * above by changing the value of 'next', we might
-                        * end up freeing the only element on the list
-                        * when we call NDPR_REMREF().
-                        * When this happens, we also have get out of this
-                        * loop because we have nothing else to do.
-                        */
-                       if (pr == next)
-                               next = NULL;
                        NDPR_REMREF(pr);
+                       if (iterate_pfxlist_again) {
+                               next = nd_prefix.lh_first;
+                       }
                }
                lck_mtx_unlock(nd6_mutex);
                break;
index 3414db3477d5e79bb3603d47b08d82e9f30ac4cc..0283fa918f7d540f266f004e2ccd44006bb0e4a2 100644 (file)
@@ -518,15 +518,7 @@ nfs_gss_clnt_ctx_find_principal(struct nfsreq *req, uint8_t *principal, uint32_t
                lck_mtx_unlock(cp->gss_clnt_mtx);
        }
 
-       MALLOC(ki, gss_key_info *, sizeof (gss_key_info), M_TEMP, M_WAITOK|M_ZERO);
-       if (ki == NULL) {
-               lck_mtx_unlock(&nmp->nm_lock);
-               return (ENOMEM);
-       }
-
-       if (cp) {
-               cp->gss_clnt_kinfo = ki;
-       } else if (nfs_root_steals_ctx && principal == NULL && kauth_cred_getuid(req->r_cred) == 0) {
+       if (!cp && nfs_root_steals_ctx && principal == NULL && kauth_cred_getuid(req->r_cred) == 0) {
                /*
                 * If superuser is trying to get access, then co-opt
                 * the first valid context in the list.
@@ -544,6 +536,12 @@ nfs_gss_clnt_ctx_find_principal(struct nfsreq *req, uint8_t *principal, uint32_t
                }
        }
 
+       MALLOC(ki, gss_key_info *, sizeof (gss_key_info), M_TEMP, M_WAITOK|M_ZERO);
+       if (ki == NULL) {
+               lck_mtx_unlock(&nmp->nm_lock);
+               return (ENOMEM);
+       }
+
        NFS_GSS_DBG("Context %s%sfound in Neg Cache @  %ld\n",
                    NFS_GSS_CTX(req, cp),
                    cp == NULL ? " not " : "",
@@ -573,6 +571,7 @@ nfs_gss_clnt_ctx_find_principal(struct nfsreq *req, uint8_t *principal, uint32_t
                        nfs_gss_clnt_mnt_ref(nmp);
                }
        } else {
+               cp->gss_clnt_kinfo = ki;
                nfs_gss_clnt_ctx_clean(cp);
                if (principal) {
                        /*
index 12daa55889dcfeb91ed97bdbad8088320de2b23f..1b082e748f252a98386e908790c53c50c452a83a 100644 (file)
@@ -669,6 +669,11 @@ fhopen( proc_t p,
                goto bad;
        }
 
+#if CONFIG_MACF
+       if ((error = mac_vnode_check_open(ctx, vp, fmode)))
+               goto bad;
+#endif
+
        /* compute action to be authorized */
        action = 0;
        if (fmode & FREAD)
index ab38cc22adf8fea78bbb327940abb9795ab12bb3..639920f5e4b412167d0232c98d65008d1eb9332f 100644 (file)
@@ -1165,14 +1165,14 @@ audit_session_setaia(proc_t p, auditinfo_addr_t *new_aia_p)
                my_new_cred = kauth_cred_setauditinfo(my_cred, &tmp_as);
 
                if (my_cred != my_new_cred) {
-                       proc_lock(p);
+                       proc_ucred_lock(p);
                        /* Need to protect for a race where another thread also
                         * changed the credential after we took our reference.
                         * If p_ucred has changed then we should restart this
                         * again with the new cred.
                         */
                        if (p->p_ucred != my_cred) {
-                               proc_unlock(p);
+                               proc_ucred_unlock(p);
                                audit_session_unref(my_new_cred);
                                kauth_cred_unref(&my_new_cred);
                                /* try again */
@@ -1182,7 +1182,7 @@ audit_session_setaia(proc_t p, auditinfo_addr_t *new_aia_p)
                        p->p_ucred = my_new_cred;
                        /* update cred on proc */
                        PROC_UPDATE_CREDS_ONPROC(p);
-                       proc_unlock(p);
+                       proc_ucred_unlock(p);
                }
                /*
                 * Drop old proc reference or our extra reference.
@@ -1390,12 +1390,12 @@ audit_session_join_internal(proc_t p, ipc_port_t port, au_asid_t *new_asid)
                goto done;
        }
 
-       proc_lock(p);
+       proc_ucred_lock(p);
        kauth_cred_ref(p->p_ucred);
        my_cred = p->p_ucred;
        if (!IS_VALID_CRED(my_cred)) {
                kauth_cred_unref(&my_cred);     
-               proc_unlock(p);
+               proc_ucred_unlock(p);
                err = ESRCH;
                goto done;
        }
@@ -1421,7 +1421,7 @@ audit_session_join_internal(proc_t p, ipc_port_t port, au_asid_t *new_asid)
                /* Increment the proc count of new session */
                audit_inc_procount(AU_SENTRY_PTR(new_aia_p));
 
-               proc_unlock(p);
+               proc_ucred_unlock(p);
 
                /* Propagate the change from the process to the Mach task. */
                set_security_token(p);
@@ -1429,7 +1429,7 @@ audit_session_join_internal(proc_t p, ipc_port_t port, au_asid_t *new_asid)
                /* Decrement the process count of the former session. */
                audit_dec_procount(AU_SENTRY_PTR(old_aia_p));
        } else  {
-               proc_unlock(p);
+               proc_ucred_unlock(p);
        }
        kauth_cred_unref(&my_cred);
 
index 62b205eb79d13a33a775bf273703240c9a737a0c..cf2811c5b77c9c57552bbc8d7cf31c5e5b3a7ea0 100644 (file)
@@ -104,6 +104,21 @@ extern int coalitions_get_list(int type, struct procinfo_coalinfo *coal_list, in
  */
 extern boolean_t coalition_is_leader(task_t task, int coal_type, coalition_t *coal);
 
+/*
+ * coalition_get_leader:
+ * Get a task reference on the leader of a given coalition
+ *
+ * Parameters:
+ *     coal      : The coalition to investigate
+ *
+ * Returns: A referenced task pointer of the leader of the given coalition.
+ *          This could be TASK_NULL if the coalition doesn't have a leader.
+ *          If the return value is non-null, the caller is responsible to call
+ *          task_deallocate on the returned value.
+ */
+extern task_t coalition_get_leader(coalition_t coal);
+
+
 /*
  * coalition_get_task_count:
  * Sum up the number of tasks in the given coalition
index 48dfac84a5faeed54d986ffb161fb0bdf13e1ec4..d2e340aa206cc0341de5e852d68da85d6a6b6d27 100644 (file)
@@ -323,6 +323,7 @@ extern void kernel_debug_string_simple(const char *message);
 #define DBG_MACH_SFI                   0xA2    /* Selective Forced Idle (SFI) */
 #define DBG_MACH_ENERGY_PERF   0xA3 /* Energy/performance resource stats */
 #define DBG_MACH_SYSDIAGNOSE   0xA4    /* sysdiagnose keychord */
+#define DBG_MACH_ZALLOC        0xA5    /* Zone allocator */
 
 /* Codes for Scheduler (DBG_MACH_SCHED) */
 #define MACH_SCHED              0x0     /* Scheduler */
@@ -440,6 +441,10 @@ extern void kernel_debug_string_simple(const char *message);
 #define SFI_PID_SET_MANAGED            0x8
 #define SFI_PID_CLEAR_MANAGED          0x9
 #define SFI_GLOBAL_DEFER               0xa
+
+/* Codes for Zone Allocator (DBG_MACH_ZALLOC) */
+#define ZALLOC_ZCRAM                   0x0
+
 /* **** The Kernel Debug Sub Classes for Network (DBG_NETWORK) **** */
 #define DBG_NETIP      1       /* Internet Protocol */
 #define DBG_NETARP     2       /* Address Resolution Protocol */
index 443861ddc22f4a9b48569d6ea5d53dcf6292d7bd..afd022407ecad70e842ad0d2a02b5c310b3ad0ec 100644 (file)
@@ -844,6 +844,19 @@ struct proc_fileportinfo {
 #define LISTCOALITIONS_SINGLE_TYPE     2
 #define LISTCOALITIONS_SINGLE_TYPE_SIZE (sizeof(struct procinfo_coalinfo))
 
+/* reasons for proc_can_use_foreground_hw */
+#define PROC_FGHW_OK                     0 /* pid may use foreground HW */
+#define PROC_FGHW_DAEMON_OK              1
+#define PROC_FGHW_DAEMON_LEADER         10 /* pid is in a daemon coalition */
+#define PROC_FGHW_LEADER_NONUI          11 /* coalition leader is in a non-focal state */
+#define PROC_FGHW_LEADER_BACKGROUND     12 /* coalition leader is in a background state */
+#define PROC_FGHW_DAEMON_NO_VOUCHER     13 /* pid is a daemon with no adopted voucher */
+#define PROC_FGHW_NO_VOUCHER_ATTR       14 /* pid has adopted a voucher with no bank/originator attribute */
+#define PROC_FGHW_NO_ORIGINATOR         15 /* pid has adopted a voucher for a process that's gone away */
+#define PROC_FGHW_ORIGINATOR_BACKGROUND 16 /* pid has adopted a voucher for an app that's in the background */
+#define PROC_FGHW_VOUCHER_ERROR         98 /* error in voucher / originator callout */
+#define PROC_FGHW_ERROR                 99 /* syscall parameter/permissions error */
+
 /* __proc_info() call numbers */
 #define PROC_INFO_CALL_LISTPIDS         0x1
 #define PROC_INFO_CALL_PIDINFO          0x2
@@ -856,6 +869,7 @@ struct proc_fileportinfo {
 #define PROC_INFO_CALL_PIDRUSAGE        0x9
 #define PROC_INFO_CALL_PIDORIGINATORINFO 0xa
 #define PROC_INFO_CALL_LISTCOALITIONS   0xb
+#define PROC_INFO_CALL_CANUSEFGHW       0xc
 
 #endif /* PRIVATE */
 
index a3d8487e5bb89d45a1d4da76756e279dccb885ec..f5ad0fcf4286bfc044a713051a33cf8a0180b9af 100644 (file)
@@ -93,6 +93,7 @@ __END_DECLS
  * PL = Process Lock
  * PGL = Process Group Lock
  * PFDL = Process File Desc Lock
+ * PUCL = Process User Credentials Lock
  * PSL = Process Spin Lock
  * PPL = Parent Process Lock (planed for later usage)
  * LL = List Lock
@@ -221,9 +222,10 @@ struct     proc {
        TAILQ_HEAD( ,eventqelt) p_evlist;       /* (PL) */
 
        lck_mtx_t       p_fdmlock;              /* proc lock to protect fdesc */
+       lck_mtx_t       p_ucred_mlock;          /* mutex lock to protect p_ucred */
 
        /* substructures: */
-       kauth_cred_t    p_ucred;                /* Process owner's identity. (PL) */
+       kauth_cred_t    p_ucred;                /* Process owner's identity. (PUCL) */
        struct  filedesc *p_fd;                 /* Ptr to open files structure. (PFDL) */
        struct  pstats *p_stats;                /* Accounting/statistics (PL). */
        struct  plimit *p_limit;                /* Process limits.(PL) */
@@ -661,6 +663,7 @@ extern lck_grp_t * proc_lck_grp;
 #if CONFIG_FINE_LOCK_GROUPS
 extern lck_grp_t * proc_mlock_grp;
 extern lck_grp_t * proc_fdmlock_grp;
+extern lck_grp_t * proc_ucred_mlock_grp;
 extern lck_grp_t * proc_slock_grp;
 #endif
 extern lck_grp_attr_t * proc_lck_grp_attr;
@@ -683,6 +686,8 @@ extern void proc_fdlock(struct proc *);
 extern void proc_fdlock_spin(struct proc *);
 extern void proc_fdunlock(struct proc *);
 extern void proc_fdlock_assert(proc_t p, int assertflags);
+extern void proc_ucred_lock(struct proc *);
+extern void proc_ucred_unlock(struct proc *);
 __private_extern__ int proc_core_name(const char *name, uid_t uid, pid_t pid,
                char *cr_name, size_t cr_name_len);
 extern int isinferior(struct proc *, struct proc *);
@@ -741,6 +746,7 @@ extern proc_t proc_parentholdref(proc_t);
 extern int proc_parentdropref(proc_t, int);
 int     itimerfix(struct timeval *tv);
 int     itimerdecr(struct proc * p, struct itimerval *itp, int usec);
+int  timespec_is_valid(const struct timespec *);
 void proc_signalstart(struct proc *, int locked);
 void proc_signalend(struct proc *, int locked);
 int  proc_transstart(struct proc *, int locked, int non_blocking);
index 96f2519e556ecdc3385975211f7eda389ad4508c..d5f8ac6364ad136f66228c4318f3f0997e4c3f65 100644 (file)
 #define        SIOCGIFFUNCTIONALTYPE   _IOWR('i', 173, struct ifreq) /* get interface functional type */
 #define        SIOCSIFNETSIGNATURE     _IOWR('i', 174, struct if_nsreq)
 #define        SIOCGIFNETSIGNATURE     _IOWR('i', 175, struct if_nsreq)
+
+#define        SIOCGECNMODE            _IOWR('i', 176, struct ifreq)
+#define        SIOCSECNMODE            _IOW('i', 177, struct ifreq)
 #endif /* PRIVATE */
 
 #endif /* !_SYS_SOCKIO_H_ */
index 2cbbba27fa15c68bf95baf555479469d76ab938b..7d5a43c3153f48823c76dd35a81b2fdc360fe6cc 100644 (file)
@@ -217,6 +217,7 @@ void        bsd_timeout(void (*)(void *), void *arg, struct timespec * ts);
 void   bsd_untimeout(void (*)(void *), void *arg);
 void   set_fsblocksize(struct vnode *);
 uint64_t tvtoabstime(struct timeval *);
+uint64_t tstoabstime(struct timespec *);
 void   *throttle_info_create(void);
 void   throttle_info_mount_ref(mount_t mp, void * throttle_info);
 void   throttle_info_mount_rel(mount_t mp);
index 57fe3431adde89bb9ef6d1907863361220ff1461..c565a3df40a7136d6ba5ef7adbbd8fcde609ed94 100644 (file)
@@ -2405,6 +2405,7 @@ next_dwrite:
                goto wait_for_dwrites;
         }
 
+       task_update_logical_writes(current_task(), (io_req_size & ~PAGE_MASK), TASK_WRITE_IMMEDIATE);
        while (io_req_size >= PAGE_SIZE && uio->uio_offset < newEOF && retval == 0) {
                int     throttle_type;
 
@@ -6008,6 +6009,7 @@ cluster_copy_upl_data(struct uio *uio, upl_t upl, int upl_offset, int *io_resid)
        int       retval = 0;
        int       xsize;
        upl_page_info_t *pl;
+       int       dirty_count;
 
        xsize = *io_resid;
 
@@ -6044,10 +6046,13 @@ cluster_copy_upl_data(struct uio *uio, upl_t upl, int upl_offset, int *io_resid)
        pg_offset = upl_offset & PAGE_MASK;
        csize     = min(PAGE_SIZE - pg_offset, xsize);
 
+       dirty_count = 0;
        while (xsize && retval == 0) {
                addr64_t  paddr;
 
                paddr = ((addr64_t)upl_phys_page(pl, pg_index) << PAGE_SHIFT) + pg_offset;
+               if ((uio->uio_rw == UIO_WRITE) && (upl_dirty_page(pl, pg_index) == FALSE)) 
+                       dirty_count++;
 
                retval = uiomove64(paddr, csize, uio);
 
@@ -6060,6 +6065,7 @@ cluster_copy_upl_data(struct uio *uio, upl_t upl, int upl_offset, int *io_resid)
 
        uio->uio_segflg = segflg;
 
+       task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED);
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
                     (int)uio->uio_offset, xsize, retval, segflg, 0);
 
index 23b21860a87cfbf937411cb70cc33056ac963765..714ba335add8c2e61ebd3fff14214f5601f9418a 100644 (file)
@@ -2991,6 +2991,7 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(buf_t bp, vo
                blhdr->binfo[i].bnum = (off_t)(buf_blkno(bp));
                blhdr->binfo[i].u.bp = bp;
 
+               task_update_logical_writes(current_task(), (2 * bsize), TASK_WRITE_METADATA);
                KERNEL_DEBUG_CONSTANT(0x3018004, VM_KERNEL_ADDRPERM(vp), blhdr->binfo[i].bnum, bsize, 0, 0);
 
                if (func) {
index 9fec68cd43b416d124bd3681c35b48bc5cda19e1..ca47a42da818d41a88d3c3e0a76be9ea0ff1c993 100644 (file)
@@ -7425,6 +7425,9 @@ vnode_authorize_callback_int(__unused kauth_cred_t unused_cred, __unused void *i
                        goto out;
                }
                
+               /* Assume that there were DENYs so we don't wrongly cache KAUTH_VNODE_SEARCHBYANYONE */
+               found_deny = TRUE;
+               
                KAUTH_DEBUG("%p    ALLOWED - caller is superuser", vp);
        }
 out:
@@ -7807,10 +7810,10 @@ vnode_authattr(vnode_t vp, struct vnode_attr *vap, kauth_action_t *actionp, vfs_
         * If the size is being set, make sure it's not a directory.
         */
        if (VATTR_IS_ACTIVE(vap, va_data_size)) {
-               /* size is meaningless on a directory, don't permit this */
-               if (vnode_isdir(vp)) {
-                       KAUTH_DEBUG("ATTR - ERROR: size change requested on a directory");
-                       error = EISDIR;
+               /* size is only meaningful on regular files, don't permit otherwise */
+               if (!vnode_isreg(vp)) {
+                       KAUTH_DEBUG("ATTR - ERROR: size change requested on non-file");
+                       error = vnode_isdir(vp) ? EISDIR : EINVAL;
                        goto out;
                }
        }
index a949a717d8753b67a5fdf51f7c589e42883feeec..adea23a19e24851d3a2c976671d002c5873f4285 100644 (file)
@@ -414,6 +414,14 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
 
        AUDIT_ARG(fflags, flags);
 
+#if SECURE_KERNEL
+       if (flags & MNT_UNION) {
+               /* No union mounts on release kernels */
+               error = EPERM;
+               goto out;
+       }
+#endif
+
        if ((vp->v_flag & VROOT) &&
                        (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
                if (!(flags & MNT_UNION)) {
@@ -431,7 +439,7 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
                        flags = (flags & ~(MNT_UPDATE));
                }
 
-#ifdef SECURE_KERNEL
+#if SECURE_KERNEL
                if ((flags & MNT_RDONLY) == 0) {
                        /* Release kernels are not allowed to mount "/" as rw */
                        error = EPERM;
@@ -6500,7 +6508,7 @@ copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
                return(EINVAL);
        }
 
-       NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
+       NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
                UIO_USERSPACE, uap->from, ctx);
        if ((error = namei(&fromnd)))
                return (error);
@@ -6555,8 +6563,6 @@ out:
 out1:
        vnode_put(fvp);
 
-       if (fromnd.ni_startdir)
-               vnode_put(fromnd.ni_startdir);
        nameidone(&fromnd);
 
        if (error == -1)
index 29c12c6f5efcf09ed367863ce48ed12ecca52b7c..958678722f49a7aad228343fa278515474342c7b 100644 (file)
@@ -741,18 +741,6 @@ __ZN18IOTimerEventSourceC2EPK11OSMetaClass
 __ZN18IOTimerEventSourceC2Ev
 __ZN18IOTimerEventSourceD0Ev
 __ZN18IOTimerEventSourceD2Ev
-__ZN18IOUserNotification10gMetaClassE
-__ZN18IOUserNotification10superClassE
-__ZN18IOUserNotification15setNotificationEP10IONotifier
-__ZN18IOUserNotification4freeEv
-__ZN18IOUserNotification4initEv
-__ZN18IOUserNotification5resetEv
-__ZN18IOUserNotification7isValidEv
-__ZN18IOUserNotification9MetaClassC1Ev
-__ZN18IOUserNotification9MetaClassC2Ev
-__ZN18IOUserNotification9metaClassE
-__ZN18IOUserNotificationC2EPK11OSMetaClass
-__ZN18IOUserNotificationD2Ev
 __ZN18_IOServiceNotifier10gMetaClassE
 __ZN18_IOServiceNotifier10superClassE
 __ZN18_IOServiceNotifier4freeEv
index 3aded3442183b6fb599447d5db522cb1730bee5c..a378f284b59cbe5fc1484d5d321b11fd73262549 100644 (file)
@@ -1,4 +1,4 @@
-15.0.0
+15.2.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
index bea0d10f06164c262f65320705d90bd16be39b95..061c64ae3fcd3a48358f6b039100d36cee0dba03 100644 (file)
@@ -110,5 +110,4 @@ enum {
     kIOClassNameOverrideNone = 0x00000001,
 };
 
-
 #endif /* ! _IOKIT_IOKITKEYSPRIVATE_H */
index 21ca1a97ae19652454c178ad18031825b129fa3f..2ecc15e63ccfecfb4f638cc4376884a0681bfb01 100644 (file)
@@ -507,6 +507,7 @@ public:
 
     bool        activitySinceSleep(void);
     bool        abortHibernation(void);
+    void        updateConsoleUsers(void);
 
     IOReturn    joinAggressiveness( IOService * service );
     void        handleAggressivesRequests( void );
@@ -718,6 +719,7 @@ private:
     unsigned int            toldPowerdCapWillChange :1;
     unsigned int            displayPowerOnRequested:1;
 
+    uint8_t                 tasksSuspended;
     uint32_t                hibernateMode;
     AbsoluteTime            userActivityTime;
     AbsoluteTime            userActivityTime_prev;
index 17cd841bcefa9f0776f5d9487e0c3ae9f5c0cc50..ebaa3087d4a4370b5113738c244678eaa8282bde 100644 (file)
@@ -297,7 +297,10 @@ bool IODTNVRAM::serializeProperties(OSSerialize *s) const
           && (current_task() == kernel_task || mac_iokit_check_nvram_get(kauth_cred_get(), key->getCStringNoCopy()) == 0)
 #endif
          ) { }
-      else dict->removeObject(key);
+      else {
+        dict->removeObject(key);
+        iter->reset();
+      }
     }
   }
 
index a2762212863f3bb29974ac0314f860bbeaa61131..2d6c7b79fa3d3f5e4b00ca1286b495ab75091792 100644 (file)
@@ -664,8 +664,19 @@ IOPMrootDomain * IOPMrootDomain::construct( void )
 //******************************************************************************
 
 static void updateConsoleUsersCallout(thread_call_param_t p0, thread_call_param_t p1)
+{
+    IOPMrootDomain * rootDomain = (IOPMrootDomain *) p0;
+    rootDomain->updateConsoleUsers();
+}
+
+void IOPMrootDomain::updateConsoleUsers(void)
 {
     IOService::updateConsoleUsers(NULL, kIOMessageSystemHasPoweredOn);
+    if (tasksSuspended)
+    {
+        tasksSuspended = FALSE;
+        tasks_system_suspend(tasksSuspended);
+    }
 }
 
 //******************************************************************************
@@ -3077,6 +3088,16 @@ void IOPMrootDomain::willNotifyPowerChildren( IOPMPowerStateIndex newPowerState
 
     if (SLEEP_STATE == newPowerState)
     {
+        if (!tasksSuspended)
+        {
+           AbsoluteTime deadline;
+           tasksSuspended = TRUE;
+           tasks_system_suspend(tasksSuspended);
+
+           clock_interval_to_deadline(10, kSecondScale, &deadline);
+           vm_pageout_wait(AbsoluteTime_to_scalar(&deadline));
+        }
+
 #if HIBERNATION
         IOHibernateSystemSleep();
         IOHibernateIOKitSleep();
index bed5b5e4e5917bc23d33816cf30dcf0bf346c554..6ff5f289d8f1ea70fc85f7801a0fd8e7ebf8b25b 100644 (file)
@@ -411,6 +411,7 @@ void IOService::initialize( void )
     gIOStopProviderList    = OSArray::withCapacity( 16 );
     gIOFinalizeList       = OSArray::withCapacity( 16 );
     assert( gIOTerminatePhase2List && gIOStopList && gIOStopProviderList && gIOFinalizeList );
+
 }
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -4039,6 +4040,7 @@ OSObject * IOService::copyExistingServices( OSDictionary * matching,
            const OSSymbol * sym = OSSymbol::withString(str);
            OSMetaClass::applyToInstancesOfClassName(sym, instanceMatch, &ctx);
            sym->release();
+
        }
        else
        {
@@ -5071,7 +5073,9 @@ bool IOService::matchInternal(OSDictionary * table, uint32_t options, uint32_t *
     {
        count = table->getCount();
        done = 0;
+
        str = OSDynamicCast(OSString, table->getObject(gIOProviderClassKey));
+
        if (str) {
            done++;
            match = ((kIOServiceClassDone & options) || (0 != metaCast(str)));
@@ -5232,6 +5236,7 @@ bool IOService::matchPassive(OSDictionary * table, uint32_t options)
         do
         {
            count = table->getCount();
+            
            if (!(kIOServiceInternalDone & options))
            {
                match = where->matchInternal(table, options, &done);
@@ -5244,7 +5249,7 @@ bool IOService::matchPassive(OSDictionary * table, uint32_t options)
 
             // do family specific matching
             match = where->matchPropertyTable( table, &score );
-
+            
             if( !match) {
 #if IOMATCHDEBUG
                 if( kIOLogMatch & getDebugFlags( table ))
@@ -5267,7 +5272,8 @@ bool IOService::matchPassive(OSDictionary * table, uint32_t options)
 
             nextTable = OSDynamicCast(OSDictionary,
                   table->getObject( gIOParentMatchKey ));
-            if( nextTable) {
+            if(nextTable) {
+                
                // look for a matching entry anywhere up to root
                 match = false;
                 matchParent = true;
index 6c9ec5df7e1248ae9717c5862c8e2f65f0381302..8c628ddf1f83c7ee367a4c792fc1793ee69de629 100644 (file)
@@ -354,16 +354,32 @@ void IOMachPort::free( void )
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-class IOUserNotification : public OSIterator
+class IOUserIterator : public OSIterator
+{
+    OSDeclareDefaultStructors(IOUserIterator)
+public:
+    OSObject   *       userIteratorObject;
+    IOLock     *       lock;
+
+    static IOUserIterator * withIterator(OSIterator * iter);
+    virtual bool init( void ) APPLE_KEXT_OVERRIDE;
+    virtual void free() APPLE_KEXT_OVERRIDE;
+
+    virtual void reset() APPLE_KEXT_OVERRIDE;
+    virtual bool isValid() APPLE_KEXT_OVERRIDE;
+    virtual OSObject * getNextObject() APPLE_KEXT_OVERRIDE;
+};
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+class IOUserNotification : public IOUserIterator
 {
     OSDeclareDefaultStructors(IOUserNotification)
 
-    IONotifier         *       holdNotify;
-    IOLock     *       lock;
+#define holdNotify     userIteratorObject
 
 public:
 
-    virtual bool init( void ) APPLE_KEXT_OVERRIDE;
     virtual void free() APPLE_KEXT_OVERRIDE;
 
     virtual void setNotification( IONotifier * obj );
@@ -372,6 +388,84 @@ public:
     virtual bool isValid() APPLE_KEXT_OVERRIDE;
 };
 
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+OSDefineMetaClassAndStructors( IOUserIterator, OSIterator )
+
+IOUserIterator *
+IOUserIterator::withIterator(OSIterator * iter)
+{
+    IOUserIterator * me;
+
+    if (!iter) return (0);
+
+    me = new IOUserIterator;
+    if (me && !me->init())
+    {
+       me->release();
+       me = 0;
+    }
+    if (!me) return me;
+    me->userIteratorObject = iter;
+
+    return (me);
+}
+
+bool
+IOUserIterator::init( void )
+{
+    if (!OSObject::init()) return (false);
+
+    lock = IOLockAlloc();
+    if( !lock)
+        return( false );
+
+    return (true);
+}
+
+void
+IOUserIterator::free()
+{
+    if (userIteratorObject) userIteratorObject->release();
+    if (lock) IOLockFree(lock);
+    OSObject::free();
+}
+
+void
+IOUserIterator::reset()
+{
+    IOLockLock(lock);
+    assert(OSDynamicCast(OSIterator, userIteratorObject));
+    ((OSIterator *)userIteratorObject)->reset();
+    IOLockUnlock(lock);
+}
+
+bool
+IOUserIterator::isValid()
+{
+    bool ret;
+
+    IOLockLock(lock);
+    assert(OSDynamicCast(OSIterator, userIteratorObject));
+    ret = ((OSIterator *)userIteratorObject)->isValid();
+    IOLockUnlock(lock);
+
+    return (ret);
+}
+
+OSObject *
+IOUserIterator::getNextObject()
+{
+    OSObject * ret;
+
+    IOLockLock(lock);
+    assert(OSDynamicCast(OSIterator, userIteratorObject));
+    ret = ((OSIterator *)userIteratorObject)->getNextObject();
+    IOLockUnlock(lock);
+
+    return (ret);
+}
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 extern "C" {
 
@@ -513,40 +607,29 @@ public:
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 #undef super
-#define super OSIterator
-OSDefineMetaClass( IOUserNotification, OSIterator )
-OSDefineAbstractStructors( IOUserNotification, OSIterator )
+#define super IOUserIterator
+OSDefineMetaClass( IOUserNotification, IOUserIterator )
+OSDefineAbstractStructors( IOUserNotification, IOUserIterator )
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-bool IOUserNotification::init( void )
-{
-    if( !super::init())
-       return( false );
-
-    lock = IOLockAlloc();
-    if( !lock)
-        return( false );
-
-    return( true );
-}
-
 void IOUserNotification::free( void )
 {
-    if( holdNotify)
-       holdNotify->remove();
+    if (holdNotify)
+    {
+       assert(OSDynamicCast(IONotifier, holdNotify));
+       ((IONotifier *)holdNotify)->remove();
+       holdNotify = 0;
+    }
     // can't be in handler now
 
-    if( lock)
-       IOLockFree( lock );
-
     super::free();
 }
 
 
 void IOUserNotification::setNotification( IONotifier * notify )
 {
-    IONotifier * previousNotify;
+    OSObject * previousNotify;
 
     IOLockLock( gIOObjectPortLock);
 
@@ -556,7 +639,10 @@ void IOUserNotification::setNotification( IONotifier * notify )
     IOLockUnlock( gIOObjectPortLock);
 
     if( previousNotify)
-       previousNotify->remove();
+    {
+       assert(OSDynamicCast(IONotifier, previousNotify));
+       ((IONotifier *)previousNotify)->remove();
+    }
 }
 
 void IOUserNotification::reset()
@@ -1503,6 +1589,14 @@ extern "C" {
        if( !(out = OSDynamicCast( cls, obj)))  \
            return( kIOReturnBadArgument )
 
+#define CHECKLOCKED(cls,obj,out)                                       \
+       IOUserIterator * oIter;                                         \
+       cls * out;                                                      \
+       if( !(oIter = OSDynamicCast(IOUserIterator, obj)))              \
+           return (kIOReturnBadArgument);                              \
+       if( !(out = OSDynamicCast(cls, oIter->userIteratorObject)))     \
+           return (kIOReturnBadArgument)
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 // Create a vm_map_copy_t or kalloc'ed data for memory
@@ -1671,6 +1765,7 @@ kern_return_t is_io_iterator_next(
        io_object_t iterator,
        io_object_t *object )
 {
+    IOReturn    ret;
     OSObject * obj;
 
     CHECK( OSIterator, iterator, iter );
@@ -1679,9 +1774,11 @@ kern_return_t is_io_iterator_next(
     if( obj) {
        obj->retain();
        *object = obj;
-        return( kIOReturnSuccess );
+        ret = kIOReturnSuccess;
     } else
-        return( kIOReturnNoDevice );
+        ret = kIOReturnNoDevice;
+
+    return (ret);
 }
 
 /* Routine io_iterator_reset */
@@ -1723,6 +1820,7 @@ static kern_return_t internal_io_service_match_property_table(
     obj = matching_size ? OSUnserializeXML(matching, matching_size)
                        : OSUnserializeXML(matching);
     if( (dict = OSDynamicCast( OSDictionary, obj))) {
+
         *matches = service->passiveMatch( dict );
        kr = kIOReturnSuccess;
     } else
@@ -1795,7 +1893,7 @@ static kern_return_t internal_io_service_get_matching_services(
     obj = matching_size ? OSUnserializeXML(matching, matching_size)
                        : OSUnserializeXML(matching);
     if( (dict = OSDynamicCast( OSDictionary, obj))) {
-        *existing = IOService::getMatchingServices( dict );
+        *existing = IOUserIterator::withIterator(IOService::getMatchingServices( dict ));
        kr = kIOReturnSuccess;
     } else
        kr = kIOReturnBadArgument;
@@ -2277,8 +2375,9 @@ kern_return_t is_io_registry_create_iterator(
     if( master_port != master_device_port)
         return( kIOReturnNotPrivileged);
 
-    *iterator = IORegistryIterator::iterateOver(
-       IORegistryEntry::getPlane( plane ), options );
+    *iterator = IOUserIterator::withIterator(
+       IORegistryIterator::iterateOver(
+               IORegistryEntry::getPlane( plane ), options ));
 
     return( *iterator ? kIOReturnSuccess : kIOReturnBadArgument );
 }
@@ -2292,8 +2391,9 @@ kern_return_t is_io_registry_entry_create_iterator(
 {
     CHECK( IORegistryEntry, registry_entry, entry );
 
-    *iterator = IORegistryIterator::iterateOver( entry,
-       IORegistryEntry::getPlane( plane ), options );
+    *iterator = IOUserIterator::withIterator(
+       IORegistryIterator::iterateOver( entry,
+               IORegistryEntry::getPlane( plane ), options ));
 
     return( *iterator ? kIOReturnSuccess : kIOReturnBadArgument );
 }
@@ -2302,9 +2402,11 @@ kern_return_t is_io_registry_entry_create_iterator(
 kern_return_t is_io_registry_iterator_enter_entry(
        io_object_t iterator )
 {
-    CHECK( IORegistryIterator, iterator, iter );
+    CHECKLOCKED( IORegistryIterator, iterator, iter );
 
+    IOLockLock(oIter->lock);
     iter->enterEntry();
+    IOLockUnlock(oIter->lock);
 
     return( kIOReturnSuccess );
 }
@@ -2315,9 +2417,11 @@ kern_return_t is_io_registry_iterator_exit_entry(
 {
     bool       didIt;
 
-    CHECK( IORegistryIterator, iterator, iter );
+    CHECKLOCKED( IORegistryIterator, iterator, iter );
 
+    IOLockLock(oIter->lock);
     didIt = iter->exitEntry();
+    IOLockUnlock(oIter->lock);
 
     return( didIt ? kIOReturnSuccess : kIOReturnNoDevice );
 }
index ee046b925b52855b57b22ab4933ce25d8d29bc72..fb623933c21675e9ef1af2f7b6b28c7b02812f55 100644 (file)
@@ -75,6 +75,12 @@ EXPORT_MI_LIST = \
        kxld_types.h           \
        stack_protector.h
 
+INSTALL_KF_MI_LCL_LIST +=          \
+       section_keywords.h
+
+EXPORT_MI_LIST +=          \
+       section_keywords.h
+
 EXPORT_MI_GEN_LIST = version.h
 
 EXPORT_MI_DIR = libkern
diff --git a/libkern/libkern/section_keywords.h b/libkern/libkern/section_keywords.h
new file mode 100644 (file)
index 0000000..511d9db
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _SECTION_KEYWORDS_H
+#define _SECTION_KEYWORDS_H
+
+
+/* Default behaviour */
+#ifndef SECURITY_READ_ONLY_EARLY
+#define __PLACE_IN_SECTION(__segment__section) \
+       __attribute__((used, section(__segment__section)))
+
+#define SECURITY_READ_ONLY_SPECIAL_SECTION(_t,__segment__section) \
+       const _t __PLACE_IN_SECTION(__segment__section)
+
+#define SECURITY_READ_ONLY_EARLY(_t) const _t
+
+#define SECURITY_READ_ONLY_LATE(_t) _t
+
+#define SECURITY_READ_WRITE(_t) _t __attribute__((used))
+#endif /* SECURITY_READ_ONLY_EARLY */
+
+
+#endif /* _SECTION_KEYWORDS_H_ */
index 020291bc1bad733a66ebb11fb5d005f0356f457e..f90ac37a73baf9bc33ab4647cf23cd4027ec4188 100644 (file)
@@ -331,8 +331,15 @@ voidpf zcalloc (opaque, items, size)
     unsigned size;
 {
     if (opaque) items += size - size; /* make compiler happy */
-    return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) :
-                              (voidpf)calloc(items, size);
+    if (sizeof(uInt) > 2) {
+        /*
+            to prevent use of uninitialized memory, malloc and bzero
+        */
+        voidpf  p = malloc(items * size);
+        bzero(p, items * size); 
+        return p;
+    } else
+        return (voidpf)calloc(items, size);
 }
 
 void  zcfree (opaque, ptr)
index 8243933765dd745682d6fbba8a80a1e20b96387e..10371199fa4636cdf985ff9813c47d829af442ea 100644 (file)
                74119F46188F3B6A00C6F48F /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; };
                7466C924170CBA53004557CC /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; };
                74F3290B18EB269400B2B70E /* vm_page_size.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; };
+               978228281B8678DC008385AC /* pselect-darwinext.c in Sources */ = {isa = PBXBuildFile; fileRef = 978228271B8678CB008385AC /* pselect-darwinext.c */; };
+               978228291B8678DF008385AC /* pselect-darwinext-cancel.c in Sources */ = {isa = PBXBuildFile; fileRef = 978228261B8678C2008385AC /* pselect-darwinext-cancel.c */; };
                7AE28FDF18AC41B1006A5626 /* csr.c in Sources */ = {isa = PBXBuildFile; fileRef = 7AE28FDE18AC41B1006A5626 /* csr.c */; };
                9002401118FC9A7F00D73BFA /* rename_ext.c in Sources */ = {isa = PBXBuildFile; fileRef = 906AA2D018F74CD1001C681A /* rename_ext.c */; };
+               928336A11B83ED9100873B90 /* thread_register_state.c in Sources */ = {isa = PBXBuildFile; fileRef = 928336A01B83ED7800873B90 /* thread_register_state.c */; };
+               9299E14A1B841E74005B7350 /* thread_state.h in Headers */ = {isa = PBXBuildFile; fileRef = 928336A21B8412C100873B90 /* thread_state.h */; };
+               9299E14B1B841F59005B7350 /* thread_state.h in Headers */ = {isa = PBXBuildFile; fileRef = 928336A21B8412C100873B90 /* thread_state.h */; };
                A59CB95616669EFB00B064B3 /* stack_logging_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = A59CB95516669DB700B064B3 /* stack_logging_internal.h */; };
                A59CB9581666A1A200B064B3 /* munmap.c in Sources */ = {isa = PBXBuildFile; fileRef = A59CB9571666A1A200B064B3 /* munmap.c */; };
                BA0D9FB1199031AD007E8A73 /* kdebug_trace.c in Sources */ = {isa = PBXBuildFile; fileRef = BA0D9FB0199031AD007E8A73 /* kdebug_trace.c */; };
                E4D45C3F16FB20D30002AF25 /* spawn.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C3D16FB20970002AF25 /* spawn.h */; settings = {ATTRIBUTES = (Public, ); }; };
                E4D45C4016FB20DC0002AF25 /* spawn_private.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C3E16FB20970002AF25 /* spawn_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
                E4D7E55C16F8776300F92D8D /* index.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55316F8776300F92D8D /* index.c */; };
-               E4D7E55E16F8776300F92D8D /* memset.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55516F8776300F92D8D /* memset.c */; };
+               E4D7E55E16F8776300F92D8D /* memset.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55516F8776300F92D8D /* memset.c */; settings = {COMPILER_FLAGS = "-fno-builtin"; }; };
                E4D7E55F16F8776300F92D8D /* strcmp.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55616F8776300F92D8D /* strcmp.c */; };
                E4D7E56016F8776300F92D8D /* strcpy.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55716F8776300F92D8D /* strcpy.c */; };
                E4D7E56116F8776300F92D8D /* strlcpy.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55916F8776300F92D8D /* strlcpy.c */; };
                4BDD5F1C1891AB2F004BF300 /* mach_approximate_time.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = mach_approximate_time.s; sourceTree = "<group>"; };
                72B1E6EC190723DB00FB3FA2 /* guarded_open_dprotected_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = guarded_open_dprotected_np.c; sourceTree = "<group>"; };
                7466C923170CB99B004557CC /* vm_page_size.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = vm_page_size.h; sourceTree = "<group>"; };
+               978228261B8678C2008385AC /* pselect-darwinext-cancel.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pselect-darwinext-cancel.c"; sourceTree = "<group>"; };
+               978228271B8678CB008385AC /* pselect-darwinext.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pselect-darwinext.c"; sourceTree = "<group>"; };
                7AE28FDE18AC41B1006A5626 /* csr.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = csr.c; sourceTree = "<group>"; };
                906AA2D018F74CD1001C681A /* rename_ext.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = rename_ext.c; sourceTree = "<group>"; };
+               928336A01B83ED7800873B90 /* thread_register_state.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = thread_register_state.c; sourceTree = "<group>"; };
+               928336A21B8412C100873B90 /* thread_state.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = thread_state.h; sourceTree = "<group>"; };
                A59CB95516669DB700B064B3 /* stack_logging_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stack_logging_internal.h; sourceTree = "<group>"; };
                A59CB9571666A1A200B064B3 /* munmap.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = munmap.c; sourceTree = "<group>"; };
                BA0D9FB0199031AD007E8A73 /* kdebug_trace.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kdebug_trace.c; sourceTree = "<group>"; };
                                C6460B7B182025DF00F73CCA /* sfi.c */,
                                24B223B3121DFF12007DAEDE /* sigsuspend-base.c */,
                                13B598931A142F5900DB2D5A /* stackshot.c */,
+                               928336A01B83ED7800873B90 /* thread_register_state.c */,
                                248AA962122C7B2A0085F5B1 /* unlink.c */,
                                29A59AE5183B110C00E8B896 /* unlinkat.c */,
                                374A36E214748EE400AAF39D /* varargs_wrappers.s */,
                248BA04A121C8EE4008C073F /* cancelable */ = {
                        isa = PBXGroup;
                        children = (
+                               978228271B8678CB008385AC /* pselect-darwinext.c */,
+                               978228261B8678C2008385AC /* pselect-darwinext-cancel.c */,
                                248BA04B121C8EE4008C073F /* fcntl-base.c */,
                                248BA04E121C8F06008C073F /* fcntl.c */,
                                248BA051121C8FE2008C073F /* fcntl-cancel.c */,
                                C9D9BCDD114B00600000D8B9 /* mach_interface.h */,
                                C9D9BCDF114B00600000D8B9 /* port_obj.h */,
                                C9D9BCE0114B00600000D8B9 /* sync.h */,
+                               928336A21B8412C100873B90 /* thread_state.h */,
                                C9D9BCE3114B00600000D8B9 /* vm_task.h */,
                                7466C923170CB99B004557CC /* vm_page_size.h */,
                        );
                                C6C401241741566D000AE69F /* gethostuuid_private.h in Headers */,
                                C6D3EFB916542C510052CF30 /* mach.h in Headers */,
                                C6D3EFBA16542C510052CF30 /* mach_error.h in Headers */,
+                               9299E14B1B841F59005B7350 /* thread_state.h in Headers */,
                                C6D3EFBB16542C510052CF30 /* mach_init.h in Headers */,
                                C6D3EFBC16542C510052CF30 /* mach_interface.h in Headers */,
                                C6D3EFBD16542C510052CF30 /* port_obj.h in Headers */,
                                C9D9BD26114B00600000D8B9 /* mach.h in Headers */,
                                C9D9BD27114B00600000D8B9 /* mach_error.h in Headers */,
                                C9D9BD28114B00600000D8B9 /* mach_init.h in Headers */,
+                               9299E14A1B841E74005B7350 /* thread_state.h in Headers */,
                                C6C40122174155E3000AE69F /* gethostuuid_private.h in Headers */,
                                C9D9BD29114B00600000D8B9 /* mach_interface.h in Headers */,
                                C9D9BD2B114B00600000D8B9 /* port_obj.h in Headers */,
                                C9D9BD56114B00600000D8B9 /* slot_name.c in Sources */,
                                24484A7511F6178E00E10CD2 /* string.c in Sources */,
                                E453AF351700FD3C00F2C94C /* getiopolicy_np.c in Sources */,
+                               978228281B8678DC008385AC /* pselect-darwinext.c in Sources */,
                                2485235511582D8F0051B413 /* mach_legacy.c in Sources */,
                                242AB66611EBDC1200107336 /* errno.c in Sources */,
                                E4D45C2E16F868ED0002AF25 /* libproc.c in Sources */,
                                24A7C5C711FF8DA6007669EB /* sendto.c in Sources */,
                                24A7C5C811FF8DA6007669EB /* setattrlist.c in Sources */,
                                24A7C5C911FF8DA6007669EB /* socketpair.c in Sources */,
+                               928336A11B83ED9100873B90 /* thread_register_state.c in Sources */,
                                9002401118FC9A7F00D73BFA /* rename_ext.c in Sources */,
                                2419382B12135FF6003CDE41 /* chmod.c in Sources */,
                                248BA01D121C56BF008C073F /* connect.c in Sources */,
                                248AA967122C7CDA0085F5B1 /* rename.c in Sources */,
                                24B8C2621237F53900D36CC3 /* remove-counter.c in Sources */,
                                C99A4F501305B2BD0054B7B7 /* __get_cpu_capabilities.s in Sources */,
+                               978228291B8678DF008385AC /* pselect-darwinext-cancel.c in Sources */,
                                C99A4F531305B43F0054B7B7 /* init_cpu_capabilities.c in Sources */,
                                030B179B135377B400DAD1F0 /* open_dprotected_np.c in Sources */,
                                E4D45C3116F868ED0002AF25 /* proc_listpidspath.c in Sources */,
index bc059a5dd7b35742858fc070daec8f7b980e1db3..0cfc2646aa7db53f147579eae6df4a83ac950def 100644 (file)
@@ -85,6 +85,11 @@ static const char * const err_codes_kern[] = {
        "(os/kern) let orphan continue",        /* 45 */
        "(os/kern) service not supported",
        "(os/kern) remote node down",
+       "(os/kern) thread not waiting",
+       "(os/kern) operation timed out",
+       "(os/kern) code signing error",         /* 50 */
+       "(os/kern) policy is static",
+       "(os/kern) insufficient input buffer size",
 };
 
 static const char * const err_codes_unix[] = {
diff --git a/libsyscall/mach/mach/thread_state.h b/libsyscall/mach/mach/thread_state.h
new file mode 100644 (file)
index 0000000..42abc4e
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2015 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef        _MACH_THREAD_STATE_H_
+#define _MACH_THREAD_STATE_H_
+
+#include <Availability.h>
+#include <mach/mach.h>
+
+#ifndef KERNEL
+/*
+ * Gets all register values in the target thread with pointer-like contents.
+ * There's no guarantee that the returned values are valid pointers, but all
+ * valid pointers will be returned.  The order and count of the provided
+ * register values is unspecified and may change; registers with values that
+ * are not valid pointers may be omitted, so the number of pointers returned
+ * may vary from call to call.
+ *
+ * sp is an out parameter that will contain the stack pointer
+ * length is an in/out parameter for the length of the values array
+ * values is an array of pointers
+ *
+ * This may only be called on threads in the current task.  If the current
+ * platform defines a stack red zone, the stack pointer returned will be
+ * adjusted to account for red zone.
+ *
+ * If length is insufficient KERN_INSUFFICIENT_BUFFER_SIZE will be returned and
+ * length set to the amount of memory required.  Callers MUST NOT assume that
+ * any particular size of buffer will be sufficient and should retry with an
+ * aproproately sized buffer upon this error.
+ */
+__OSX_UNAVAILABLE
+__IOS_UNAVAILABLE
+__TVOS_AVAILABLE(9.0)
+__WATCHOS_UNAVAILABLE
+kern_return_t thread_get_register_pointer_values(thread_t thread, uintptr_t *sp, size_t *length, uintptr_t *values);
+#endif
+
+#endif /* _MACH_THREAD_STATE_H_ */
diff --git a/libsyscall/mach/tvos_prohibited_mig.txt b/libsyscall/mach/tvos_prohibited_mig.txt
new file mode 100644 (file)
index 0000000..5b432b2
--- /dev/null
@@ -0,0 +1,47 @@
+__TVOS_PROHIBITED
+thread_terminate
+act_get_state
+act_set_state
+thread_depress_abort
+thread_get_special_port
+thread_set_special_port
+thread_set_exception_ports
+thread_get_exception_ports
+thread_swap_exception_ports
+thread_get_mach_voucher
+thread_set_mach_voucher
+thread_swap_mach_voucher
+mach_ports_register
+mach_ports_lookup
+task_suspend
+task_resume
+task_set_info
+task_get_special_port
+task_set_special_port
+thread_create
+thread_create_running
+task_set_exception_ports
+task_get_exception_ports
+task_swap_exception_ports
+task_policy_set
+task_policy_get
+task_zone_info
+task_get_state
+task_set_state
+task_set_phys_footprint_limit
+task_suspend2
+task_resume2
+task_get_mach_voucher
+task_set_mach_voucher
+task_swap_mach_voucher
+task_set_port_space
+host_request_notification
+host_info
+task_wire
+mach_port_allocate_name
+host_create_mach_voucher
+host_register_mach_voucher_attr_manager
+host_register_well_known_mach_voucher_attr_manager
+host_set_atm_diagnostic_flag
+host_get_atm_diagnostic_flag
+
index 4d27c6243564a23164c7d869834597833bdb8f7a..5201c016535925b94109c3cbdf7febdf57f2fe4a 100644 (file)
@@ -1,4 +1,4 @@
-__WATCHOS_PROHIBITED __TVOS_PROHIBITED
+__WATCHOS_PROHIBITED
 thread_terminate
 act_get_state
 act_set_state
diff --git a/libsyscall/wrappers/cancelable/pselect-darwinext-cancel.c b/libsyscall/wrappers/cancelable/pselect-darwinext-cancel.c
new file mode 100644 (file)
index 0000000..54ea913
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#define VARIANT_CANCELABLE
+#define VARIANT_DARWIN_EXTSN
+
+#include "../select-base.c"
diff --git a/libsyscall/wrappers/cancelable/pselect-darwinext.c b/libsyscall/wrappers/cancelable/pselect-darwinext.c
new file mode 100644 (file)
index 0000000..4bfb1b7
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#undef __DARWIN_NON_CANCELABLE
+#define __DARWIN_NON_CANCELABLE 1
+#define VARIANT_DARWIN_EXTSN
+
+#include "../select-base.c"
index 09f8816f662b53e0e8cb7b9c95dbcac402bb962a..f688d6f36bbcdc00f95711157a2b4a35fb1c41d8 100644 (file)
 #define __DARWIN_NON_CANCELABLE 0
 #endif /* __LP64__ && (VARIANT_CANCELABLE || VARIANT_PRE1050) */
 
+#if defined(VARIANT_DARWIN_EXTSN)
+#define _DARWIN_C_SOURCE
+#define _DARWIN_UNLIMITED_SELECT
+#endif
+
 #include <sys/select.h>
+#include <sys/time.h>
+#include <sys/signal.h>
 #include "_errno.h"
 
 #if defined(VARIANT_CANCELABLE) || defined(VARIANT_PRE1050)
+#if !defined(VARIANT_DARWIN_EXTSN)
 extern int __select(int, fd_set * __restrict, fd_set * __restrict,
        fd_set * __restrict, struct timeval * __restrict);
+#endif
+int __pselect(int, fd_set * __restrict, fd_set * __restrict,
+       fd_set * __restrict, const struct timespec * __restrict, const sigset_t * __restrict);
 #else /* !VARIANT_CANCELABLE && !VARIANT_PRE1050 */
+#if !defined(VARIANT_DARWIN_EXTSN)
 int __select_nocancel(int, fd_set * __restrict, fd_set * __restrict,
        fd_set * __restrict, struct timeval * __restrict);
+#endif
+int __pselect_nocancel(int, fd_set * __restrict, fd_set * __restrict,
+       fd_set * __restrict, const struct timespec * __restrict, const sigset_t * __restrict);
 #endif /* VARIANT_CANCELABLE || VARIANT_PRE1050 */
 
+#if !defined(VARIANT_DARWIN_EXTSN)
 /*
- * select stub, return error if nfds > FD_SETSIZE
- * add pthread cancelability
- * mandated for conformance.
- *
- * This is only for (non DARWINEXTSN) UNIX03 (both cancelable and
- * non-cancelable) and for legacy
+ * select() implementation for 1050 and legacy (cancelable and non-cancelable)
+ * variants. The darwin extension variants (both cancelable & non-cancelable) are
+ * mapped directly to the syscall stub.
  */
 int
 select(int nfds, fd_set * __restrict readfds, fd_set * __restrict writefds,
@@ -55,7 +68,6 @@ select(int nfds, fd_set * __restrict readfds, fd_set * __restrict writefds,
 #endif /* VARIANT_LEGACY || VARIANT_PRE1050 */
        )
 {
-
 #if defined(VARIANT_LEGACY) || defined(VARIANT_PRE1050)
        struct timeval tb, *timeout;
 
@@ -66,17 +78,111 @@ select(int nfds, fd_set * __restrict readfds, fd_set * __restrict writefds,
                tb.tv_sec = 0;
                tb.tv_usec = 10000;
                timeout = &tb;
-       } else
+       } else {
                timeout = intimeout;
+       }
 #else /* !VARIANT_LEGACY && !VARIANT_PRE1050 */
        if (nfds > FD_SETSIZE) {
                errno = EINVAL;
                return -1;
        }
-#endif /* VARIANT_LEGACY || VARIANT_PRE1050 */
+#endif
+
 #if defined(VARIANT_CANCELABLE) || defined(VARIANT_PRE1050)
        return __select(nfds, readfds, writefds, exceptfds, timeout);
 #else /* !VARIANT_CANCELABLE && !VARIANT_PRE1050 */
        return __select_nocancel(nfds, readfds, writefds, exceptfds, timeout);
 #endif /* VARIANT_CANCELABLE || VARIANT_PRE1050 */
 }
+#endif /* !defined(VARIANT_DARWIN_EXTSN) */
+
+
+/*
+ * User-space emulation of pselect() syscall for B&I
+ * TODO: remove when B&I move to xnu with native pselect()
+ */
+extern int __pthread_sigmask(int, const sigset_t *, sigset_t *);
+static int
+_pselect_emulated(int count, fd_set * __restrict rfds, fd_set * __restrict wfds,
+               fd_set * __restrict efds, const struct timespec * __restrict timo,
+               const sigset_t * __restrict mask)
+{
+       sigset_t omask;
+       struct timeval tvtimo, *tvp;
+       int rv, sverrno;
+
+       if (timo) {
+               tvtimo.tv_sec = timo->tv_sec;
+               tvtimo.tv_usec = (__darwin_suseconds_t)(timo->tv_nsec / 1000);
+               tvp = &tvtimo;
+       } else {
+               tvp = 0;
+       }
+
+       if (mask != 0) {
+               rv = __pthread_sigmask(SIG_SETMASK, mask, &omask);
+               if (rv != 0)
+                       return rv;
+       }
+
+       rv = select(count, rfds, wfds, efds, tvp);
+       if (mask != 0) {
+               sverrno = errno;
+               __pthread_sigmask(SIG_SETMASK, &omask, (sigset_t *)0);
+               errno = sverrno;
+       }
+
+       return rv;
+}
+
+/*
+ * pselect() implementation for all variants. Unlike select(), we implement the
+ * darwin extension variants here to catch cases where xnu doesn't implement
+ * pselect and we need to emulate.
+ */
+int
+pselect(int nfds, fd_set * __restrict readfds, fd_set * __restrict writefds,
+       fd_set * __restrict exceptfds, const struct timespec * __restrict
+#if defined(VARIANT_LEGACY) || defined(VARIANT_PRE1050)
+       intimeout,
+#else /* !VARIANT_LEGACY && !VARIANT_PRE1050 */
+       timeout,
+#endif /* VARIANT_LEGACY || VARIANT_PRE1050 */
+       const sigset_t * __restrict sigmask)
+{
+       int ret;
+#if defined(VARIANT_LEGACY) || defined(VARIANT_PRE1050)
+       struct timespec tb;
+       const struct timespec *timeout;
+
+       /*
+        * Legacy select behavior is minimum 10 msec when tv_usec is non-zero
+        */
+       if (intimeout && intimeout->tv_sec == 0 && intimeout->tv_nsec > 0 && intimeout->tv_nsec < 10000000L) {
+               tb.tv_sec = 0;
+               tb.tv_nsec = 10000000L;
+               timeout = &tb;
+       } else {
+               timeout = intimeout;
+       }
+#elif defined(VARIANT_DARWIN_EXTSN)
+#else
+       /* 1050 variant */
+       if (nfds > FD_SETSIZE) {
+               errno = EINVAL;
+               return -1;
+       }
+#endif
+
+#if defined(VARIANT_CANCELABLE) || defined(VARIANT_PRE1050)
+       ret = __pselect(nfds, readfds, writefds, exceptfds, timeout, sigmask);
+#else /* !VARIANT_CANCELABLE && !VARIANT_PRE1050 */
+       ret = __pselect_nocancel(nfds, readfds, writefds, exceptfds, timeout, sigmask);
+#endif /* VARIANT_CANCELABLE || VARIANT_PRE1050 */
+
+       if (ret == -1 && errno == ENOSYS) {
+               ret = _pselect_emulated(nfds, readfds, writefds, exceptfds, timeout, sigmask);
+       }
+
+       return ret;
+}
index 3ae3c7717abca3f2fa6e488a9647031e6c772157..cab6587d6f059317ae0a244c941ba5693ba67e93 100644 (file)
@@ -36,6 +36,8 @@
 #define        wsize   sizeof(u_int)
 #define        wmask   (wsize - 1)
 
+// n.b. this must be compiled with -fno-builtin or it might get optimized into
+// a recursive call to bzero.
 __attribute__((visibility("hidden")))
 void
 bzero(void *dst0, size_t length)
diff --git a/libsyscall/wrappers/thread_register_state.c b/libsyscall/wrappers/thread_register_state.c
new file mode 100644 (file)
index 0000000..2fa4783
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <mach/mach.h>
+
+kern_return_t
+thread_get_register_pointer_values(thread_t thread, uintptr_t *sp, size_t *length, uintptr_t *values)
+{
+    if (!length) return KERN_INVALID_ARGUMENT;
+    if (*length > 0 && values == NULL) return KERN_INVALID_ARGUMENT;
+
+    size_t in_length = *length;
+    size_t out_length = 0;
+
+#if defined(__i386__)
+    i386_thread_state_t state = {};
+    thread_state_flavor_t flavor = x86_THREAD_STATE32;
+    mach_msg_type_number_t count = i386_THREAD_STATE_COUNT;
+#elif defined(__x86_64__)
+    x86_thread_state64_t state = {};
+    thread_state_flavor_t flavor = x86_THREAD_STATE64;
+    mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
+#else
+#error thread_get_register_pointer_values not defined for this architecture
+#endif
+
+    kern_return_t ret = thread_get_state(thread, flavor, (thread_state_t)&state, &count);
+    if (ret != KERN_SUCCESS){
+        return ret;
+    }
+
+    // If the provided pointer value is > PAGE_SIZE, add it to the output array
+    // if there's available space.  (Values between 0 and PAGE_SIZE are the NULL page
+    // and not valid pointers.)
+#define push_register_value(p) do { \
+    if ((uintptr_t)p > PAGE_SIZE) { \
+        if (out_length < in_length && values) \
+            values[out_length] = p; \
+        out_length++; \
+    } } while (0)
+
+#if defined(__i386__)
+    if (sp) *sp = state.__esp;
+
+    push_register_value(state.__eip);
+
+    push_register_value(state.__eax);
+    push_register_value(state.__ebx);
+    push_register_value(state.__ecx);
+    push_register_value(state.__edx);
+    push_register_value(state.__edi);
+    push_register_value(state.__esi);
+    push_register_value(state.__ebp);
+#elif defined(__x86_64__)
+    if (sp) *sp = state.__rsp - 128 /* redzone */;
+
+    push_register_value(state.__rip);
+
+    push_register_value(state.__rax);
+    push_register_value(state.__rbx);
+    push_register_value(state.__rcx);
+    push_register_value(state.__rdx);
+    push_register_value(state.__rdi);
+    push_register_value(state.__rbp);
+    push_register_value(state.__r8);
+    push_register_value(state.__r9);
+    push_register_value(state.__r10);
+    push_register_value(state.__r11);
+    push_register_value(state.__r12);
+    push_register_value(state.__r13);
+    push_register_value(state.__r14);
+    push_register_value(state.__r15);
+#else
+#error thread_get_register_pointer_values not defined for this architecture
+#endif
+
+    *length = out_length;
+
+    if (in_length == 0 || out_length > in_length){
+        return KERN_INSUFFICIENT_BUFFER_SIZE;
+    }
+
+       return KERN_SUCCESS;
+}
index 9364707d9de3e7bb1d3eac5b5d2649cf148bbe80..f90bab981a4f4381587f6d75907c45cc795d93c4 100755 (executable)
@@ -77,7 +77,7 @@ MIGS_PRIVATE=""
 
 MIGS_DUAL_PUBLIC_PRIVATE=""
 
-if [[ "$PLATFORM_NAME" = "iphoneos" || "$PLATFORM_NAME" = "iphonesimulator"  || "$PLATFORM_NAME" = "iphoneosnano" || "$PLATFORM_NAME" = "iphonenanosimulator" || "$PLATFORM_NAME" = "tvos" || "$PLATFOM_NAME" = "tvsimulator" || "$PLATFOM_NAME" = "appletvos" || "$PLATFOM_NAME" = "appletvsimulator" || "$PLATFOM_NAME" = "watchos" || "$PLATFOM_NAME" = "watchsimulator" ]]
+if ( echo {iphone,tv,appletv,watch}{os,simulator} iphone{osnano,nanosimulator} | grep -wFq "$PLATFORM_NAME" )
 then
        MIGS_PRIVATE="mach_vm.defs"
 else
@@ -101,9 +101,10 @@ MACH_HDRS="mach.h
        port_obj.h
        sync.h
        vm_task.h
-       vm_page_size.h"
+       vm_page_size.h
+       thread_state.h"
 
-MIG_FILTERS="watchos_prohibited_mig.txt"
+MIG_FILTERS="watchos_prohibited_mig.txt tvos_prohibited_mig.txt"
 
 # install /usr/include/server headers 
 mkdir -p $SERVER_HEADER_DST
index a697ebe0f0818f7ff5fecf287cd79640331b0a2b..d4a138af8f7c70724c3fde598fd237a6a411fac3 100644 (file)
@@ -270,7 +270,9 @@ machine_startup(void)
 
        if (PE_parse_boot_argn("debug", &debug_boot_arg, sizeof (debug_boot_arg))) {
                panicDebugging = TRUE;
+#if DEVELOPMENT || DEBUG
                if (debug_boot_arg & DB_HALT) halt_in_debugger=1;
+#endif
                if (debug_boot_arg & DB_PRT) disable_debug_output=FALSE; 
                if (debug_boot_arg & DB_SLOG) systemLogDiags=TRUE; 
                if (debug_boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE;
index 844b547506852547f395d359844187647e76340b..dc528cce12c9ad96a4231e83efc03b9b859f6f58 100644 (file)
@@ -415,7 +415,6 @@ i386_init(void)
        unsigned int    cpus = 0;
        boolean_t       fidn;
        boolean_t       IA32e = TRUE;
-       char            namep[16];
 
        postcode(I386_INIT_ENTRY);
 
@@ -443,9 +442,6 @@ i386_init(void)
        kernel_debug_string_simple("PE_init_kprintf");
        PE_init_kprintf(FALSE);
 
-       if(PE_parse_boot_argn("-show_pointers", &namep, sizeof (namep)))
-               doprnt_hide_pointers = FALSE;
-
        kernel_debug_string_simple("kernel_early_bootstrap");
        kernel_early_bootstrap();
 
index ae4a98a94f511f4e7e72adf3aa2d07b4784704f0..8d13b3645ba903750f3ff96a6c2419156feb7460 100644 (file)
@@ -1173,7 +1173,8 @@ mp_cpus_call_action(void)
                        mp_call_head_unlock(cqp, intrs_enabled);
                        KERNEL_DEBUG_CONSTANT(
                                TRACE_MP_CPUS_CALL_ACTION,
-                               call.func, call.arg0, call.arg1, call.maskp, 0);
+                               VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
+                               VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
                        call.func(call.arg0, call.arg1);
                        (void) mp_call_head_lock(cqp);
                }
@@ -1265,7 +1266,7 @@ mp_cpus_call1(
 
        KERNEL_DEBUG_CONSTANT(
                TRACE_MP_CPUS_CALL | DBG_FUNC_START,
-               cpus, mode, VM_KERNEL_UNSLIDE(action_func), arg0, arg1);
+               cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));
 
        if (!smp_initialized) {
                if ((cpus & CPUMASK_SELF) == 0)
@@ -1309,7 +1310,7 @@ mp_cpus_call1(
                                KERNEL_DEBUG_CONSTANT(
                                        TRACE_MP_CPUS_CALL_LOCAL,
                                        VM_KERNEL_UNSLIDE(action_func),
-                                       arg0, arg1, 0, 0);
+                                       VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
                                action_func(arg0, arg1);
                        }
                } else {
@@ -1375,7 +1376,7 @@ mp_cpus_call1(
        if (mode != SYNC && call_self ) {
                KERNEL_DEBUG_CONSTANT(
                        TRACE_MP_CPUS_CALL_LOCAL,
-                       VM_KERNEL_UNSLIDE(action_func), arg0, arg1, 0, 0);
+                       VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
                if (action_func != NULL) {
                        ml_set_interrupts_enabled(FALSE);
                        action_func(arg0, arg1);
index 9841a0754f64cb741ad22f023d5f554a1b53c4e2..c6352893ad9e7aa33354290850b26fb30e7558e0 100644 (file)
@@ -1942,7 +1942,8 @@ pmap_change_wiring(
        PMAP_LOCK(map);
 
        if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
-               panic("pmap_change_wiring: pte missing");
+               panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
+                     map, vaddr, wired);
 
        if (wired && !iswired(*pte)) {
                /*
@@ -2020,26 +2021,26 @@ pmap_map_bd(
        return(virt);
 }
 
-unsigned int
+mach_vm_size_t
 pmap_query_resident(
        pmap_t          pmap,
        addr64_t        s64,
        addr64_t        e64,
-       unsigned int    *compressed_count_p)
+       mach_vm_size_t  *compressed_bytes_p)
 {
        pt_entry_t     *pde;
        pt_entry_t     *spte, *epte;
        addr64_t        l64;
        uint64_t        deadline;
-       unsigned int    result;
+       mach_vm_size_t  resident_bytes;
+       mach_vm_size_t  compressed_bytes;
        boolean_t       is_ept;
-       unsigned int    compressed_count;
 
        pmap_intr_assert();
 
        if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
-               if (compressed_count_p) {
-                       *compressed_count_p = 0;
+               if (compressed_bytes_p) {
+                       *compressed_bytes_p = 0;
                }
                return 0;
        }
@@ -2051,8 +2052,8 @@ pmap_query_resident(
                   (uint32_t) (s64 >> 32), s64,
                   (uint32_t) (e64 >> 32), e64);
 
-       result = 0;
-       compressed_count = 0;
+       resident_bytes = 0;
+       compressed_bytes = 0;
 
        PMAP_LOCK(pmap);
 
@@ -2075,9 +2076,9 @@ pmap_query_resident(
 
                                for (; spte < epte; spte++) {
                                        if (pte_to_pa(*spte) != 0) {
-                                               result++;
+                                               resident_bytes += PAGE_SIZE;
                                        } else if (*spte & PTE_COMPRESSED) {
-                                               compressed_count++;
+                                               compressed_bytes += PAGE_SIZE;
                                        }
                                }
 
@@ -2097,10 +2098,10 @@ pmap_query_resident(
        PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
                   pmap, 0, 0, 0, 0);
 
-       if (compressed_count_p) {
-               *compressed_count_p = compressed_count;
+       if (compressed_bytes_p) {
+               *compressed_bytes_p = compressed_bytes;
        }
-       return result;
+       return resident_bytes;
 }
 
 #if MACH_ASSERT
index b9fcb252beadec778f57d5be745dec3e8e37f3b9..dbf819f6c1fe807890cb6d513cd73cbeeeaade10 100644 (file)
@@ -603,9 +603,7 @@ __END_DECLS
 
 #define MSR_IA32_PP0_ENERGY_STATUS             0x639
 #define MSR_IA32_PP1_ENERGY_STATUS             0x641
-#if !defined(XNU_HIDE_SKYLAKE)
 #define MSR_IA32_IA_PERF_LIMIT_REASONS_SKL     0x64F
-#endif
 
 #define MSR_IA32_IA_PERF_LIMIT_REASONS         0x690
 #define MSR_IA32_GT_PERF_LIMIT_REASONS         0x6B0
index e234c3a18361b3822dd268ff19985f9fbc6613ca..bd48b1da7e51f5c54367cd5101ea72dbb206e5dc 100644 (file)
@@ -653,7 +653,7 @@ ipc_importance_task_propagate_helper(
                assert(IP_VALID(port));
                ip_lock(port);
                temp_task_imp = IIT_NULL;
-               if (!ipc_port_importance_delta_internal(port, &delta, &temp_task_imp)) {
+               if (!ipc_port_importance_delta_internal(port, IPID_OPTION_NORMAL, &delta, &temp_task_imp)) {
                        ip_unlock(port);
                }
 
@@ -2045,6 +2045,276 @@ ipc_importance_disconnect_task(task_t task)
        task_deallocate(task);
 }
 
+/*
+ *     Routine:        ipc_importance_check_circularity
+ *     Purpose:
+ *             Check if queueing "port" in a message for "dest"
+ *             would create a circular group of ports and messages.
+ *
+ *             If no circularity (FALSE returned), then "port"
+ *             is changed from "in limbo" to "in transit".
+ *
+ *             That is, we want to set port->ip_destination == dest,
+ *             but guaranteeing that this doesn't create a circle
+ *             port->ip_destination->ip_destination->... == port
+ *
+ *             Additionally, if port was successfully changed to "in transit",
+ *             propagate boost assertions from the "in limbo" port to all
+ *             the ports in the chain, and, if the destination task accepts
+ *             boosts, to the destination task.
+ *
+ *     Conditions:
+ *             No ports locked.  References held for "port" and "dest".
+ */
+
+boolean_t
+ipc_importance_check_circularity(
+       ipc_port_t      port,
+       ipc_port_t      dest)
+{
+       ipc_importance_task_t imp_task = IIT_NULL;
+       ipc_importance_task_t release_imp_task = IIT_NULL;
+       boolean_t imp_lock_held = FALSE;
+       int assertcnt = 0;
+       ipc_port_t base;
+
+       assert(port != IP_NULL);
+       assert(dest != IP_NULL);
+
+       if (port == dest)
+               return TRUE;
+       base = dest;
+
+       /* port is in limbo, so donation status is safe to latch */
+       if (port->ip_impdonation != 0) {
+               imp_lock_held = TRUE;
+               ipc_importance_lock();
+       }
+
+       /*
+        *      First try a quick check that can run in parallel.
+        *      No circularity if dest is not in transit.
+        */
+       ip_lock(port);
+
+       /* 
+        * Even if port is just carrying assertions for others,
+        * we need the importance lock.
+        */
+       if (port->ip_impcount > 0 && !imp_lock_held) {
+               if (!ipc_importance_lock_try()) {
+                       ip_unlock(port);
+                       ipc_importance_lock();
+                       ip_lock(port);
+               }
+               imp_lock_held = TRUE;
+       }
+
+       if (ip_lock_try(dest)) {
+               if (!ip_active(dest) ||
+                   (dest->ip_receiver_name != MACH_PORT_NULL) ||
+                   (dest->ip_destination == IP_NULL))
+                       goto not_circular;
+
+               /* dest is in transit; further checking necessary */
+
+               ip_unlock(dest);
+       }
+       ip_unlock(port);
+
+       /* 
+        * We're about to pay the cost to serialize,
+        * just go ahead and grab importance lock.
+        */
+       if (!imp_lock_held) {
+               ipc_importance_lock();
+               imp_lock_held = TRUE;
+       }
+
+       ipc_port_multiple_lock(); /* massive serialization */
+
+       /*
+        *      Search for the end of the chain (a port not in transit),
+        *      acquiring locks along the way.
+        */
+
+       for (;;) {
+               ip_lock(base);
+
+               if (!ip_active(base) ||
+                   (base->ip_receiver_name != MACH_PORT_NULL) ||
+                   (base->ip_destination == IP_NULL))
+                       break;
+
+               base = base->ip_destination;
+       }
+
+       /* all ports in chain from dest to base, inclusive, are locked */
+
+       if (port == base) {
+               /* circularity detected! */
+
+               ipc_port_multiple_unlock();
+
+               /* port (== base) is in limbo */
+
+               assert(ip_active(port));
+               assert(port->ip_receiver_name == MACH_PORT_NULL);
+               assert(port->ip_destination == IP_NULL);
+
+               while (dest != IP_NULL) {
+                       ipc_port_t next;
+
+                       /* dest is in transit or in limbo */
+
+                       assert(ip_active(dest));
+                       assert(dest->ip_receiver_name == MACH_PORT_NULL);
+
+                       next = dest->ip_destination;
+                       ip_unlock(dest);
+                       dest = next;
+               }
+
+               if (imp_lock_held)
+                       ipc_importance_unlock();
+
+               return TRUE;
+       }
+
+       /*
+        *      The guarantee:  lock port while the entire chain is locked.
+        *      Once port is locked, we can take a reference to dest,
+        *      add port to the chain, and unlock everything.
+        */
+
+       ip_lock(port);
+       ipc_port_multiple_unlock();
+
+    not_circular:
+
+       /* port is in limbo */
+
+       assert(ip_active(port));
+       assert(port->ip_receiver_name == MACH_PORT_NULL);
+       assert(port->ip_destination == IP_NULL);
+
+       ip_reference(dest);
+       port->ip_destination = dest;
+
+       /* must have been in limbo or still bound to a task */
+       assert(port->ip_tempowner != 0);
+
+       /*
+        * We delayed dropping assertions from a specific task.
+        * Cache that info now (we'll drop assertions and the
+        * task reference below).
+        */
+       release_imp_task = port->ip_imp_task;
+       if (IIT_NULL != release_imp_task) {
+               port->ip_imp_task = IIT_NULL;
+       }
+       assertcnt = port->ip_impcount;
+
+       /* take the port out of limbo w.r.t. assertions */
+       port->ip_tempowner = 0;
+
+       /* now unlock chain */
+
+       ip_unlock(port);
+
+       for (;;) {
+
+               /* every port along chain track assertions behind it */
+               ipc_port_impcount_delta(dest, assertcnt, base);
+
+               if (dest == base)
+                       break;
+
+               /* port is in transit */
+
+               assert(ip_active(dest));
+               assert(dest->ip_receiver_name == MACH_PORT_NULL);
+               assert(dest->ip_destination != IP_NULL);
+               assert(dest->ip_tempowner == 0);
+
+               port = dest->ip_destination;
+               ip_unlock(dest);
+               dest = port;
+       }
+
+       /* base is not in transit */
+       assert(!ip_active(base) ||
+              (base->ip_receiver_name != MACH_PORT_NULL) ||
+              (base->ip_destination == IP_NULL));
+
+       /*
+        * Find the task to boost (if any).
+        * We will boost "through" ports that don't know
+        * about inheritance to deliver receive rights that
+        * do.
+        */
+       if (ip_active(base) && (assertcnt > 0)) {
+               assert(imp_lock_held);
+               if (base->ip_tempowner != 0) {
+                       if (IIT_NULL != base->ip_imp_task) {
+                               /* specified tempowner task */
+                               imp_task = base->ip_imp_task;
+                               assert(ipc_importance_task_is_any_receiver_type(imp_task));
+                       }
+                       /* otherwise don't boost current task */
+
+               } else if (base->ip_receiver_name != MACH_PORT_NULL) {
+                       ipc_space_t space = base->ip_receiver;
+
+                       /* only spaces with boost-accepting tasks */
+                       if (space->is_task != TASK_NULL &&
+                           ipc_importance_task_is_any_receiver_type(space->is_task->task_imp_base))
+                               imp_task = space->is_task->task_imp_base;
+               }
+
+               /* take reference before unlocking base */
+               if (imp_task != IIT_NULL) {
+                       ipc_importance_task_reference(imp_task);
+               }
+       }
+
+       ip_unlock(base);
+
+       /*
+        * Transfer assertions now that the ports are unlocked.
+        * Avoid extra overhead if transferring to/from the same task.
+        *
+        * NOTE: If a transfer is occurring, the new assertions will
+        * be added to imp_task BEFORE the importance lock is unlocked.
+        * This is critical - to avoid decrements coming from the kmsgs
+        * beating the increment to the task.
+        */
+       boolean_t transfer_assertions = (imp_task != release_imp_task);
+
+       if (imp_task != IIT_NULL) {
+               assert(imp_lock_held);
+               if (transfer_assertions)
+                       ipc_importance_task_hold_internal_assertion_locked(imp_task, assertcnt);
+       }
+
+       if (release_imp_task != IIT_NULL) {
+               assert(imp_lock_held);
+               if (transfer_assertions)
+                       ipc_importance_task_drop_internal_assertion_locked(release_imp_task, assertcnt);
+       }
+
+       if (imp_lock_held)
+               ipc_importance_unlock();
+
+       if (imp_task != IIT_NULL)
+               ipc_importance_task_release(imp_task);
+
+       if (release_imp_task != IIT_NULL)
+               ipc_importance_task_release(release_imp_task);
+
+       return FALSE;
+}
+
 /*
  *     Routine:        ipc_importance_send
  *     Purpose:
@@ -2067,7 +2337,6 @@ ipc_importance_send(
        ipc_importance_task_t task_imp;
        kern_return_t kr;
 
-
        assert(IP_VALID(port));
 
        /* If no donation to be made, return quickly */
@@ -2169,13 +2438,12 @@ ipc_importance_send(
        /*
         * If we need to relock the port, do it with the importance still locked.
         * This assures we get to add the importance boost through the port to
-        * the task BEFORE anyone else can attempt to undo that operation because
+        * the task BEFORE anyone else can attempt to undo that operation if
         * the sender lost donor status.
         */
        if (TRUE == port_lock_dropped) {
                ip_lock(port);
        }
-       ipc_importance_unlock();
 
  portupdate:
                                
@@ -2190,11 +2458,36 @@ ipc_importance_send(
        }
 #endif /* IMPORTANCE_DEBUG */
 
-       /* adjust port boost count (with port locked) */
-       if (TRUE == ipc_port_importance_delta(port, 1)) {
+       mach_port_delta_t delta = 1;
+       boolean_t need_port_lock;
+       task_imp = IIT_NULL;
+
+       /* adjust port boost count (with importance and port locked) */
+       need_port_lock = ipc_port_importance_delta_internal(port, IPID_OPTION_NORMAL, &delta, &task_imp);
+
+       /* if we need to adjust a task importance as a result, apply that here */
+       if (IIT_NULL != task_imp && delta != 0) {
+               assert(delta == 1);
+
+               /* if this results in a change of state, propagate the transistion */
+               if (ipc_importance_task_check_transition(task_imp, IIT_UPDATE_HOLD, delta)) {
+
+                       /* can't hold the port lock during task transition(s) */
+                       if (!need_port_lock) {
+                               need_port_lock = TRUE;
+                               ip_unlock(port);
+                       }
+                       ipc_importance_task_propagate_assertion_locked(task_imp, IIT_UPDATE_HOLD, TRUE);
+               }
+       }
+
+       ipc_importance_unlock();
+
+       if (need_port_lock) {
                port_lock_dropped = TRUE;
                ip_lock(port);
        }
+
        return port_lock_dropped;
 }
        
@@ -2449,7 +2742,12 @@ ipc_importance_inherit_from(ipc_kmsg_t kmsg)
                ipc_importance_unlock();
        }
 
-       /* decrement port boost count */ 
+       /*
+        * decrement port boost count
+        * This is OK to do without the importance lock as we atomically
+        * unlinked the kmsg and snapshot the donating state while holding
+        * the importance lock
+        */ 
        if (donating) {
                ip_lock(port);
                if (III_NULL != inherit) {
@@ -2458,14 +2756,14 @@ ipc_importance_inherit_from(ipc_kmsg_t kmsg)
                        ip_unlock(port);
                }  else {
                        /* drop importance from port and destination task */
-                       if (ipc_port_importance_delta(port, -1) == FALSE) {
+                       if (ipc_port_importance_delta(port, IPID_OPTION_NORMAL, -1) == FALSE) {
                                ip_unlock(port);
                        }
                }
        } else if (cleared_self_donation) {
                ip_lock(port);
                /* drop cleared donation from port and destination task */
-               if (ipc_port_importance_delta(port, -1) == FALSE) {
+               if (ipc_port_importance_delta(port, IPID_OPTION_NORMAL, -1) == FALSE) {
                        ip_unlock(port);
                }
        }
@@ -2588,7 +2886,6 @@ ipc_importance_receive(
                        ipc_importance_task_t task_imp = task_self->task_imp_base;
                        ipc_port_t port = kmsg->ikm_header->msgh_remote_port;
 
-                       /* defensive deduction for release builds lacking the assert */
                        ip_lock(port);
                        ipc_port_impcount_delta(port, -1, IP_NULL);
                        ip_unlock(port);
@@ -2688,7 +2985,7 @@ ipc_importance_clean(
                        ip_lock(port);
                        /* inactive ports already had their importance boosts dropped */
                        if (!ip_active(port) || 
-                           ipc_port_importance_delta(port, -1) == FALSE) {
+                           ipc_port_importance_delta(port, IPID_OPTION_NORMAL, -1) == FALSE) {
                                ip_unlock(port);
                        }
                }
index 3b009b42d67e6dae47b3bed54337133a0682a899..15ad62d66a78f2ff4c5004077b73ab0734e600b8 100644 (file)
@@ -224,6 +224,8 @@ extern kern_return_t ipc_importance_task_drop_file_lock_assertion(ipc_importance
 extern kern_return_t ipc_importance_task_hold_legacy_external_assertion(ipc_importance_task_t task_imp, uint32_t count);
 extern kern_return_t ipc_importance_task_drop_legacy_external_assertion(ipc_importance_task_t task_imp, uint32_t count);
 
+extern boolean_t ipc_importance_check_circularity(ipc_port_t port, ipc_port_t dest);
+
 /* prepare importance attributes for sending */
 extern boolean_t ipc_importance_send(
        ipc_kmsg_t              kmsg,
index 2db958d4db3cb81688c4c48b48ad62983611ef43..f5737416afa4c434aa8ea783fdbfff673de47e7f 100644 (file)
@@ -1375,6 +1375,7 @@ ipc_kmsg_send(
        ipc_port_t port;
        thread_t th = current_thread();
        mach_msg_return_t error = MACH_MSG_SUCCESS;
+       boolean_t kernel_reply = FALSE;
        spl_t s;
 
        /* Check if honor qlimit flag is set on thread. */
@@ -1445,6 +1446,7 @@ retry:
                assert(IP_VALID(port));
                ip_lock(port);
                /* fall thru with reply - same options */
+               kernel_reply = TRUE;
        }
 
 #if IMPORTANCE_INHERITANCE
@@ -1520,6 +1522,18 @@ retry:
                ipc_kmsg_destroy(kmsg);
                return MACH_MSG_SUCCESS;
        }
+
+       if (error != MACH_MSG_SUCCESS && kernel_reply) {
+               /*
+                * Kernel reply messages that fail can't be allowed to
+                * pseudo-receive on error conditions. We need to just treat
+                * the message as a successful delivery.
+                */
+               ip_release(port); /* JMM - Future: release right, not just ref */
+               kmsg->ikm_header->msgh_remote_port = MACH_PORT_NULL;
+               ipc_kmsg_destroy(kmsg);
+               return MACH_MSG_SUCCESS;
+       }
        return error;
 }
 
@@ -2017,7 +2031,7 @@ ipc_kmsg_copyin_header(
                ipc_port_t dport = (ipc_port_t)dest_port;
 
                /* dport still locked from above */
-               if (ipc_port_importance_delta(dport, 1) == FALSE) {
+               if (ipc_port_importance_delta(dport, IPID_OPTION_SENDPOSSIBLE, 1) == FALSE) {
                        ip_unlock(dport);
                }
        }
index e8fbd9449aaac567419d578c56ff0b762f080b8e..9a580b9e65de2a7e2ba1f8aa3780836a9816dd11 100644 (file)
@@ -205,7 +205,6 @@ ipc_port_request_alloc(
                                if (port->ip_impdonation != 0 &&
                                    port->ip_spimportant == 0 &&
                                    (task_is_importance_donor(current_task()))) {
-                                       port->ip_spimportant = 1;
                                        *importantp = TRUE;
                                }
 #endif /* IMPORTANCE_INHERTANCE */
@@ -383,7 +382,6 @@ ipc_port_request_sparm(
                            (port->ip_spimportant == 0) &&
                            (((option & MACH_SEND_IMPORTANCE) != 0) ||
                             (task_is_importance_donor(current_task())))) {
-                               port->ip_spimportant = 1;
                                return TRUE;
                        }
 #else
@@ -994,11 +992,6 @@ ipc_port_destroy(
  *             but guaranteeing that this doesn't create a circle
  *             port->ip_destination->ip_destination->... == port
  *
- *             Additionally, if port was successfully changed to "in transit",
- *             propagate boost assertions from the "in limbo" port to all
- *             the ports in the chain, and, if the destination task accepts
- *             boosts, to the destination task.
- *
  *     Conditions:
  *             No ports locked.  References held for "port" and "dest".
  */
@@ -1008,13 +1001,11 @@ ipc_port_check_circularity(
        ipc_port_t      port,
        ipc_port_t      dest)
 {
-       ipc_port_t base;
-
 #if IMPORTANCE_INHERITANCE
-       ipc_importance_task_t imp_task = IIT_NULL;
-       ipc_importance_task_t release_imp_task = IIT_NULL;
-       int assertcnt = 0;
-#endif /* IMPORTANCE_INHERITANCE */
+       /* adjust importance counts at the same time */
+       return ipc_importance_check_circularity(port, dest);
+#else
+       ipc_port_t base;
 
        assert(port != IP_NULL);
        assert(dest != IP_NULL);
@@ -1027,7 +1018,6 @@ ipc_port_check_circularity(
         *      First try a quick check that can run in parallel.
         *      No circularity if dest is not in transit.
         */
-
        ip_lock(port);
        if (ip_lock_try(dest)) {
                if (!ip_active(dest) ||
@@ -1108,37 +1098,11 @@ ipc_port_check_circularity(
        ip_reference(dest);
        port->ip_destination = dest;
 
-#if IMPORTANCE_INHERITANCE
-       /* must have been in limbo or still bound to a task */
-       assert(port->ip_tempowner != 0);
-
-       /*
-        * We delayed dropping assertions from a specific task.
-        * Cache that info now (we'll drop assertions and the
-        * task reference below).
-        */
-       release_imp_task = port->ip_imp_task;
-       if (IIT_NULL != release_imp_task) {
-               port->ip_imp_task = IIT_NULL;
-       }
-       assertcnt = port->ip_impcount;
-
-       /* take the port out of limbo w.r.t. assertions */
-       port->ip_tempowner = 0;
-
-#endif /* IMPORTANCE_INHERITANCE */
-
        /* now unlock chain */
 
        ip_unlock(port);
 
        for (;;) {
-
-#if IMPORTANCE_INHERITANCE
-               /* every port along chain track assertions behind it */
-               dest->ip_impcount += assertcnt;
-#endif /* IMPORTANCE_INHERITANCE */
-
                if (dest == base)
                        break;
 
@@ -1148,10 +1112,6 @@ ipc_port_check_circularity(
                assert(dest->ip_receiver_name == MACH_PORT_NULL);
                assert(dest->ip_destination != IP_NULL);
 
-#if IMPORTANCE_INHERITANCE
-               assert(dest->ip_tempowner == 0);
-#endif /* IMPORTANCE_INHERITANCE */
-
                port = dest->ip_destination;
                ip_unlock(dest);
                dest = port;
@@ -1162,63 +1122,10 @@ ipc_port_check_circularity(
               (base->ip_receiver_name != MACH_PORT_NULL) ||
               (base->ip_destination == IP_NULL));
 
-#if IMPORTANCE_INHERITANCE
-       /*
-        * Find the task to boost (if any).
-        * We will boost "through" ports that don't know
-        * about inheritance to deliver receive rights that
-        * do.
-        */
-       if (ip_active(base) && (assertcnt > 0)) {
-               if (base->ip_tempowner != 0) {
-                       if (IIT_NULL != base->ip_imp_task) {
-                               /* specified tempowner task */
-                               imp_task = base->ip_imp_task;
-                               assert(ipc_importance_task_is_any_receiver_type(imp_task));
-                       }
-                       /* otherwise don't boost current task */
-
-               } else if (base->ip_receiver_name != MACH_PORT_NULL) {
-                       ipc_space_t space = base->ip_receiver;
-
-                       /* only spaces with boost-accepting tasks */
-                       if (space->is_task != TASK_NULL &&
-                           ipc_importance_task_is_any_receiver_type(space->is_task->task_imp_base))
-                               imp_task = space->is_task->task_imp_base;
-               }
-
-               /* take reference before unlocking base */
-               if (imp_task != IIT_NULL) {
-                       ipc_importance_task_reference(imp_task);
-               }
-       }
-#endif /* IMPORTANCE_INHERITANCE */
-
        ip_unlock(base);
 
-#if IMPORTANCE_INHERITANCE
-       /*
-        * Transfer assertions now that the ports are unlocked.
-        * Avoid extra overhead if transferring to/from the same task.
-        */
-       boolean_t transfer_assertions = (imp_task != release_imp_task) ? TRUE : FALSE;
-
-       if (imp_task != IIT_NULL) {
-               if (transfer_assertions)
-                       ipc_importance_task_hold_internal_assertion(imp_task, assertcnt);
-               ipc_importance_task_release(imp_task);
-               imp_task = IIT_NULL;
-       }
-
-       if (release_imp_task != IIT_NULL) {
-               if (transfer_assertions)
-                       ipc_importance_task_drop_internal_assertion(release_imp_task, assertcnt);
-               ipc_importance_task_release(release_imp_task);
-               release_imp_task = IIT_NULL;
-       }
-#endif /* IMPORTANCE_INHERITANCE */
-
        return FALSE;
+#endif /* !IMPORTANCE_INHERITANCE */
 }
 
 /*
@@ -1255,14 +1162,12 @@ ipc_port_impcount_delta(
        }
 
        absdelta = 0 - delta;           
-       //assert(port->ip_impcount >= absdelta);
-       /* if we have enough to deduct, we're done */
        if (port->ip_impcount >= absdelta) {
                port->ip_impcount -= absdelta;
                return delta;
        }
 
-#if DEVELOPMENT || DEBUG
+#if (DEVELOPMENT || DEBUG)
        if (port->ip_receiver_name != MACH_PORT_NULL) {
                task_t target_task = port->ip_receiver->is_task;
                ipc_importance_task_t target_imp = target_task->task_imp_base;
@@ -1279,7 +1184,7 @@ ipc_port_impcount_delta(
                printf("Over-release of importance assertions for port 0x%x receiver pid %d (%s), "
                       "dropping %d assertion(s) but port only has %d remaining.\n",
                       port->ip_receiver_name, 
-                      target_imp->iit_bsd_pid, target_imp->iit_procname,
+                      target_pid, target_procname,
                       absdelta, port->ip_impcount);
 
        } else if (base != IP_NULL) {
@@ -1295,14 +1200,16 @@ ipc_port_impcount_delta(
                        target_procname = "unknown";
                        target_pid = -1;
                }
-               printf("Over-release of importance assertions for port %p "
+               printf("Over-release of importance assertions for port 0x%lx "
                       "enqueued on port 0x%x with receiver pid %d (%s), "
                       "dropping %d assertion(s) but port only has %d remaining.\n",
-                      port, base->ip_receiver_name, 
-                      target_imp->iit_bsd_pid, target_imp->iit_procname,
+                      (unsigned long)VM_KERNEL_UNSLIDE_OR_PERM((uintptr_t)port),
+                      base->ip_receiver_name,
+                      target_pid, target_procname,
                       absdelta, port->ip_impcount);
        }
 #endif
+
        delta = 0 - port->ip_impcount;
        port->ip_impcount = 0;
        return delta;
@@ -1318,6 +1225,7 @@ ipc_port_impcount_delta(
  *             and if so, apply the delta.
  *     Conditions:
  *             The port is referenced and locked on entry.
+ *             Importance may be locked.
  *             Nothing else is locked.
  *             The lock may be dropped on exit.
  *             Returns TRUE if lock was dropped.
@@ -1327,6 +1235,7 @@ ipc_port_impcount_delta(
 boolean_t
 ipc_port_importance_delta_internal(
        ipc_port_t              port,
+       natural_t               options,
        mach_port_delta_t       *deltap,
        ipc_importance_task_t   *imp_task)
 {
@@ -1338,6 +1247,8 @@ ipc_port_importance_delta_internal(
        if (*deltap == 0)
                return FALSE;
 
+       assert(options == IPID_OPTION_NORMAL || options == IPID_OPTION_SENDPOSSIBLE);
+
        base = port;
 
        /* if port is in transit, have to search for end of chain */
@@ -1361,21 +1272,27 @@ ipc_port_importance_delta_internal(
                ipc_port_multiple_unlock();
        }
 
-       /* unlock down to the base, adding a boost at each level */
+       /*
+        * If the port lock is dropped b/c the port is in transit, there is a
+        * race window where another thread can drain messages and/or fire a
+        * send possible notification before we get here.
+        *
+        * We solve this race by checking to see if our caller armed the send
+        * possible notification, whether or not it's been fired yet, and
+        * whether or not we've already set the port's ip_spimportant bit. If
+        * we don't need a send-possible boost, then we'll just apply a
+        * harmless 0-boost to the port.
+        */
+       if (options & IPID_OPTION_SENDPOSSIBLE) {
+               assert(*deltap == 1);
+               if (port->ip_sprequests && port->ip_spimportant == 0)
+                       port->ip_spimportant = 1;
+               else
+                       *deltap = 0;
+       }
+
+       /* unlock down to the base, adjusting boost(s) at each level */
        for (;;) {
-               /*
-                * JMM TODO - because of the port unlock to grab the multiple lock
-                * above, a subsequent drop of importance could race and beat
-                * the "previous" increase - causing the port impcount to go
-                * negative briefly.  The defensive deduction performed by
-                * ipc_port_impcount_delta() defeats that, and therefore can
-                * cause an importance leak once the increase finally arrives.
-                *
-                * Need to rework the importance delta logic to be more like
-                * ipc_importance_inherit_from() where it locks all it needs in
-                * one pass to avoid any lock drops - to keep that race from
-                * ever occuring.
-                */
                *deltap = ipc_port_impcount_delta(port, *deltap, base);
 
                if (port == base) {
@@ -1444,20 +1361,19 @@ ipc_port_importance_delta_internal(
 boolean_t
 ipc_port_importance_delta(
        ipc_port_t              port,
+       natural_t               options,
        mach_port_delta_t       delta)
 {
        ipc_importance_task_t imp_task = IIT_NULL;
        boolean_t dropped;
 
-       dropped = ipc_port_importance_delta_internal(port, &delta, &imp_task);
+       dropped = ipc_port_importance_delta_internal(port, options, &delta, &imp_task);
 
-       if (IIT_NULL == imp_task)
+       if (IIT_NULL == imp_task || delta == 0)
                return dropped;
 
-       if (!dropped) {
-               dropped = TRUE;
+       if (!dropped)
                ip_unlock(port);
-       }
 
        assert(ipc_importance_task_is_any_receiver_type(imp_task));
 
@@ -1467,7 +1383,7 @@ ipc_port_importance_delta(
                ipc_importance_task_drop_internal_assertion(imp_task, -delta);
 
        ipc_importance_task_release(imp_task);
-       return dropped;
+       return TRUE;
 }
 #endif /* IMPORTANCE_INHERITANCE */
 
index 48a2fc49dbf570eb24bdfd50ad5845871578b3f4..92bb0e70abac4e511a278ccc5eb7cc3180696735 100644 (file)
@@ -432,6 +432,12 @@ ipc_port_check_circularity(
        ipc_port_t      dest);
 
 #if IMPORTANCE_INHERITANCE
+
+enum {
+       IPID_OPTION_NORMAL       = 0, /* normal boost */
+       IPID_OPTION_SENDPOSSIBLE = 1, /* send-possible induced boost */
+};
+
 /* apply importance delta to port only */
 extern mach_port_delta_t
 ipc_port_impcount_delta(
@@ -443,13 +449,15 @@ ipc_port_impcount_delta(
 extern boolean_t
 ipc_port_importance_delta_internal(
        ipc_port_t              port,
-       mach_port_delta_t       *delta,
+       natural_t               options,
+       mach_port_delta_t       *deltap,
        ipc_importance_task_t   *imp_task);
 
 /* Apply an importance delta to a port and reflect change in receiver task */
 extern boolean_t
 ipc_port_importance_delta(
        ipc_port_t              port,
+       natural_t               options,
        mach_port_delta_t       delta);
 #endif /* IMPORTANCE_INHERITANCE */
 
index b8feb54dc2cc826e8512b6e4d3b5c9434227271c..bf655396e11548cbeee0192f44f4bc34e9de9c7f 100644 (file)
@@ -378,7 +378,7 @@ ipc_right_request_alloc(
 
 #if IMPORTANCE_INHERITANCE
                                if (needboost == TRUE) {
-                                       if (ipc_port_importance_delta(port, 1) == FALSE)
+                                       if (ipc_port_importance_delta(port, IPID_OPTION_SENDPOSSIBLE, 1) == FALSE)
                                                ip_unlock(port);
                                } else
 #endif /* IMPORTANCE_INHERITANCE */
index 43880d7f933e25921f14563476d7016fd89af96f..47e9bdf3a80bd651e04a7190eab7e64897bca9de 100644 (file)
@@ -1984,7 +1984,9 @@ mach_voucher_extract_attr_content(
         * manager referenced during the callout.
         */
        ivgt_lookup(key_index, FALSE, &manager, NULL);
-       assert(IVAM_NULL != manager);
+       if (IVAM_NULL == manager) {
+               return KERN_INVALID_ARGUMENT;
+       }
 
        /*
         * Get the value(s) to pass to the manager
@@ -2060,7 +2062,9 @@ mach_voucher_extract_attr_recipe(
         * manager referenced during the callout.
         */
        ivgt_lookup(key_index, FALSE, &manager, NULL);
-       assert(IVAM_NULL != manager);
+       if (IVAM_NULL == manager) {
+               return KERN_INVALID_ARGUMENT;
+       }
 
        /*
         * Get the value(s) to pass to the manager
@@ -2126,9 +2130,6 @@ mach_voucher_extract_all_attr_recipes(
                if (recipe_size - recipe_used < sizeof(*recipe))
                        return KERN_NO_SPACE;
 
-               recipe = (mach_voucher_attr_recipe_t)(void *)&recipes[recipe_used];
-               content_size = recipe_size - recipe_used - sizeof(*recipe);
-               
                /*
                 * Get the manager for this key_index.  The
                 * existence of a non-default value for this
@@ -2137,6 +2138,12 @@ mach_voucher_extract_all_attr_recipes(
                 */
                ivgt_lookup(key_index, FALSE, &manager, NULL);
                assert(IVAM_NULL != manager);
+               if (IVAM_NULL == manager) {
+                       continue;
+               }
+
+               recipe = (mach_voucher_attr_recipe_t)(void *)&recipes[recipe_used];
+               content_size = recipe_size - recipe_used - sizeof(*recipe);
 
                /*
                 * Get the value(s) to pass to the manager
@@ -2266,7 +2273,9 @@ mach_voucher_attr_command(
         * execution.
         */
        ivgt_lookup(key_index, TRUE, &manager, &control);
-       assert(IVAM_NULL != manager);
+       if (IVAM_NULL == manager) {
+               return KERN_INVALID_ARGUMENT;
+       }
 
        /*
         * Get the values for this <voucher, key> pair
index a5cf30c939af6506fb6b5be5cec707330c0c83bb..a21666275328fdc78566db574d36d7b127e66ec7 100644 (file)
@@ -51,6 +51,7 @@
  */
 int coalitions_get_list(int type, struct procinfo_coalinfo *coal_list, int list_sz);
 boolean_t coalition_is_leader(task_t task, int coal_type, coalition_t *coal);
+task_t coalition_get_leader(coalition_t coal);
 int coalition_get_task_count(coalition_t coal);
 uint64_t coalition_get_page_count(coalition_t coal, int *ntasks);
 int coalition_get_pid_list(coalition_t coal, uint32_t rolemask, int sort_order,
@@ -168,6 +169,10 @@ struct i_resource_coalition {
        uint64_t bytesread;
        uint64_t byteswritten;
        uint64_t gpu_time;
+       uint64_t logical_immediate_writes;
+       uint64_t logical_deferred_writes;
+       uint64_t logical_invalidated_writes;
+       uint64_t logical_metadata_writes;
 
        uint64_t task_count;      /* tasks that have started in this coalition */
        uint64_t dead_task_count; /* tasks that have exited in this coalition;
@@ -385,6 +390,10 @@ i_coal_resource_remove_task(coalition_t coal, task_t task)
        cr->bytesread += task->task_io_stats->disk_reads.size;
        cr->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
        cr->gpu_time += task_gpu_utilisation(task);
+       cr->logical_immediate_writes += task->task_immediate_writes;    
+       cr->logical_deferred_writes += task->task_deferred_writes;
+       cr->logical_invalidated_writes += task->task_invalidated_writes;
+       cr->logical_metadata_writes += task->task_metadata_writes;
 
        /* remove the task from the coalition's list */
        remqueue(&task->task_coalition[COALITION_TYPE_RESOURCE]);
@@ -451,6 +460,10 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us
        uint64_t bytesread = coal->r.bytesread;
        uint64_t byteswritten = coal->r.byteswritten;
        uint64_t gpu_time = coal->r.gpu_time;
+       uint64_t logical_immediate_writes = coal->r.logical_immediate_writes;
+       uint64_t logical_deferred_writes = coal->r.logical_deferred_writes;
+       uint64_t logical_invalidated_writes = coal->r.logical_invalidated_writes;
+       uint64_t logical_metadata_writes = coal->r.logical_metadata_writes;
        int64_t cpu_time_billed_to_me = 0;
        int64_t cpu_time_billed_to_others = 0;
 
@@ -482,6 +495,10 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us
                bytesread += task->task_io_stats->disk_reads.size;
                byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
                gpu_time += task_gpu_utilisation(task);
+               logical_immediate_writes += task->task_immediate_writes;
+               logical_deferred_writes += task->task_deferred_writes;
+               logical_invalidated_writes += task->task_invalidated_writes;
+               logical_metadata_writes += task->task_metadata_writes;
                cpu_time_billed_to_me += (int64_t)bank_billed_time(task->bank_context);
                cpu_time_billed_to_others += (int64_t)bank_serviced_time(task->bank_context);
        }
@@ -522,6 +539,10 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us
        cru_out->bytesread = bytesread;
        cru_out->byteswritten = byteswritten;
        cru_out->gpu_time = gpu_time;
+       cru_out->logical_immediate_writes = logical_immediate_writes;
+       cru_out->logical_deferred_writes = logical_deferred_writes;
+       cru_out->logical_invalidated_writes = logical_invalidated_writes;
+       cru_out->logical_metadata_writes = logical_metadata_writes;
 
        ledger_dereference(sum_ledger);
        sum_ledger = LEDGER_NULL;
@@ -1571,6 +1592,27 @@ boolean_t coalition_is_leader(task_t task, int coal_type, coalition_t *coal)
 }
 
 
+task_t coalition_get_leader(coalition_t coal)
+{
+       task_t leader = TASK_NULL;
+
+       if (!coal)
+               return TASK_NULL;
+
+       coalition_lock(coal);
+       if (coal->type != COALITION_TYPE_JETSAM)
+               goto out_unlock;
+
+       leader = coal->j.leader;
+       if (leader != TASK_NULL)
+               task_reference(leader);
+
+out_unlock:
+       coalition_unlock(coal);
+       return leader;
+}
+
+
 int coalition_get_task_count(coalition_t coal)
 {
        int ntasks = 0;
index 2c79aacdf7eeb9f012676b4b9829a6e15fdfbaf1..cd9b5bb2351216aa34952d64461bafd884bdfaf2 100644 (file)
@@ -317,7 +317,8 @@ restart:
                        nestedpanic +=1;
                        PANIC_UNLOCK();
                        Debugger("double panic");
-                       printf("double panic:  We are hanging here...\n");
+                       // a printf statement here was removed to avoid a panic-loop caused
+                       // by a panic from printf
                        panic_stop();
                        /* NOTREACHED */
                }
@@ -341,12 +342,16 @@ panic_epilogue(spl_t      s)
        panicstr = (char *)0;
        PANIC_UNLOCK();
 
+#if DEVELOPMENT || DEBUG
        if (return_on_panic) {
                panic_normal();
                enable_preemption();
                splx(s);
                return;
        }
+#else
+       (void)s;
+#endif
        kdb_printf("panic: We are hanging here...\n");
        panic_stop();
        /* NOTREACHED */
index 1789ae5f6bf16346d03b2212d339f71e71fca18f..7aa0466dbc615016bcde299a4d5386260450672f 100644 (file)
@@ -266,6 +266,7 @@ ipc_kobject_server(
        ipc_kmsg_t reply;
        kern_return_t kr;
        ipc_port_t *destp;
+       ipc_port_t  replyp = IPC_PORT_NULL;
        mach_msg_format_0_trailer_t *trailer;
        register mig_hash_t *ptr;
 
@@ -341,10 +342,10 @@ ipc_kobject_server(
            }
            else {
                if (!ipc_kobject_notify(request->ikm_header, reply->ikm_header)){
-#if    MACH_IPC_TEST
+#if DEVELOPMENT || DEBUG
                    printf("ipc_kobject_server: bogus kernel message, id=%d\n",
                        request->ikm_header->msgh_id);
-#endif /* MACH_IPC_TEST */
+#endif /* DEVELOPMENT || DEBUG */
                    _MIG_MSGID_INVALID(request->ikm_header->msgh_id);
 
                    ((mig_reply_error_t *) reply->ikm_header)->RetCode
@@ -419,6 +420,8 @@ ipc_kobject_server(
                ipc_kmsg_destroy(request);
        }
 
+       replyp = (ipc_port_t)reply->ikm_header->msgh_remote_port;
+
        if (kr == MIG_NO_REPLY) {
                /*
                 *      The server function will send a reply message
@@ -428,7 +431,7 @@ ipc_kobject_server(
                ipc_kmsg_free(reply);
 
                return IKM_NULL;
-       } else if (!IP_VALID((ipc_port_t)reply->ikm_header->msgh_remote_port)) {
+       } else if (!IP_VALID(replyp)) {
                /*
                 *      Can't queue the reply message if the destination
                 *      (the reply port) isn't valid.
@@ -436,6 +439,17 @@ ipc_kobject_server(
 
                ipc_kmsg_destroy(reply);
 
+               return IKM_NULL;
+       } else if (replyp->ip_receiver == ipc_space_kernel) {
+               /*
+                * Don't send replies to kobject kernel ports
+                */
+#if DEVELOPMENT || DEBUG
+               printf("%s: refusing to send reply to kobject %d port (id:%d)\n",
+                      __func__, ip_kotype(replyp),
+                      request->ikm_header->msgh_id);
+#endif /* DEVELOPMENT || DEBUG */
+               ipc_kmsg_destroy(reply);
                return IKM_NULL;
        }
 
@@ -528,9 +542,22 @@ ipc_kobject_notify(
        mach_msg_header_t *request_header,
        mach_msg_header_t *reply_header)
 {
+       mach_msg_max_trailer_t * trailer;
        ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
 
        ((mig_reply_error_t *) reply_header)->RetCode = MIG_NO_REPLY;
+
+       trailer = (mach_msg_max_trailer_t *)
+                 ((vm_offset_t)request_header + request_header->msgh_size);
+       if (0 != bcmp(&trailer->msgh_audit, &KERNEL_AUDIT_TOKEN,
+                       sizeof(trailer->msgh_audit))) {
+               return FALSE;
+       }
+       if (0 != bcmp(&trailer->msgh_sender, &KERNEL_SECURITY_TOKEN,
+                       sizeof(trailer->msgh_sender))) {
+               return FALSE;
+       }
+
        switch (request_header->msgh_id) {
                case MACH_NOTIFY_NO_SENDERS:
                        switch (ip_kotype(port)) {
index c091eb115322bb82ea7d6acef38a0ab59d1888e3..26bdae8f297d0324da7d3d41f0e277f7d11ad07a 100644 (file)
@@ -412,6 +412,7 @@ kpc_get_config(uint32_t classes, kpc_config_t *current_config)
 int
 kpc_set_config(uint32_t classes, kpc_config_t *configv)
 {
+       int ret = 0;
        struct kpc_config_remote mp_config = {
                .classes = classes, .configv = configv,
                .pmc_mask = kpc_get_configurable_pmc_mask(classes)
@@ -437,11 +438,11 @@ kpc_set_config(uint32_t classes, kpc_config_t *configv)
        if (classes & KPC_CLASS_POWER_MASK)
                mp_config.classes |= KPC_CLASS_CONFIGURABLE_MASK;
 
-       kpc_set_config_arch( &mp_config );
+       ret = kpc_set_config_arch( &mp_config );
 
        lck_mtx_unlock(&kpc_config_lock);
 
-       return 0;
+       return ret;
 }
 
 /* allocate a buffer large enough for all possible counters */
index 3deffa426376ce53f3d665dacabfdfb313598d53..dfe33564daf69530a432c5eaed8bbb2a31ff7678 100644 (file)
@@ -151,10 +151,12 @@ host_reboot(
 
        assert(host_priv == &realhost);
 
+#if DEVELOPMENT || DEBUG
        if (options & HOST_REBOOT_DEBUGGER) {
                Debugger("Debugger");
                return (KERN_SUCCESS);
        }
+#endif
 
     if (options & HOST_REBOOT_UPSDELAY) {
         // UPS power cutoff path
index 9a0a9427cdf7af516c2b64a53482e6617e1c1127..5ee363dc7b114a44273ca6f1dc7f9c0c37f1fc7d 100644 (file)
@@ -896,7 +896,7 @@ assert_wait(
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
-               VM_KERNEL_UNSLIDE(event), 0, 0, 0, 0);
+               VM_KERNEL_UNSLIDE_OR_PERM(event), 0, 0, 0, 0);
 
        struct waitq *waitq;
        waitq = global_eventq(event);
@@ -929,7 +929,7 @@ assert_wait_timeout(
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
-                                 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+                                 VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
 
        wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
                                             interruptible,
@@ -978,7 +978,7 @@ assert_wait_timeout_with_leeway(
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
-                                 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+                                 VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
 
        wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
                                             interruptible,
@@ -1013,7 +1013,7 @@ assert_wait_deadline(
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
-                                 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+                                 VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
 
        wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
                                             interruptible,
@@ -1049,7 +1049,7 @@ assert_wait_deadline_with_leeway(
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
-                                 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+                                 VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
 
        wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
                                             interruptible,
index 53013fa79ef7dfa820f3459e6254ebfc29764777..eada1fb64b249b9dc5530b30838cdc77cd682a62 100644 (file)
@@ -284,6 +284,11 @@ kernel_bootstrap(void)
        csr_init();
 #endif
 
+       if (PE_i_can_has_debugger(NULL) &&
+           PE_parse_boot_argn("-show_pointers", &namep, sizeof (namep))) {
+               doprnt_hide_pointers = FALSE;
+       }
+
        kernel_bootstrap_log("stackshot_lock_init");    
        stackshot_lock_init();
 
@@ -548,12 +553,12 @@ kernel_bootstrap_thread(void)
        vm_commpage_init();
        vm_commpage_text_init();
 
-
 #if CONFIG_MACF
        kernel_bootstrap_log("mac_policy_initmach");
        mac_policy_initmach();
 #endif
 
+
 #if CONFIG_SCHED_SFI
        kernel_bootstrap_log("sfi_init");
        sfi_init();
index 0cba287b0ad96c207619958ef8c5a271275bcb53..4304559e67d3011188330dbf871c73447c58857a 100644 (file)
@@ -206,37 +206,29 @@ semaphore_create(
 /*
  *     Routine:        semaphore_destroy_internal
  *
- *     This call will only succeed if the specified task is the SAME task
- *     specified at the semaphore's creation.
+ *     Disassociate a semaphore from its owning task, mark it inactive,
+ *     and set any waiting threads running with THREAD_RESTART.
  *
- *     All threads currently blocked on the semaphore are awoken.  These
- *     threads will return with the KERN_TERMINATED error.
+ *     Conditions:
+ *                     task is locked
+ *                     semaphore is locked
+ *                     semaphore is owned by the specified task
+ *     Returns:
+ *                     with semaphore unlocked
  */
-kern_return_t
+static void
 semaphore_destroy_internal(
        task_t                  task,
        semaphore_t             semaphore)
 {
        int                     old_count;
-       spl_t                   spl_level;
-
-       /*
-        *  Disown semaphore
-        */
-       task_lock(task);
-       if (semaphore->owner != task) {
-               task_unlock(task);
-               return KERN_INVALID_ARGUMENT;
-       }
-       spl_level = splsched();
-       semaphore_lock(semaphore);
 
+       /* unlink semaphore from owning task */
+       assert(semaphore->owner == task);
        remqueue((queue_entry_t) semaphore);
        semaphore->owner = TASK_NULL;
        task->semaphores_owned--;
 
-       task_unlock(task);
-
        /*
         *  Deactivate semaphore
         */
@@ -259,9 +251,6 @@ semaphore_destroy_internal(
        } else {
                semaphore_unlock(semaphore);
        }
-       splx(spl_level);
-
-       return KERN_SUCCESS;
 }
 
 /*
@@ -275,18 +264,75 @@ semaphore_destroy(
        task_t                  task,
        semaphore_t             semaphore)
 {
-       kern_return_t kr;
+       spl_t spl_level;
 
        if (semaphore == SEMAPHORE_NULL)
                return KERN_INVALID_ARGUMENT;
 
        if (task == TASK_NULL) {
-               kr = KERN_INVALID_ARGUMENT;
-       } else {
-               kr = semaphore_destroy_internal(task, semaphore);
+               semaphore_dereference(semaphore);
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       task_lock(task);
+       spl_level = splsched();
+       semaphore_lock(semaphore);
+
+       if (semaphore->owner != task) {
+               semaphore_unlock(semaphore);
+               splx(spl_level);
+               task_unlock(task);
+               return KERN_INVALID_ARGUMENT;
        }
+                       
+       semaphore_destroy_internal(task, semaphore);
+       /* semaphore unlocked */
+
+       splx(spl_level);
+       task_unlock(task);
+
        semaphore_dereference(semaphore);
-       return kr;
+       return KERN_SUCCESS;
+}
+
+/*
+ *     Routine:        semaphore_destroy_all
+ *
+ *     Destroy all the semaphores associated with a given task.
+ */
+#define SEMASPERSPL 20  /* max number of semaphores to destroy per spl hold */
+
+void
+semaphore_destroy_all(
+       task_t                  task)
+{
+       uint32_t count;
+       spl_t spl_level;
+
+       count = 0;
+       task_lock(task);
+       while (!queue_empty(&task->semaphore_list)) {
+               semaphore_t semaphore;
+
+               semaphore = (semaphore_t) queue_first(&task->semaphore_list);
+
+               if (count == 0) 
+                       spl_level = splsched();
+               semaphore_lock(semaphore);
+
+               semaphore_destroy_internal(task, semaphore);
+               /* semaphore unlocked */
+
+               /* throttle number of semaphores per interrupt disablement */
+               if (++count == SEMASPERSPL) {
+                       count = 0;
+                       splx(spl_level);
+               }
+       }
+       if (count != 0)
+               splx(spl_level);
+
+       task_unlock(task);
 }
 
 /*
@@ -1072,6 +1118,9 @@ void
 semaphore_dereference(
        semaphore_t             semaphore)
 {
+       uint32_t collisions;
+       spl_t spl_level;
+
        if (semaphore == NULL)
                return;
 
@@ -1090,10 +1139,37 @@ semaphore_dereference(
                assert(!port->ip_srights);
                ipc_port_dealloc_kernel(port);
        }
-       if (semaphore->active) {
-               assert(semaphore->owner != TASK_NULL);
-               semaphore_destroy_internal(semaphore->owner, semaphore);
+
+       /*
+        * Lock the semaphore to lock in the owner task reference.
+        * Then continue to try to lock the task (inverse order).
+        */
+       spl_level = splsched();
+       semaphore_lock(semaphore);
+       for (collisions = 0; semaphore->active; collisions++) {
+               task_t task = semaphore->owner;
+
+               assert(task != TASK_NULL);
+               
+               if (task_lock_try(task)) {
+                       semaphore_destroy_internal(task, semaphore);
+                       /* semaphore unlocked */
+                       splx(spl_level);
+                       task_unlock(task);
+                       goto out;
+               }
+               
+               /* failed to get out-of-order locks */
+               semaphore_unlock(semaphore);
+               splx(spl_level);
+               mutex_pause(collisions);
+               spl_level = splsched();
+               semaphore_lock(semaphore);
        }
+       semaphore_unlock(semaphore);
+       splx(spl_level);
+
+ out:
        zfree(semaphore_zone, semaphore);
 }
 
index 339eb9e9388c425f4607f9d74f38be6fc01a52ee..2187c6bae69b5e795088f390d4deb448b3e38a03 100644 (file)
@@ -64,7 +64,7 @@ extern void semaphore_init(void);
 
 extern void            semaphore_reference(semaphore_t semaphore);
 extern void            semaphore_dereference(semaphore_t semaphore);
-extern  kern_return_t  semaphore_destroy_internal(task_t task, semaphore_t semaphore);
+extern void            semaphore_destroy_all(task_t task);
 
 #endif /* MACH_KERNEL_PRIVATE */
 
index 498fd09c8e8bef1abd67a1358103a9d47d4e26ca..d0e982ee42a0b03d8b95cb9cbfccbe3547b87138 100644 (file)
@@ -189,6 +189,10 @@ struct _task_ledger_indices task_ledgers __attribute__((used)) =
 #endif
        };
 
+/* System sleep state */
+boolean_t tasks_suspend_state;
+
+
 void init_task_ledgers(void);
 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
@@ -968,6 +972,10 @@ task_create_internal(
        new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
        assert(new_task->task_io_stats != NULL);
        bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
+       new_task->task_immediate_writes = 0;
+       new_task->task_deferred_writes = 0;     
+       new_task->task_invalidated_writes = 0;
+       new_task->task_metadata_writes = 0;
 
        bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
 
@@ -1015,6 +1023,9 @@ task_create_internal(
        lck_mtx_lock(&tasks_threads_lock);
        queue_enter(&tasks, new_task, task_t, tasks);
        tasks_count++;
+        if (tasks_suspend_state) {
+            task_suspend_internal(new_task);
+        }
        lck_mtx_unlock(&tasks_threads_lock);
 
        *child_task = new_task;
@@ -1613,6 +1624,23 @@ task_terminate_internal(
        return (KERN_SUCCESS);
 }
 
+void
+tasks_system_suspend(boolean_t suspend)
+{
+       task_t task;
+
+       lck_mtx_lock(&tasks_threads_lock);
+       assert(tasks_suspend_state != suspend);
+       tasks_suspend_state = suspend;
+       queue_iterate(&tasks, task, task_t, tasks) {
+               if (task == kernel_task) {
+                       continue;
+               }
+               suspend ? task_suspend_internal(task) : task_resume_internal(task);
+       }
+       lck_mtx_unlock(&tasks_threads_lock);
+}
+
 /*
  * task_start_halt:
  *
@@ -3831,16 +3859,10 @@ task_set_ras_pc(
 void
 task_synchronizer_destroy_all(task_t task)
 {
-       semaphore_t     semaphore;
-
        /*
         *  Destroy owned semaphores
         */
-
-       while (!queue_empty(&task->semaphore_list)) {
-               semaphore = (semaphore_t) queue_first(&task->semaphore_list);
-               (void) semaphore_destroy_internal(task, semaphore);
-       }
+       semaphore_destroy_all(task);
 }
 
 /*
@@ -4517,3 +4539,23 @@ boolean_t task_is_gpu_denied(task_t task)
        /* We don't need the lock to read this flag */
        return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
 }
+
+void task_update_logical_writes(task_t task, uint32_t io_size, int flags)
+{
+       KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE, task_pid(task), io_size, flags, 0, 0);
+       switch(flags) {
+               case TASK_WRITE_IMMEDIATE:
+                       OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
+                       break;
+               case TASK_WRITE_DEFERRED:
+                       OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
+                       break;
+               case TASK_WRITE_INVALIDATED:
+                       OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
+                       break;
+               case TASK_WRITE_METADATA:
+                       OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
+                       break;
+       }
+       return;
+}
index 7b7c153065abedf0e0d54f7bb8f7266226652b3a..5ddff0c754e76d457f68fa25d94642271c8ac5d4 100644 (file)
@@ -352,8 +352,12 @@ struct task {
                        low_mem_privileged_listener     :1,     /* if set, task would like to know about pressure changes before other tasks on the system */
                        mem_notify_reserved             :27;    /* reserved for future use */
 
-       io_stat_info_t  task_io_stats;
-       
+       io_stat_info_t          task_io_stats;
+       uint64_t                task_immediate_writes __attribute__((aligned(8)));
+       uint64_t                task_deferred_writes __attribute__((aligned(8)));
+       uint64_t                task_invalidated_writes __attribute__((aligned(8)));
+       uint64_t                task_metadata_writes __attribute__((aligned(8)));
+
        /* 
         * The cpu_time_qos_stats fields are protected by the task lock
         */
@@ -490,6 +494,8 @@ extern kern_return_t        task_send_trace_memory(
                                                        uint32_t        pid,
                                                        uint64_t        uniqueid);
 
+extern void                    tasks_system_suspend(boolean_t suspend);
+
 #if CONFIG_FREEZE
 
 /* Freeze a task's resident pages */
@@ -851,6 +857,12 @@ extern kern_return_t task_purge_volatile_memory(task_t task);
 extern void      task_set_gpu_denied(task_t task, boolean_t denied);
 extern boolean_t task_is_gpu_denied(task_t task);
 
+#define TASK_WRITE_IMMEDIATE           0x1
+#define TASK_WRITE_DEFERRED            0x2
+#define TASK_WRITE_INVALIDATED         0x4
+#define TASK_WRITE_METADATA            0x8
+extern void    task_update_logical_writes(task_t task, uint32_t io_size, int flags);
+
 #endif /* XNU_KERNEL_PRIVATE */
 
 #ifdef KERNEL_PRIVATE
index 6e9a472a05eb57ae2b5fe657f822e692b3705708..535d1e319972f16be4116ced05a026433164a0c6 100644 (file)
@@ -1920,18 +1920,6 @@ THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void)
        }
 }
 
-#define UPDATE_IO_STATS(info, size)                            \
-{                                                              \
-       info.count++;                                           \
-       info.size += size;                                      \
-}
-
-#define UPDATE_IO_STATS_ATOMIC(info, size)                     \
-{                                                              \
-       OSIncrementAtomic64((SInt64 *)&(info.count));           \
-       OSAddAtomic64(size, (SInt64 *)&(info.size));            \
-}
-
 void thread_update_io_stats(thread_t thread, int size, int io_flags)
 {
        int io_tier;
index 82339e20864c66fca9c5bf76323955eaa9f365e5..6e406e8b74ec84eead2e20d9190867aa5c64a5b0 100644 (file)
@@ -1207,7 +1207,7 @@ thread_call_thread(
 #if DEVELOPMENT || DEBUG
                KERNEL_DEBUG_CONSTANT(
                                MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE,
-                               VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);
+                               VM_KERNEL_UNSLIDE(func), VM_KERNEL_UNSLIDE_OR_PERM(param0), VM_KERNEL_UNSLIDE_OR_PERM(param1), 0, 0);
 #endif /* DEVELOPMENT || DEBUG */
 
 #if CONFIG_DTRACE
index 8c65ed2f89df8b011c37f865b92d7bc7f3814b4c..fead4d66327ffd65a2dd7aa050b4eee75a7e0eb9 100644 (file)
@@ -417,9 +417,9 @@ timer_call_enqueue_deadline_unlocked(
 #if TIMER_ASSERT
                        TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 
                                DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
-                               call,
+                               VM_KERNEL_UNSLIDE_OR_PERM(call),
                                call->async_dequeue,
-                               TCE(call)->queue,
+                               VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
                                0x1c, 0);
                        timer_call_enqueue_deadline_unlocked_async1++;
 #endif
@@ -471,9 +471,9 @@ timer_call_dequeue_unlocked(
 #if TIMER_ASSERT
        TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 
                DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
-               call,
+               VM_KERNEL_UNSLIDE_OR_PERM(call),
                call->async_dequeue,
-               TCE(call)->queue,
+               VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
                0, 0);
 #endif
        if (old_queue != NULL) {
@@ -483,9 +483,9 @@ timer_call_dequeue_unlocked(
 #if TIMER_ASSERT
                        TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 
                                DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
-                               call,
+                           VM_KERNEL_UNSLIDE_OR_PERM(call),
                                call->async_dequeue,
-                               TCE(call)->queue,
+                               VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
                                0x1c, 0);
                        timer_call_dequeue_unlocked_async1++;
 #endif
@@ -570,8 +570,8 @@ timer_call_enter_internal(
 
        TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
                DECR_TIMER_ENTER | DBG_FUNC_START,
-               call,
-               param1, deadline, flags, 0); 
+           VM_KERNEL_UNSLIDE_OR_PERM(call),
+           VM_KERNEL_UNSLIDE_OR_PERM(param1), deadline, flags, 0); 
 
        urgency = (flags & TIMER_CALL_URGENCY_MASK);
 
@@ -634,7 +634,7 @@ timer_call_enter_internal(
 
        TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
                DECR_TIMER_ENTER | DBG_FUNC_END,
-               call,
+               VM_KERNEL_UNSLIDE_OR_PERM(call),
                (old_queue != NULL), deadline, queue->count, 0); 
 
        splx(s);
@@ -688,7 +688,7 @@ timer_call_cancel(
 
        TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
                DECR_TIMER_CANCEL | DBG_FUNC_START,
-               call,
+               VM_KERNEL_UNSLIDE_OR_PERM(call),
                TCE(call)->deadline, call->soft_deadline, call->flags, 0);
 
        old_queue = timer_call_dequeue_unlocked(call);
@@ -708,8 +708,8 @@ timer_call_cancel(
        }
        TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
                DECR_TIMER_CANCEL | DBG_FUNC_END,
-               call,
-               old_queue,
+               VM_KERNEL_UNSLIDE_OR_PERM(call),
+               VM_KERNEL_UNSLIDE_OR_PERM(old_queue),
                TCE(call)->deadline - mach_absolute_time(),
                TCE(call)->deadline - TCE(call)->entry_time, 0);
        splx(s);
@@ -754,9 +754,9 @@ timer_queue_shutdown(
 #if TIMER_ASSERT
                        TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 
                                DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
-                               call,
+                               VM_KERNEL_UNSLIDE_OR_PERM(call),
                                call->async_dequeue,
-                               TCE(call)->queue,
+                               VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
                                0x2b, 0);
 #endif
                        timer_queue_unlock(queue);
@@ -824,7 +824,7 @@ timer_queue_expire_with_options(
                        TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->soft_deadline, 0, 0, 0);
                        TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 
                                DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
-                               call,
+                               VM_KERNEL_UNSLIDE_OR_PERM(call),
                                call->soft_deadline,
                                TCE(call)->deadline,
                                TCE(call)->entry_time, 0);
@@ -854,7 +854,10 @@ timer_queue_expire_with_options(
 
                        TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 
                                DECR_TIMER_CALLOUT | DBG_FUNC_START,
-                               call, VM_KERNEL_UNSLIDE(func), param0, param1, 0);
+                               VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
+                               VM_KERNEL_UNSLIDE_OR_PERM(param0),
+                               VM_KERNEL_UNSLIDE_OR_PERM(param1),
+                               0);
 
 #if CONFIG_DTRACE
                        DTRACE_TMR7(callout__start, timer_call_func_t, func,
@@ -876,7 +879,10 @@ timer_queue_expire_with_options(
 
                        TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 
                                DECR_TIMER_CALLOUT | DBG_FUNC_END,
-                               call, VM_KERNEL_UNSLIDE(func), param0, param1, 0);
+                               VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
+                               VM_KERNEL_UNSLIDE_OR_PERM(param0),
+                               VM_KERNEL_UNSLIDE_OR_PERM(param1),
+                               0);
                        call = NULL;
                        timer_queue_lock_spin(queue);
                } else {
@@ -1013,9 +1019,9 @@ timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to)
 #ifdef TIMER_ASSERT
                        TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 
                                DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
-                               call,
-                               TCE(call)->queue,
-                               call->lock.interlock.lock_data,
+                               VM_KERNEL_UNSLIDE_OR_PERM(call),
+                               VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
+                               VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
                                0x2b, 0);
 #endif
                        timer_queue_migrate_lock_skips++;
@@ -1071,7 +1077,7 @@ timer_queue_trace(
                                call->soft_deadline,
                                TCE(call)->deadline,
                                TCE(call)->entry_time,
-                               TCE(call)->func,
+                               VM_KERNEL_UNSLIDE(TCE(call)->func),
                                0);
                        call = TIMER_CALL(queue_next(qe(call)));
                } while (!queue_end(&queue->head, qe(call)));
@@ -1223,9 +1229,9 @@ timer_longterm_scan(timer_longterm_t      *tlp,
 #ifdef TIMER_ASSERT
                        TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
                                DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
-                               call,
-                               TCE(call)->queue,
-                               call->lock.interlock.lock_data,
+                               VM_KERNEL_UNSLIDE_OR_PERM(call),
+                               VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
+                               VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
                                0x2c, 0);
 #endif
                        timer_call_entry_dequeue_async(call);
@@ -1240,7 +1246,7 @@ timer_longterm_scan(timer_longterm_t      *tlp,
                        if (deadline < now)
                                TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
                                                DECR_TIMER_OVERDUE | DBG_FUNC_NONE,
-                                       call,
+                                       VM_KERNEL_UNSLIDE_OR_PERM(call),
                                        deadline,
                                        now,
                                        threshold,
@@ -1248,10 +1254,10 @@ timer_longterm_scan(timer_longterm_t    *tlp,
 #endif
                        TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
                                        DECR_TIMER_ESCALATE | DBG_FUNC_NONE,
-                               call,
+                               VM_KERNEL_UNSLIDE_OR_PERM(call),
                                TCE(call)->deadline,
                                TCE(call)->entry_time,
-                               TCE(call)->func,
+                               VM_KERNEL_UNSLIDE(TCE(call)->func),
                                0);
                        tlp->escalates++;
                        timer_call_entry_dequeue(call);
@@ -1289,7 +1295,7 @@ timer_longterm_update_locked(timer_longterm_t *tlp)
 
        TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 
                DECR_TIMER_UPDATE | DBG_FUNC_START,
-               &tlp->queue,
+               VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
                tlp->threshold.deadline,
                tlp->threshold.preempted,
                tlp->queue.count, 0);
@@ -1336,7 +1342,7 @@ timer_longterm_update_locked(timer_longterm_t *tlp)
 
        TIMER_KDEBUG_TRACE(KDEBUG_TRACE, 
                DECR_TIMER_UPDATE | DBG_FUNC_END,
-               &tlp->queue,
+               VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
                tlp->threshold.deadline,
                tlp->threshold.scans,
                tlp->queue.count, 0);
index be41d6143dc6829448bba425ac9cd42f62115ed8..fad26e5ba50443d1d8ce2088a30551582e6569e9 100644 (file)
@@ -584,9 +584,9 @@ struct wqt_elem *wq_table_alloc_elem(struct wq_table *table, int type, int nelem
                      type, table);
 
        assert(nelem > 0);
-       elem = NULL;
 
 try_again:
+       elem = NULL;
        if (ntries++ > max_retries) {
                struct wqt_elem *tmp;
                if (table->used_elem + nelem >= table_size)
@@ -4568,7 +4568,7 @@ static inline int waitq_maybe_remove_link(struct waitq *waitq,
                         * WQS we're unlinking, or to an invalid object:
                         * no need to invalidate it
                         */
-                       *wq_setid = right->sl_set_id.id;
+                       *wq_setid = right ? right->sl_set_id.id : 0;
                        lt_invalidate(parent);
                        wqdbg_v("S1, L");
                        return left ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID;
@@ -4578,7 +4578,7 @@ static inline int waitq_maybe_remove_link(struct waitq *waitq,
                         * WQS we're unlinking, or to an invalid object:
                         * no need to invalidate it
                         */
-                       *wq_setid = left->sl_set_id.id;
+                       *wq_setid = left ? left->sl_set_id.id : 0;
                        lt_invalidate(parent);
                        wqdbg_v("S1, R");
                        return right ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID;
index 6bcab735d1eb582b37a63afcb0d425544d4f75c4..351268d3454930e451cace047a95db6c92629554 100644 (file)
@@ -375,6 +375,13 @@ uint64_t zone_map_table_page_count = 0;
 vm_offset_t     zone_map_min_address = 0;  /* initialized in zone_init */
 vm_offset_t     zone_map_max_address = 0;
 
+/* Globals for random boolean generator for elements in free list */
+#define MAX_ENTROPY_PER_ZCRAM          4
+#define RANDOM_BOOL_GEN_SEED_COUNT      4
+static unsigned int bool_gen_seed[RANDOM_BOOL_GEN_SEED_COUNT];
+static unsigned int bool_gen_global = 0;
+decl_simple_lock_data(, bool_gen_lock)
+
 /* Helpful for walking through a zone's free element list. */
 struct zone_free_element {
        struct zone_free_element *next;
@@ -1924,6 +1931,84 @@ zone_prio_refill_configure(zone_t z, vm_size_t low_water_mark) {
        thread_deallocate(z->zone_replenish_thread);
 }
 
+/*
+ * Boolean Random Number Generator for generating booleans to randomize 
+ * the order of elements in newly zcram()'ed memory. The algorithm is a 
+ * modified version of the KISS RNG proposed in the paper:
+ * http://stat.fsu.edu/techreports/M802.pdf
+ * The modifications have been documented in the technical
+ * paper from UCL:
+ * http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf 
+ */
+
+static void random_bool_gen_entropy(
+               int     *buffer,
+               int     count)
+{
+
+       int i, t;
+       simple_lock(&bool_gen_lock);
+       for (i = 0; i < count; i++) {
+               bool_gen_seed[1] ^= (bool_gen_seed[1] << 5);
+               bool_gen_seed[1] ^= (bool_gen_seed[1] >> 7);
+               bool_gen_seed[1] ^= (bool_gen_seed[1] << 22);
+               t = bool_gen_seed[2] + bool_gen_seed[3] + bool_gen_global;
+               bool_gen_seed[2] = bool_gen_seed[3];
+               bool_gen_global = t < 0;
+               bool_gen_seed[3] = t &2147483647;
+               bool_gen_seed[0] += 1411392427;
+               buffer[i] = (bool_gen_seed[0] + bool_gen_seed[1] + bool_gen_seed[3]);
+       }
+       simple_unlock(&bool_gen_lock);
+}
+
+static boolean_t random_bool_gen(
+               int     *buffer,
+               int     index,
+               int     bufsize)
+{
+       int valindex, bitpos;
+       valindex = (index / (8 * sizeof(int))) % bufsize;
+       bitpos = index % (8 * sizeof(int));
+       return (boolean_t)(buffer[valindex] & (1 << bitpos));
+} 
+
+static void 
+random_free_to_zone(
+                       zone_t          zone,
+                       vm_offset_t     newmem,
+                       vm_offset_t     first_element_offset,
+                       int             element_count,
+                       boolean_t       from_zm,
+                       int             *entropy_buffer)
+{
+       vm_offset_t     last_element_offset;
+       vm_offset_t     element_addr;
+       vm_size_t       elem_size;
+       int             index;  
+
+       elem_size = zone->elem_size;
+       last_element_offset = first_element_offset + ((element_count * elem_size) - elem_size);
+       for (index = 0; index < element_count; index++) {
+               assert(first_element_offset <= last_element_offset);
+               if (random_bool_gen(entropy_buffer, index, MAX_ENTROPY_PER_ZCRAM)) {
+                       element_addr = newmem + first_element_offset;
+                       first_element_offset += elem_size;
+               } else {
+                       element_addr = newmem + last_element_offset;
+                       last_element_offset -= elem_size;
+               }
+               if (element_addr != (vm_offset_t)zone) {
+                       zone->count++;  /* compensate for free_to_zone */
+                       free_to_zone(zone, element_addr, FALSE);
+               }
+               if (!zone->use_page_list && from_zm) {
+                       zone_page_alloc(element_addr, elem_size);
+               }
+               zone->cur_size += elem_size;
+       }
+}
+
 /*
  *     Cram the given memory into the specified zone. Update the zone page count accordingly.
  */
@@ -1935,6 +2020,9 @@ zcram(
 {
        vm_size_t       elem_size;
        boolean_t   from_zm = FALSE;
+       vm_offset_t first_element_offset;
+       int element_count;
+       int entropy_buffer[MAX_ENTROPY_PER_ZCRAM];
 
        /* Basic sanity checks */
        assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
@@ -1943,6 +2031,8 @@ zcram(
 
        elem_size = zone->elem_size;
 
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_START, VM_KERNEL_ADDRPERM(zone), size, 0, 0, 0);
+
        if (from_zone_map(newmem, size))
                from_zm = TRUE;
 
@@ -1955,6 +2045,8 @@ zcram(
 
        ZONE_PAGE_COUNT_INCR(zone, (size / PAGE_SIZE));
 
+       random_bool_gen_entropy(entropy_buffer, MAX_ENTROPY_PER_ZCRAM);
+       
        lock_zone(zone);
 
        if (zone->use_page_list) {
@@ -1965,7 +2057,6 @@ zcram(
                assert((size & PAGE_MASK) == 0);
                for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) {
 
-                       vm_size_t pos_in_page;
                        page_metadata = (struct zone_page_metadata *)(newmem);
                        
                        page_metadata->pages.next = NULL;
@@ -1977,36 +2068,24 @@ zcram(
 
                        enqueue_tail(&zone->pages.all_used, (queue_entry_t)page_metadata);
 
-                       vm_offset_t first_element_offset;
                        if (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT == 0){
                                first_element_offset = zone_page_metadata_size;
                        } else {
                                first_element_offset = zone_page_metadata_size + (ZONE_ELEMENT_ALIGNMENT - (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT));
                        }
-
-                       for (pos_in_page = first_element_offset; (newmem + pos_in_page + elem_size) < (vm_offset_t)(newmem + PAGE_SIZE); pos_in_page += elem_size) {
-                               page_metadata->alloc_count++;
-                               zone->count++;  /* compensate for free_to_zone */
-                               free_to_zone(zone, newmem + pos_in_page, FALSE);
-                               zone->cur_size += elem_size;
-                       }
-               }
-       } else {
-               while (size >= elem_size) {
-                       zone->count++;  /* compensate for free_to_zone */
-                       if (newmem == (vm_offset_t)zone) {
-                               /* Don't free zone_zone zone */
-                       } else {
-                               free_to_zone(zone, newmem, FALSE);
-                       }
-                       if (from_zm)
-                               zone_page_alloc(newmem, elem_size);
-                       size -= elem_size;
-                       newmem += elem_size;
-                       zone->cur_size += elem_size;
+                       element_count = (int)((PAGE_SIZE - first_element_offset) / elem_size);
+                       page_metadata->alloc_count += element_count;
+                       random_free_to_zone(zone, newmem, first_element_offset, element_count, from_zm, entropy_buffer);                        
                }
+       } else {        
+               first_element_offset = 0;
+               element_count = (int)((size - first_element_offset) / elem_size);               
+               random_free_to_zone(zone, newmem, first_element_offset, element_count, from_zm, entropy_buffer);
        }
        unlock_zone(zone);
+       
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_END, VM_KERNEL_ADDRPERM(zone), 0, 0, 0, 0);
+
 }
 
 
@@ -2070,6 +2149,7 @@ void
 zone_bootstrap(void)
 {
        char temp_buf[16];
+       unsigned int i;
 
        if (PE_parse_boot_argn("-zinfop", temp_buf, sizeof(temp_buf))) {
                zinfo_per_task = TRUE;
@@ -2081,6 +2161,12 @@ zone_bootstrap(void)
        /* Set up zone element poisoning */
        zp_init();
 
+       /* Seed the random boolean generator for elements in zone free list */
+       for (i = 0; i < RANDOM_BOOL_GEN_SEED_COUNT; i++) {
+               bool_gen_seed[i] = (unsigned int)early_random();
+       }
+       simple_lock_init(&bool_gen_lock, 0);
+
        /* should zlog log to debug zone corruption instead of leaks? */
        if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf))) {
                corruption_debug_flag = TRUE;
@@ -2139,7 +2225,6 @@ zone_bootstrap(void)
        /* initialize fake zones and zone info if tracking by task */
        if (zinfo_per_task) {
                vm_size_t zisize = sizeof(zinfo_usage_store_t) * ZINFO_SLOTS;
-               unsigned int i;
 
                for (i = 0; i < num_fake_zones; i++)
                        fake_zones[i].init(ZINFO_SLOTS - num_fake_zones + i);
index 2c0b22e41dc67c5b023e89caa711ecde1212e35a..6b2038de4585600501fd409e8ad2c9c8ce20dab6 100644 (file)
@@ -88,6 +88,10 @@ struct coalition_resource_usage {
        uint64_t gpu_time;
        uint64_t cpu_time_billed_to_me;
        uint64_t cpu_time_billed_to_others;
+       uint64_t logical_immediate_writes;
+       uint64_t logical_deferred_writes;
+       uint64_t logical_invalidated_writes;
+       uint64_t logical_metadata_writes;
 };
 
 #ifdef PRIVATE
index bfedcc3eceddc2b3329eb5c139c3de4a9ac4caa9..f0fa37d2732b47ff07d5052ef544ca4e774bfa97 100644 (file)
                /* The requested property cannot be changed at this time.
                 */
 
+#define KERN_INSUFFICIENT_BUFFER_SIZE  52
+               /* The provided buffer is of insufficient size for the requested data.
+                */
+
 #define        KERN_RETURN_MAX                 0x100
                /* Maximum return value allowable
                 */
index 09ea8bb8a9c174bb7ae332d95d3684facdf44f18..9c459178a7fa390d244a68cb15a0b7eecf5f35e2 100644 (file)
@@ -405,7 +405,6 @@ __END_DECLS
 #define CPUFAMILY_ARM_SWIFT            0x1e2d6381
 #define CPUFAMILY_ARM_CYCLONE          0x37a09642
 #define CPUFAMILY_ARM_TYPHOON          0x2c91a47e
-#define CPUFAMILY_ARM_TWISTER          0x92fb37c8
 
 /* The following synonyms are deprecated: */
 #define CPUFAMILY_INTEL_6_14   CPUFAMILY_INTEL_YONAH
index c4794aab52fc1211af7b5141e9f8ba39d90a59f8..e39523ffc210883a6c7c6edd01299c7822cfaf49 100644 (file)
@@ -182,16 +182,19 @@ typedef struct thread_debug_info_internal  thread_debug_info_internal_data_t;
 
 #endif /* PRIVATE */
 
+#define IO_NUM_PRIORITIES      4
 
-/*
- * Obsolete interfaces.
- */
-
-#define THREAD_SCHED_TIMESHARE_INFO    10
-#define THREAD_SCHED_RR_INFO           11
-#define THREAD_SCHED_FIFO_INFO         12
+#define UPDATE_IO_STATS(info, size)                            \
+{                                                              \
+       info.count++;                                           \
+       info.size += size;                                      \
+}
 
-#define IO_NUM_PRIORITIES      4
+#define UPDATE_IO_STATS_ATOMIC(info, io_size)                  \
+{                                                              \
+       OSIncrementAtomic64((SInt64 *)&(info.count));           \
+       OSAddAtomic64(io_size, (SInt64 *)&(info.size));         \
+}
 
 struct io_stat_entry {
        uint64_t        count;
@@ -208,4 +211,12 @@ struct io_stat_info {
 
 typedef struct io_stat_info *io_stat_info_t;
 
+/* 
+ * Obsolete interfaces.
+ */
+
+#define THREAD_SCHED_TIMESHARE_INFO     10
+#define THREAD_SCHED_RR_INFO            11
+#define THREAD_SCHED_FIFO_INFO          12
+
 #endif /* _MACH_THREAD_INFO_H_ */
index 6a33043a6a5a4df55dd9de3cff11632e9171ea24..1fa361488256fe4209d6d53b7f706df179ccf640 100644 (file)
@@ -308,7 +308,9 @@ extern vm_offset_t      vm_elinkedit;
  * VM_KERNEL_UNSLIDE_OR_ADDRPERM:
  *     Use this macro when you are exposing an address to userspace that could
  *     come from either kernel text/data *or* the heap. This is a rare case,
- *     but one that does come up and must be handled correctly.
+ *     but one that does come up and must be handled correctly. If the argument
+ *     is known to be lower than any potential heap address, no transformation
+ *     is applied, to avoid revealing the operation on a constant.
  *
  * Nesting of these macros should be considered invalid.
  */
@@ -333,7 +335,7 @@ extern vm_offset_t      vm_elinkedit;
           VM_KERNEL_IS_PRELINKINFO(_v) ||   \
           VM_KERNEL_IS_KEXT_LINKEDIT(_v)) ?         \
                        (vm_offset_t)(_v) - vm_kernel_slide :    \
-                       VM_KERNEL_ADDRPERM(_v))
+                ((vm_offset_t)(_v) >= VM_MIN_KERNEL_AND_KEXT_ADDRESS ? VM_KERNEL_ADDRPERM(_v) : (vm_offset_t)(_v)))
        
 
 #endif /* XNU_KERNEL_PRIVATE */
index 07e906a6fd2f915c4be357009f90a4e72483aa75..51e62a58443fdf12054c53d2862a82d7bf36f861 100644 (file)
@@ -102,7 +102,7 @@ type page_address_array_t = ^array[] of integer_t;
 
 type symtab_name_t = c_string[*:32];
 
-type lockgroup_info_t = struct[63] of integer_t;
+type lockgroup_info_t = struct[33] of uint64_t;
 type lockgroup_info_array_t = array[] of lockgroup_info_t;
 
 type mach_memory_info_t = struct[8] of uint64_t;
index f7e485bde671ff30e51db3dbddf9cc8b1e9746e4..2dbe896b4b0d5b4a20c4dc93f27d29ba41186949 100644 (file)
@@ -375,6 +375,7 @@ memory_object_control_uiomove(
        int                     i;
        int                     orig_offset;
        vm_page_t               page_run[MAX_RUN];
+       int                     dirty_count;    /* keeps track of number of pages dirtied as part of this uiomove */
 
        object = memory_object_control_to_vm_object(control);
        if (object == VM_OBJECT_NULL) {
@@ -395,14 +396,15 @@ memory_object_control_uiomove(
                return 0;
        }
        orig_offset = start_offset;
-           
+
+       dirty_count = 0;        
        while (io_requested && retval == 0) {
 
                cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;
 
                if (cur_needed > MAX_RUN)
                        cur_needed = MAX_RUN;
-
+               
                for (cur_run = 0; cur_run < cur_needed; ) {
 
                        if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
@@ -435,6 +437,8 @@ memory_object_control_uiomove(
                        assert(!dst_page->encrypted);
 
                        if (mark_dirty) {
+                               if (dst_page->dirty == FALSE)
+                                       dirty_count++;
                                SET_PAGE_DIRTY(dst_page, FALSE);
                                if (dst_page->cs_validated && 
                                    !dst_page->cs_tainted) {
@@ -518,7 +522,7 @@ memory_object_control_uiomove(
                orig_offset = 0;
        }
        vm_object_unlock(object);
-
+       task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED);
        return (retval);
 }
 
index e8ace8c9b90fb0cc128c1657ee7947afe6a8700b..848b1eea804e295a1e90176de10ebcb46c23b544 100644 (file)
@@ -537,10 +537,12 @@ vm_object_update_extent(
        struct vm_page_delayed_work     *dwp;
        int             dw_count;
        int             dw_limit;
+       int             dirty_count;
 
         dwp = &dw_array[0];
         dw_count = 0;
        dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
+       dirty_count = 0;
 
        for (;
             offset < offset_end && object->resident_page_count;
@@ -595,6 +597,8 @@ vm_object_update_extent(
                                break;
 
                        case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
+                               if (m->dirty == TRUE)
+                                       dirty_count++;
                                dwp->dw_mask |= DW_vm_page_free;
                                break;
 
@@ -646,6 +650,10 @@ vm_object_update_extent(
                        break;
                }
        }
+       
+       if (dirty_count) {
+               task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_INVALIDATED);
+       }
        /*
         *      We have completed the scan for applicable pages.
         *      Clean any pages that have been saved.
index 9d72bf588968489b0ca5cc13b0f5ae8bb21b2bf3..8a4b26961782deee6d0f493d6e1bd95b5c4cecad 100644 (file)
@@ -658,10 +658,10 @@ extern void pmap_unmap_sharedpage(pmap_t pmap);
 void pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr);
 #endif
 
-unsigned int pmap_query_resident(pmap_t pmap,
-                                vm_map_offset_t s,
-                                vm_map_offset_t e,
-                                unsigned int *compressed_count_p);
+mach_vm_size_t pmap_query_resident(pmap_t pmap,
+                                  vm_map_offset_t s,
+                                  vm_map_offset_t e,
+                                  mach_vm_size_t *compressed_bytes_p);
 
 #if CONFIG_PGTRACE
 int pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end);
index cf05d1950b8c6493b5c16e655951b1ae2f51dee3..64b7d7bd9be50829648fa8d1641435c6b86fb8eb 100644 (file)
@@ -324,7 +324,10 @@ vm_wants_task_throttled(task_t task)
        if (task == kernel_task)
                return (0);
 
-       if (vm_compressor_mode == COMPRESSED_PAGER_IS_ACTIVE || vm_compressor_mode == DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
+       if (COMPRESSED_PAGER_IS_SWAPLESS || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS)
+               return (0);
+
+       if (COMPRESSED_PAGER_IS_SWAPBACKED || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
                if ((vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) &&
                    (unsigned int)pmap_compressed(task->map->pmap) > (c_segment_pages_compressed / 4))
                        return (1);
@@ -2152,6 +2155,9 @@ do_fastwake_warmup(void)
                    c_seg->c_generation_id > last_c_segment_to_warm_generation_id)
                        break;
 
+               if (vm_page_free_count < (AVAILABLE_MEMORY / 4))
+                       break;
+
                lck_mtx_lock_spin_always(&c_seg->c_lock);
                lck_mtx_unlock_always(c_list_lock);
                
index 663199f2ab8a3e9036af8577183724885b5ee199..655c302d25931f85e78370f29e89f2516e71957c 100644 (file)
@@ -3192,7 +3192,8 @@ MACRO_END
 
                        if (m->wpmapped == FALSE) {
                                vm_object_lock_assert_exclusive(m->object);
-
+                               if (!m->object->internal)
+                                       task_update_logical_writes(current_task(), PAGE_SIZE, TASK_WRITE_DEFERRED);
                                m->wpmapped = TRUE;
                        }
                        if (must_disconnect) {
@@ -4827,7 +4828,10 @@ handle_copy_delay:
                        int superpage;
 
                        if (!object->pager_created &&
-                           object->phys_contiguous) {
+                           object->phys_contiguous &&
+                           VME_OFFSET(entry) == 0 &&
+                           (entry->vme_end - entry->vme_start == object->vo_size) &&
+                           VM_MAP_PAGE_ALIGNED(entry->vme_start, (object->vo_size-1))) {
                                superpage = VM_MEM_SUPERPAGE;
                        } else {
                                superpage = 0;
index d015ebd2c7cc79e1728f7fc89411875777cd5feb..c75b23835407c30f6c8900904ed11e37b71ec696 100644 (file)
@@ -281,9 +281,10 @@ kernel_memory_allocate(
         * limit the size of a single extent of wired memory
         * to try and limit the damage to the system if
         * too many pages get wired down
-        * limit raised to 2GB with 128GB max physical limit
+        * limit raised to 2GB with 128GB max physical limit,
+        * but scaled by installed memory above this
         */
-        if ( !(flags & KMA_VAONLY) && map_size > (1ULL << 31)) {
+        if ( !(flags & KMA_VAONLY) && map_size > MAX(1ULL<<31, sane_size/64)) {
                 return KERN_RESOURCE_SHORTAGE;
         }
 
index 7800e4a237d524d4e35c03e9793f49c654a606f9..ac2edb6a5a3635e4cb59fab57314629c50602b75 100644 (file)
@@ -4999,6 +4999,7 @@ vm_map_wire_nested(
                                           &real_map)) {
 
                                        vm_map_unlock_read(lookup_map);
+                                       assert(map_pmap == NULL);
                                        vm_map_unwire(map, start,
                                                      s, user_wire);
                                        return(KERN_FAILURE);
@@ -5347,7 +5348,8 @@ done:
 
        if (rc != KERN_SUCCESS) {
                /* undo what has been wired so far */
-               vm_map_unwire(map, start, s, user_wire);
+               vm_map_unwire_nested(map, start, s, user_wire,
+                                    map_pmap, pmap_addr);
                if (physpage_p) {
                        *physpage_p = 0;
                }
@@ -9152,13 +9154,35 @@ vm_map_copyin_common(
        __unused boolean_t      src_volatile,
        vm_map_copy_t   *copy_result,   /* OUT */
        boolean_t       use_maxprot)
+{
+       int flags;
+
+       flags = 0;
+       if (src_destroy) {
+               flags |= VM_MAP_COPYIN_SRC_DESTROY;
+       }
+       if (use_maxprot) {
+               flags |= VM_MAP_COPYIN_USE_MAXPROT;
+       }
+       return vm_map_copyin_internal(src_map,
+                                     src_addr,
+                                     len,
+                                     flags,
+                                     copy_result);
+}
+kern_return_t
+vm_map_copyin_internal(
+       vm_map_t        src_map,
+       vm_map_address_t src_addr,
+       vm_map_size_t   len,
+       int             flags,
+       vm_map_copy_t   *copy_result)   /* OUT */
 {
        vm_map_entry_t  tmp_entry;      /* Result of last map lookup --
                                         * in multi-level lookup, this
                                         * entry contains the actual
                                         * vm_object/offset.
                                         */
-       register
        vm_map_entry_t  new_entry = VM_MAP_ENTRY_NULL;  /* Map entry for copy */
 
        vm_map_offset_t src_start;      /* Start of current entry --
@@ -9171,10 +9195,18 @@ vm_map_copyin_common(
        boolean_t       map_share=FALSE;
        submap_map_t    *parent_maps = NULL;
 
-       register
        vm_map_copy_t   copy;           /* Resulting copy */
        vm_map_address_t copy_addr;
        vm_map_size_t   copy_size;
+       boolean_t       src_destroy;
+       boolean_t       use_maxprot;
+
+       if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
+               return KERN_INVALID_ARGUMENT;
+       }
+               
+       src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
+       use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
 
        /*
         *      Check for copies of zero bytes.
@@ -9198,7 +9230,9 @@ vm_map_copyin_common(
         * setting up VM (and taking C-O-W faults) dominates the copy costs
         * for small regions.
         */
-       if ((len < msg_ool_size_small) && !use_maxprot)
+       if ((len < msg_ool_size_small) &&
+           !use_maxprot &&
+           !(flags & VM_MAP_COPYIN_ENTRY_LIST))
                return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
                                                   src_destroy, copy_result);
 
@@ -15885,7 +15919,6 @@ vm_map_query_volatile(
        mach_vm_size_t  volatile_pmap_count;
        mach_vm_size_t  volatile_compressed_pmap_count;
        mach_vm_size_t  resident_count;
-       unsigned int    compressed_count;
        vm_map_entry_t  entry;
        vm_object_t     object;
 
@@ -15900,6 +15933,8 @@ vm_map_query_volatile(
        for (entry = vm_map_first_entry(map);
             entry != vm_map_to_entry(map);
             entry = entry->vme_next) {
+               mach_vm_size_t  pmap_resident_bytes, pmap_compressed_bytes;
+
                if (entry->is_sub_map) {
                        continue;
                }
@@ -15937,12 +15972,15 @@ vm_map_query_volatile(
                        volatile_compressed_count +=
                                vm_compressor_pager_get_count(object->pager);
                }
-               compressed_count = 0;
-               volatile_pmap_count += pmap_query_resident(map->pmap,
-                                                          entry->vme_start,
-                                                          entry->vme_end,
-                                                          &compressed_count);
-               volatile_compressed_pmap_count += compressed_count;
+               pmap_compressed_bytes = 0;
+               pmap_resident_bytes =
+                       pmap_query_resident(map->pmap,
+                                           entry->vme_start,
+                                           entry->vme_end,
+                                           &pmap_compressed_bytes);
+               volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
+               volatile_compressed_pmap_count += (pmap_compressed_bytes
+                                                  / PAGE_SIZE);
        }
 
        /* map is still locked on return */
index 7f53e532554c8c9323303dcf18654b3dff17be2d..44c9879254af3838e272ff4e20f827150a3fa44e 100644 (file)
@@ -1159,6 +1159,17 @@ extern kern_return_t     vm_map_copyin_common(
                                vm_map_copy_t           *copy_result,   /* OUT */
                                boolean_t               use_maxprot);
 
+#define VM_MAP_COPYIN_SRC_DESTROY      0x00000001
+#define VM_MAP_COPYIN_USE_MAXPROT      0x00000002
+#define VM_MAP_COPYIN_ENTRY_LIST       0x00000004
+#define VM_MAP_COPYIN_ALL_FLAGS                0x00000007
+extern kern_return_t   vm_map_copyin_internal(
+                               vm_map_t                src_map,
+                               vm_map_address_t        src_addr,
+                               vm_map_size_t           len,
+                               int                     flags,
+                               vm_map_copy_t           *copy_result); /* OUT */
+
 extern kern_return_t   vm_map_copy_extract(
        vm_map_t                src_map,
        vm_map_address_t        src_addr,
index a2d77426bee33803fa07c3470d5cb772da234189..35c9ba57b07603e9bf019f0dda8d572254d05f41 100644 (file)
@@ -6632,6 +6632,10 @@ vm_object_lock_request(
 void
 vm_object_purge(vm_object_t object, int flags)
 {
+       unsigned int    object_page_count = 0;
+       unsigned int    pgcount = 0;
+       boolean_t       skipped_object = FALSE;
+
         vm_object_lock_assert_exclusive(object);
 
        if (object->purgable == VM_PURGABLE_DENY)
@@ -6677,11 +6681,12 @@ vm_object_purge(vm_object_t object, int flags)
        }
        assert(object->purgable == VM_PURGABLE_EMPTY);
        
+       object_page_count = object->resident_page_count;
+
        vm_object_reap_pages(object, REAP_PURGEABLE);
 
        if (object->pager != NULL &&
            COMPRESSED_PAGER_IS_ACTIVE) {
-               unsigned int pgcount;
 
                if (object->activity_in_progress == 0 &&
                    object->paging_in_progress == 0) {
@@ -6726,10 +6731,19 @@ vm_object_purge(vm_object_t object, int flags)
                         * pager if there's any kind of operation in
                         * progress on the VM object.
                         */
+                       skipped_object = TRUE;
                }
        }
 
        vm_object_lock_assert_exclusive(object);
+
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_VM, OBJECT_PURGE_ONE)),
+                             VM_KERNEL_UNSLIDE_OR_PERM(object), /* purged object */
+                             object_page_count,
+                             pgcount,
+                             skipped_object,
+                             0);
+
 }
                                
 
index 53185086d7953452ff30ebf63f47b74dfa052a23..2fca44b363078f6109625af90a60552895b16108 100644 (file)
@@ -322,6 +322,9 @@ static void vm_pageout_immediate(vm_page_t, boolean_t);
 boolean_t      vm_compressor_immediate_preferred = FALSE;
 boolean_t      vm_compressor_immediate_preferred_override = FALSE;
 boolean_t      vm_restricted_to_single_processor = FALSE;
+static boolean_t vm_pageout_waiter  = FALSE;
+static boolean_t vm_pageout_running = FALSE;
+
 
 static thread_t        vm_pageout_external_iothread = THREAD_NULL;
 static thread_t        vm_pageout_internal_iothread = THREAD_NULL;
@@ -349,7 +352,6 @@ int vm_upl_wait_for_pages = 0;
  */
 
 unsigned int vm_pageout_active = 0;            /* debugging */
-unsigned int vm_pageout_active_busy = 0;       /* debugging */
 unsigned int vm_pageout_inactive = 0;          /* debugging */
 unsigned int vm_pageout_inactive_throttled = 0;        /* debugging */
 unsigned int vm_pageout_inactive_forced = 0;   /* debugging */
@@ -3126,6 +3128,10 @@ vm_pageout_continue(void)
        DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
        vm_pageout_scan_event_counter++;
 
+       lck_mtx_lock(&vm_page_queue_free_lock);
+       vm_pageout_running = TRUE;
+       lck_mtx_unlock(&vm_page_queue_free_lock);
+
        vm_pageout_scan();
        /*
         * we hold both the vm_page_queue_free_lock
@@ -3135,6 +3141,12 @@ vm_pageout_continue(void)
        assert(vm_page_free_wanted_privileged == 0);
        assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
 
+       vm_pageout_running = FALSE;
+       if (vm_pageout_waiter) {
+               vm_pageout_waiter = FALSE;
+               thread_wakeup((event_t)&vm_pageout_waiter);
+       }
+
        lck_mtx_unlock(&vm_page_queue_free_lock);
        vm_page_unlock_queues();
 
@@ -3143,6 +3155,25 @@ vm_pageout_continue(void)
        /*NOTREACHED*/
 }
 
+kern_return_t
+vm_pageout_wait(uint64_t deadline)
+{
+       kern_return_t kr;
+
+       lck_mtx_lock(&vm_page_queue_free_lock);
+       for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
+               vm_pageout_waiter = TRUE;
+               if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
+                               &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
+                               (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
+                       kr = KERN_OPERATION_TIMED_OUT;
+               }
+       }
+       lck_mtx_unlock(&vm_page_queue_free_lock);
+
+       return (kr);
+}
+
 
 #ifdef FAKE_DEADLOCK
 
index 35ab0b34371ff383de2e9a478b0fff1e0b595891..bd7cb800a620c56d0a22549b116433b0875ca8ba 100644 (file)
@@ -131,6 +131,8 @@ extern int  vm_debug_events;
 #define VM_EXECVE                      0x131
 #define VM_WAKEUP_COMPACTOR_SWAPPER    0x132
 
+#define VM_DATA_WRITE                  0x140
+
 #define VM_DEBUG_EVENT(name, event, control, arg1, arg2, arg3, arg4)   \
        MACRO_BEGIN                                             \
        if (vm_debug_events) {                                  \
@@ -186,6 +188,8 @@ extern vm_page_t          vm_page_get_next(vm_page_t page);
 
 extern kern_return_t   mach_vm_pressure_level_monitor(boolean_t wait_for_pressure, unsigned int *pressure_level);
 
+extern kern_return_t   vm_pageout_wait(uint64_t deadline);
+
 #ifdef MACH_KERNEL_PRIVATE
 
 #include <vm/vm_page.h>
index 6df155404f39b5f9cb57ded1d6bbfc1ce59c4735..3c6807cb5850b2422e97bf11a00500182a7d6282 100644 (file)
@@ -71,14 +71,6 @@ int purgeable_nonvolatile_count;
 
 decl_lck_mtx_data(,vm_purgeable_queue_lock)
 
-#define TOKEN_ADD              0x40    /* 0x100 */
-#define TOKEN_DELETE           0x41    /* 0x104 */
-#define TOKEN_RIPEN            0x42    /* 0x108 */
-#define OBJECT_ADD             0x48    /* 0x120 */
-#define OBJECT_REMOVE          0x49    /* 0x124 */
-#define OBJECT_PURGE           0x4a    /* 0x128 */
-#define OBJECT_PURGE_ALL       0x4b    /* 0x12c */
-
 static token_idx_t vm_purgeable_token_remove_first(purgeable_q_t queue);
 
 static void vm_purgeable_stats_helper(vm_purgeable_stat_t *stat, purgeable_q_t queue, int group, task_t target_task);
@@ -688,6 +680,8 @@ vm_purgeable_object_find_and_lock(
        int             best_object_task_importance;
        int             best_object_skipped;
        int             num_objects_skipped;
+       int             try_lock_failed = 0;
+       int             try_lock_succeeded = 0;
        task_t          owner;
 
        best_object = VM_OBJECT_NULL;
@@ -700,12 +694,29 @@ vm_purgeable_object_find_and_lock(
         * remaining elements in order.
         */
 
-       num_objects_skipped = -1;
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_VM, OBJECT_PURGE_LOOP) | DBG_FUNC_START),
+                             pick_ripe,
+                             group,
+                             VM_KERNEL_UNSLIDE_OR_PERM(queue),
+                             0,
+                             0);
+
+       num_objects_skipped = 0;
        for (object = (vm_object_t) queue_first(&queue->objq[group]);
             !queue_end(&queue->objq[group], (queue_entry_t) object);
             object = (vm_object_t) queue_next(&object->objq),
                num_objects_skipped++) {
 
+               /*
+                * To prevent us looping for an excessively long time, choose
+                * the best object we've seen after looking at PURGEABLE_LOOP_MAX elements.
+                * If we haven't seen an eligible object after PURGEABLE_LOOP_MAX elements,
+                * we keep going until we find the first eligible object.
+                */
+               if ((num_objects_skipped >= PURGEABLE_LOOP_MAX) && (best_object != NULL)) {
+                       break;
+               }
+
                if (pick_ripe &&
                    ! object->purgeable_when_ripe) {
                        /* we want an object that has a ripe token */
@@ -721,6 +732,7 @@ vm_purgeable_object_find_and_lock(
 
                if (object_task_importance < best_object_task_importance) {
                        if (vm_object_lock_try(object)) {
+                               try_lock_succeeded++;
                                if (best_object != VM_OBJECT_NULL) {
                                        /* forget about previous best object */
                                        vm_object_unlock(best_object);
@@ -732,9 +744,19 @@ vm_purgeable_object_find_and_lock(
                                        /* can't get any better: stop looking */
                                        break;
                                }
+                       } else {
+                               try_lock_failed++;
                        }
                }
        }
+
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_VM, OBJECT_PURGE_LOOP) | DBG_FUNC_END),
+                             num_objects_skipped, /* considered objects */
+                             try_lock_failed,
+                             try_lock_succeeded,
+                             VM_KERNEL_UNSLIDE_OR_PERM(best_object),
+                             ((best_object == NULL) ? 0 : best_object->resident_page_count));
+
        object = best_object;
 
        if (object == VM_OBJECT_NULL) {
index c958f6b0e059da97a1ae0d59164e43942a750c7c..c982a6307c2e43e481b4500c507298ab1fae4391 100644 (file)
@@ -128,4 +128,16 @@ void vm_purgeable_accounting(vm_object_t   object,
 void vm_purgeable_compressed_update(vm_object_t        object,
                                    int         delta);
 
+#define PURGEABLE_LOOP_MAX 64
+
+#define TOKEN_ADD              0x40    /* 0x100 */
+#define TOKEN_DELETE           0x41    /* 0x104 */
+#define TOKEN_RIPEN            0x42    /* 0x108 */
+#define OBJECT_ADD             0x48    /* 0x120 */
+#define OBJECT_REMOVE          0x49    /* 0x124 */
+#define OBJECT_PURGE           0x4a    /* 0x128 */
+#define OBJECT_PURGE_ALL       0x4b    /* 0x12c */
+#define OBJECT_PURGE_ONE       0x4c    /* 0x12d */
+#define OBJECT_PURGE_LOOP      0x4e    /* 0x12e */
+
 #endif /* __VM_PURGEABLE_INTERNAL__ */
index 0f1c6c9905103f683be5dec8115248b0df99fda0..25c1edb266ec449eae17e8cb2f0579ba9956b958 100644 (file)
@@ -3104,14 +3104,16 @@ vm_page_unwire(
 
        VM_PAGE_CHECK(mem);
        assert(VM_PAGE_WIRED(mem));
+       assert(!mem->gobbled);
        assert(mem->object != VM_OBJECT_NULL);
 #if DEBUG
        vm_object_lock_assert_exclusive(mem->object);
        lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
 #endif
        if (--mem->wire_count == 0) {
-               assert(!mem->private && !mem->fictitious);
-               vm_page_wire_count--;
+               if (!mem->private && !mem->fictitious) {
+                       vm_page_wire_count--;
+               }
                assert(mem->object->wired_page_count > 0);
                mem->object->wired_page_count--;
                if (!mem->object->wired_page_count) {
index 7b1eb07034e0ae94775e026f09c738efe84844f6..8ed0fc4836dcd6f9d9838019a77d2c54b746b429 100644 (file)
@@ -2114,11 +2114,11 @@ mach_make_memory_entry_64(
                        offset_in_page = 0;
                }
 
-               kr = vm_map_copyin(target_map,
-                                  map_start,
-                                  map_size,
-                                  FALSE,
-                                  &copy);
+               kr = vm_map_copyin_internal(target_map,
+                                           map_start,
+                                           map_size,
+                                           VM_MAP_COPYIN_ENTRY_LIST,
+                                           &copy);
                if (kr != KERN_SUCCESS) {
                        return kr;
                }
index 0dc07f85020b2b1082e3ea3f663ee23987adc180..edba4c4db95c79cb96df73c391aec3fbb1d062d9 100644 (file)
@@ -307,9 +307,9 @@ L_common_dispatch:
        shr     $32, %rcx
        testl   %ecx, %ecx
        jz      4f
-       movl    $0, %gs:CPU_TLB_INVALID
        testl   $(1<<16), %ecx                  /* Global? */
        jz      3f
+       movl    $0, %gs:CPU_TLB_INVALID
        mov     %cr4, %rcx      /* RMWW CR4, for lack of an alternative*/
        and     $(~CR4_PGE), %rcx
        mov     %rcx, %cr4
@@ -317,6 +317,7 @@ L_common_dispatch:
        mov     %rcx, %cr4
        jmp     4f
 3:
+       movb    $0, %gs:CPU_TLB_INVALID_LOCAL
        mov     %cr3, %rcx
        mov     %rcx, %cr3
 4:
index 69a3bdc267efb6cdc363df06a9f7ffeabd8e142f..45be582574e381f16b027ad6b772d5462cbb51d8 100644 (file)
@@ -2434,6 +2434,7 @@ pmap_flush_tlbs(pmap_t    pmap, vm_map_offset_t startv, vm_map_offset_t endv, int o
        boolean_t       pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap));
        boolean_t       need_global_flush = FALSE;
        uint32_t        event_code;
+       vm_map_offset_t event_startv, event_endv;
        boolean_t       is_ept = is_ept_pmap(pmap);
 
        assert((processor_avail_count < 2) ||
@@ -2441,14 +2442,20 @@ pmap_flush_tlbs(pmap_t  pmap, vm_map_offset_t startv, vm_map_offset_t endv, int o
 
        if (pmap == kernel_pmap) {
                event_code = PMAP_CODE(PMAP__FLUSH_KERN_TLBS);
+               event_startv = VM_KERNEL_UNSLIDE_OR_PERM(startv);
+               event_endv = VM_KERNEL_UNSLIDE_OR_PERM(endv);
        } else if (is_ept) {
                event_code = PMAP_CODE(PMAP__FLUSH_EPT);
+               event_startv = startv;
+               event_endv = endv;
        } else {
                event_code = PMAP_CODE(PMAP__FLUSH_TLBS);
+               event_startv = startv;
+               event_endv = endv;
        }
 
        PMAP_TRACE_CONSTANT(event_code | DBG_FUNC_START,
-                           pmap, options, startv, endv, 0);
+                               VM_KERNEL_UNSLIDE_OR_PERM(pmap), options, event_startv, event_endv, 0);
 
        if (is_ept) {
                mp_cpus_call(CPUMASK_ALL, ASYNC, invept, (void*)pmap->pm_eptp);
@@ -2574,7 +2581,7 @@ pmap_flush_tlbs(pmap_t    pmap, vm_map_offset_t startv, vm_map_offset_t endv, int o
                                                continue;
                                        PMAP_TRACE_CONSTANT(
                                                PMAP_CODE(PMAP__FLUSH_TLBS_TO),
-                                               pmap, cpus_to_signal, cpus_to_respond, 0, 0);
+                                               VM_KERNEL_UNSLIDE_OR_PERM(pmap), cpus_to_signal, cpus_to_respond, 0, 0);
                                        is_timeout_traced = TRUE;
                                        continue;
                                }
@@ -2595,7 +2602,7 @@ pmap_flush_tlbs(pmap_t    pmap, vm_map_offset_t startv, vm_map_offset_t endv, int o
 
 out:
        PMAP_TRACE_CONSTANT(event_code | DBG_FUNC_END,
-                           pmap, cpus_to_signal, startv, endv, 0);
+                               VM_KERNEL_UNSLIDE_OR_PERM(pmap), cpus_to_signal, event_startv, event_endv, 0);
 
 }
 
index d75117bc8d0d8261af951280a8575ba59a15757a..7f147cff1c953dbc1a963e0529d063ba9f2c4862 100644 (file)
@@ -364,6 +364,7 @@ mac_policy_init(void)
        mac_policy_list.chunks = 1;
 
        mac_policy_list.entries = kalloc(sizeof(struct mac_policy_list_element) * MAC_POLICY_LIST_CHUNKSIZE);
+
        bzero(mac_policy_list.entries, sizeof(struct mac_policy_list_element) * MAC_POLICY_LIST_CHUNKSIZE); 
 
        LIST_INIT(&mac_label_element_list);
index 7f079563c2acea0d112af2a3da784e1c69c1ddbf..26cb4b64a2a09d8ab45364a66ef6d178fc09fa79 100644 (file)
@@ -109,9 +109,10 @@ def GetRegistryEntrySummary(entry):
     vtableAddr = dereference(Cast(entry, 'uintptr_t *')) - 2 * sizeof('uintptr_t *')
     vtype = kern.SymbolicateFromAddress(vtableAddr)
     if vtype is None or len(vtype) < 1:
-        out_string += "<object 0x{0: <16x}, id 0x{1:x}, vtable 0x{2: <16x}".format(entry, entry.reserved.fRegistryEntryID, vtableAddr)
+        out_string += "<object 0x{0: <16x}, id 0x{1:x}, vtable 0x{2: <16x}".format(entry, CastIOKitClass(entry, 'IORegistryEntry *').reserved.fRegistryEntryID, vtableAddr)
     else:
-        out_string += "<object 0x{0: <16x}, id 0x{1:x}, vtable 0x{2: <16x} <{3:s}>".format(entry, entry.reserved.fRegistryEntryID, vtableAddr, vtype[0].GetName())
+        out_string += "<object 0x{0: <16x}, id 0x{1:x}, vtable 0x{2: <16x} <{3:s}>".format(entry, CastIOKitClass(entry, 'IORegistryEntry *').reserved.fRegistryEntryID,
+                                                                                           vtableAddr, vtype[0].GetName())
     
     ztvAddr = kern.GetLoadAddressForSymbol('_ZTV15IORegistryEntry')
     if vtableAddr != ztvAddr:
@@ -274,7 +275,7 @@ def ReadIOPort8(cmd_args=None):
     ReadIOPortInt(portAddr, 1, lcpu)
 
 @lldb_command('readioport16')
-def ReadIOPort8(cmd_args=None):
+def ReadIOPort16(cmd_args=None):
     """ Read value stored in the specified IO port. The CPU can be optionally
         specified as well.
         Prints 0xBAD10AD in case of a bad read
@@ -294,7 +295,7 @@ def ReadIOPort8(cmd_args=None):
     ReadIOPortInt(portAddr, 2, lcpu)
 
 @lldb_command('readioport32')
-def ReadIOPort8(cmd_args=None):
+def ReadIOPort32(cmd_args=None):
     """ Read value stored in the specified IO port. The CPU can be optionally
         specified as well.
         Prints 0xBAD10AD in case of a bad read
@@ -336,7 +337,7 @@ def WriteIOPort8(cmd_args=None):
     WriteIOPortInt(portAddr, 1, value, lcpu)
 
 @lldb_command('writeioport16')
-def WriteIOPort8(cmd_args=None):
+def WriteIOPort16(cmd_args=None):
     """ Write the value to the specified IO port. The size of the value is
         determined by the name of the command. The CPU used can be optionally
         specified as well.
@@ -358,7 +359,7 @@ def WriteIOPort8(cmd_args=None):
     WriteIOPortInt(portAddr, 2, value, lcpu)
 
 @lldb_command('writeioport32')
-def WriteIOPort8(cmd_args=None):
+def WriteIOPort32(cmd_args=None):
     """ Write the value to the specified IO port. The size of the value is
         determined by the name of the command. The CPU used can be optionally
         specified as well.
@@ -719,16 +720,14 @@ def ReadIOPortInt(addr, numbytes, lcpu):
         result_pkt = Cast(addressof(kern.globals.manual_pkt.data), 'kdp_readioport_reply_t *')
         
         if(result_pkt.error == 0):
-            print "This macro is incomplete till <rdar://problem/12868059> is fixed"
-            # FIXME: Uncomment me when <rdar://problem/12868059> is fixed
-            #if numbytes == 1:
-            #    result = dereference(Cast(result_pkt.data, 'uint8_t *'))
-            #elif numbytes == 2:
-            #    result = dereference(Cast(result_pkt.data, 'uint16_t *'))
-            #elif numbytes == 4:
-            #    result = dereference(cast(result_pkt.data, 'uint32_t *'))
-    
-    print "0x{0: <4x}: 0x{1: <1x}".format(addr, result)
+            if numbytes == 1:
+                result = dereference(Cast(addressof(result_pkt.data), 'uint8_t *'))
+            elif numbytes == 2:
+                result = dereference(Cast(addressof(result_pkt.data), 'uint16_t *'))
+            elif numbytes == 4:
+                result = dereference(Cast(addressof(result_pkt.data), 'uint32_t *'))
+
+    print "{0: <#6x}: {1:#0{2}x}".format(addr, result, (numbytes*2)+2)
 
 def WriteIOPortInt(addr, numbytes, value, lcpu):
     """ Writes 'value' into ioport specified by 'addr'. Prints errors if it encounters any
@@ -742,12 +741,12 @@ def WriteIOPortInt(addr, numbytes, value, lcpu):
     len_address = unsigned(addressof(kern.globals.manual_pkt.len))
     data_address = unsigned(addressof(kern.globals.manual_pkt.data))
     if not WriteInt32ToMemoryAddress(0, input_address):
-        print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
+        print "error writing {0: #x} to port {1: <#6x}: failed to write 0 to input_address".format(value, addr)
         return
     
     kdp_pkt_size = GetType('kdp_writeioport_req_t').GetByteSize()
     if not WriteInt32ToMemoryAddress(kdp_pkt_size, len_address):
-        print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
+        print "error writing {0: #x} to port {1: <#6x}: failed to write kdp_pkt_size".format(value, addr)
         return
     
     kgm_pkt = kern.GetValueFromAddress(data_address, 'kdp_writeioport_req_t *')
@@ -759,29 +758,29 @@ def WriteIOPortInt(addr, numbytes, value, lcpu):
         WriteInt32ToMemoryAddress(numbytes, int(addressof(kgm_pkt.nbytes))) and
         WriteInt16ToMemoryAddress(lcpu, int(addressof(kgm_pkt.lcpu)))
         ):
-        print "This macro is incomplete till <rdar://problem/12868059> is fixed"
-        # FIXME: Uncomment me when <rdar://problem/12868059> is fixed
-        #if numbytes == 1:
-        #    if not WriteInt8ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
-        #        print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
-        #elif numbytes == 2:
-        #    if not WriteInt16ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
-        #        print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
-        #elif numbytes == 4:
-        #    if not WriteInt32ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
-        #        print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
-        
+        if numbytes == 1:
+            if not WriteInt8ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
+                print "error writing {0: #x} to port {1: <#6x}: failed to write 8 bit data".format(value, addr)
+                return
+        elif numbytes == 2:
+            if not WriteInt16ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
+                print "error writing {0: #x} to port {1: <#6x}: failed to write 16 bit data".format(value, addr)
+                return
+        elif numbytes == 4:
+            if not WriteInt32ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
+                print "error writing {0: #x} to port {1: <#6x}: failed to write 32 bit data".format(value, addr)
+                return
         if not WriteInt32ToMemoryAddress(1, input_address):
-            print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
+            print "error writing {0: #x} to port {1: <#6x}: failed to write to input_address".format(value, addr)
             return
 
         result_pkt = Cast(addressof(kern.globals.manual_pkt.data), 'kdp_writeioport_reply_t *')
         
         # Done with the write
         if(result_pkt.error == 0):
-            print "Writing 0x {0: x} to port {1: <4x} was successful".format(value, addr)
+            print "Writing {0: #x} to port {1: <#6x} was successful".format(value, addr)
     else:
-        print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
+        print "error writing {0: #x} to port {1: <#6x}".format(value, addr)
 
 @lldb_command('showinterruptcounts')
 def showinterruptcounts(cmd_args=None):
index 259a4a7edef4a27d48653aa4e412782cd0838a89..b5216e3e636dce29751761fb0affbdc4cc67ddfe 100644 (file)
@@ -761,6 +761,7 @@ def SaveStackshotReport(j, outfile_name, dsc_uuid, dsc_libs_arr):
                  ]
 
     dsc_libs = []
+    print "Shared cache UUID found from the binary data is <%s> " % str(dsc_common[0])
     if dsc_common[0].replace('-', '').lower() == dsc_uuid:
         print "SUCCESS: Found Matching dyld shared cache uuid. Loading library load addresses from layout provided."
         _load_addr = dsc_common[1]
@@ -833,6 +834,9 @@ def SaveStackshotReport(j, outfile_name, dsc_uuid, dsc_libs_arr):
         for tid,thdata in thlist.iteritems():
             threadByID[str(tid)] = {}
             thsnap = threadByID[str(tid)]
+            if "thread_snapshot_v2" not in thdata:
+                print "Found broken thread state for thread ID: %s." % tid
+                break
             threadsnap = thdata["thread_snapshot_v2"]
             thsnap["userTime"] = GetSecondsFromMATime(threadsnap["user_time"], timebase)
             thsnap["id"] = threadsnap["thread_id"]
index b3d4dccc9faa589224241aca01f2ab4897ee186b..16604d864e2d8a1069ecbd1e43cc9a7f3ff25ee0 100644 (file)
@@ -1074,7 +1074,9 @@ def GetVMMapSummary(vmmap):
     vm_size = uint64_t(vmmap.size).value
     resident_pages = 0
     if vmmap.pmap != 0: resident_pages = int(vmmap.pmap.stats.resident_count)
-    out_string += format_string.format(vmmap, vmmap.pmap, vm_size, vmmap.hdr.nentries, resident_pages, vmmap.hint, vmmap.first_free)
+    first_free = 0
+    if int(vmmap.holelistenabled) == 0: first_free = vmmap.f_s.first_free
+    out_string += format_string.format(vmmap, vmmap.pmap, vm_size, vmmap.hdr.nentries, resident_pages, vmmap.hint, first_free)
     return out_string
 
 @lldb_type_summary(['vm_map_entry'])
index b4c85f9180df44c380d4aa0d1d63de72549e137e..e2ddb8e3de698e41730f524c22bfb74be2424f06 100644 (file)
@@ -1026,6 +1026,21 @@ def DumpCallQueue(cmd_args=None):
 
 #EndMacro: dumpcallqueue
 
+@lldb_command('showalltasklogicalwrites')
+def ShowAllTaskIOStats(cmd_args=None):
+    """ Commad to print I/O stats for all tasks
+    """
+    print "{0: <20s} {1: <20s} {2: <20s} {3: <20s} {4: <20s} {5: <20s}".format("task", "Immediate Writes", "Deferred Writes", "Invalidated Writes", "Metadata Writes", "name")
+    for t in kern.tasks:
+        pval = Cast(t.bsd_info, 'proc *')
+        print "{0: <#18x} {1: >20d} {2: >20d} {3: >20d} {4: >20d} {5: <20s}".format(t,
+            t.task_immediate_writes, 
+            t.task_deferred_writes,
+            t.task_invalidated_writes,
+            t.task_metadata_writes,
+            str(pval.p_comm)) 
+
+
 @lldb_command('showalltasks','C')
 def ShowAllTasks(cmd_args=None, cmd_options={}):
     """  Routine to print a summary listing of all the tasks
@@ -1236,7 +1251,7 @@ def SwitchToRegs(cmd_args=None):
     fake_thread_id = 0xdead0000 | (saved_state & ~0xffff0000)
     fake_thread_id = fake_thread_id & 0xdeadffff
     lldb_process.CreateOSPluginThread(0xdeadbeef, saved_state)
-    lldbthread = lldb_process.GetThreadByID(fake_thread_id)
+    lldbthread = lldb_process.GetThreadByID(int(fake_thread_id))
     
     if not lldbthread.IsValid():
         print "Failed to create thread"
index 635726b6d9ffc6b7ab80fb6ccef89aac293621a1..0686350f3a80649cc3cc029d0bd60ec7078a7ece 100644 (file)
@@ -18,6 +18,8 @@
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/signal.h>
+#include <errno.h>
+#include "../unit_tests/tests_common.h"
 
 #define MAX(A, B) ((A) < (B) ? (B) : (A))
 
@@ -68,6 +70,8 @@ static boolean_t      timeshare = FALSE;
 static boolean_t       threaded = FALSE;
 static boolean_t       oneway = FALSE;
 static boolean_t       do_select = FALSE;
+static boolean_t    save_perfdata = FALSE;
+
 int                    msg_type;
 int                    num_ints;
 int                    num_msgs;
@@ -98,6 +102,7 @@ void usage(const char *progname) {
        fprintf(stderr, "    -work num\t\tmicroseconds of client work\n");
        fprintf(stderr, "    -pages num\t\tpages of memory touched by client work\n");
        fprintf(stderr, "    -select   \t\tselect prior to calling kevent().\n");
+       fprintf(stderr, "    -perf   \t\tCreate perfdata files for metrics.\n");
        fprintf(stderr, "default values are:\n");
        fprintf(stderr, "    . no affinity\n");
        fprintf(stderr, "    . not timeshare\n");
@@ -200,6 +205,9 @@ void parse_args(int argc, char *argv[]) {
                } else if (0 == strcmp("-select", argv[0])) {
                        do_select = TRUE;
                        argc--; argv++;
+               } else if (0 == strcmp("-perf", argv[0])) {
+                       save_perfdata = TRUE;
+                       argc--; argv++;
                } else 
                        usage(progname);
        }
@@ -733,6 +741,7 @@ wait_for_servers(void)
        exit(1);
 }
 
+
 int main(int argc, char *argv[]) 
 {
        int             i;
@@ -820,6 +829,10 @@ int main(int argc, char *argv[])
        double dsecs = (double) deltatv.tv_sec + 
                1.0E-6 * (double) deltatv.tv_usec;
 
+       double time_in_sec = (double)deltatv.tv_sec + (double)deltatv.tv_usec/1000.0;
+       double throughput_msg_p_sec = (double) totalmsg/dsecs;
+       double avg_msg_latency = dsecs*1.0E6 / (double)totalmsg;
+
        printf(" in %ld.%03u seconds\n",  
               (long)deltatv.tv_sec, deltatv.tv_usec/1000);
        printf("  throughput in messages/sec:     %g\n",
@@ -827,6 +840,9 @@ int main(int argc, char *argv[])
        printf("  average message latency (usec): %2.3g\n", 
                        dsecs * 1.0E6 / (double) totalmsg);
 
+       if (save_perfdata == TRUE) {
+               record_perf_data("kqmpmm_avg_msg_latency", "usec", avg_msg_latency, "Message latency measured in microseconds. Lower is better", stderr);
+       }
        return (0);
 
 }
index c2991c37cb97d8159d24e4f9e4dacd21a0eb3a00..17b0a1acb10c0031186bfeebf589e3e3ab7b8836 100644 (file)
@@ -17,6 +17,8 @@
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/signal.h>
+#include <errno.h>
+#include "../unit_tests/tests_common.h" /* for record_perf_data() */
 
 #include <libkern/OSAtomic.h>
 
@@ -72,6 +74,7 @@ static boolean_t      timeshare = FALSE;
 static boolean_t       threaded = FALSE;
 static boolean_t       oneway = FALSE;
 static boolean_t       useset = FALSE;
+static boolean_t       save_perfdata = FALSE;
 int                    msg_type;
 int                    num_ints;
 int                    num_msgs;
@@ -114,6 +117,7 @@ void usage(const char *progname) {
        fprintf(stderr, "    -verbose\t\tbe verbose (use multiple times to increase verbosity)\n");
        fprintf(stderr, "    -oneway\t\tdo not request return reply\n");
        fprintf(stderr, "    -count num\t\tnumber of messages to send\n");
+       fprintf(stderr, "    -perf   \t\tCreate perfdata files for metrics.\n");
        fprintf(stderr, "    -type trivial|inline|complex\ttype of messages to send\n");
        fprintf(stderr, "    -numints num\tnumber of 32-bit ints to send in messages\n");
        fprintf(stderr, "    -servers num\tnumber of server threads to run\n");
@@ -179,6 +183,9 @@ void parse_args(int argc, char *argv[]) {
                } else if (0 == strcmp("-oneway", argv[0])) {
                        oneway = TRUE;
                        argc--; argv++;
+               } else if (0 == strcmp("-perf", argv[0])) {
+                       save_perfdata = TRUE;
+                       argc--; argv++;
                } else if (0 == strcmp("-type", argv[0])) {
                        if (argc < 2) 
                                usage(progname);
@@ -940,6 +947,14 @@ int main(int argc, char *argv[])
        printf("  average message latency (usec): %2.3g\n", 
                        dsecs * 1.0E6 / (double) totalmsg);
 
+       double time_in_sec = (double)deltatv.tv_sec + (double)deltatv.tv_usec/1000.0;
+       double throughput_msg_p_sec = (double) totalmsg/dsecs;
+       double avg_msg_latency = dsecs*1.0E6 / (double)totalmsg;
+
+       if (save_perfdata == TRUE) {
+               record_perf_data("mpmm_avg_msg_latency", "usec", avg_msg_latency, "Message latency measured in microseconds. Lower is better", stderr);
+       }
+
        if (stress_prepost) {
                int64_t sendns = abs_to_ns(g_client_send_time);
                dsecs = (double)sendns / (double)NSEC_PER_SEC;
diff --git a/tools/tests/TLBcoherency/Makefile b/tools/tests/TLBcoherency/Makefile
new file mode 100644 (file)
index 0000000..00bbf15
--- /dev/null
@@ -0,0 +1,27 @@
+include ../Makefile.common
+
+CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc)
+
+ifdef RC_ARCHS
+    ARCHS:=$(RC_ARCHS)
+  else
+    ifeq "$(Embedded)" "YES"
+      ARCHS:=armv7 armv7s arm64 armv7k
+    else
+      ARCHS:=x86_64
+  endif
+endif
+
+CFLAGS := -g $(patsubst %, -arch %, $(ARCHS)) -isysroot $(SDKROOT) -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
+
+DSTROOT?=$(shell /bin/pwd)
+SYMROOT?=$(shell /bin/pwd)
+
+DEBUG:=0
+
+$(DSTROOT)/tlbcoh: TLBcoherency.c
+       $(CC) $(CFLAGS) -Wall TLBcoherency.c -o $(SYMROOT)/$(notdir $@) -DDEBUG=$(DEBUG) -g -Os
+       if [ ! -e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi
+
+clean:
+       rm -rf $(DSTROOT)/tlbcoh $(SYMROOT)/*.dSYM $(SYMROOT)/tlbcoh
diff --git a/tools/tests/TLBcoherency/TLBcoherency.c b/tools/tests/TLBcoherency/TLBcoherency.c
new file mode 100644 (file)
index 0000000..a4165ba
--- /dev/null
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* A pool of threads which attempt to verify multiprocessor TLB coherency.
+ * Creates -t threads, by default 4
+ * Creates -s separate mmap(MAP_ANON) R/W mappings, sized at 1 page each but
+ * alterable via -z <npages>
+ * Initially read-faults each mapping in, verifying first-word zerofill--
+ * The kernel typically uses the physical aperture to perform the zerofill
+ * Writes map_address (page_aligned) | low 12 bits of the PID at the first word
+ * This can help verify ASID related inconsistencies
+ * Records a timestamp in a Structure associated with each mapping
+ * With a custom kernel, it has the option of creating a remapping of the page in
+ * the kernel's address space to exercise shared kernel mapping coherency.
+ * Each thread subsequently loops around on the set of mappings. One thread is designated
+ * the observer thread. The thread acquires a lock on the arena element,
+ * verifies that the mapping has the expected pattern (Address | PID), if the
+ * element is in the MAPPED state. Can optionally tell the kernel to check its
+ * alias as well. If it notices a mismatch, it has the option to issue a syscall
+ * to  stop kernel tracing. If the -f option is supplied, the test is terminated.
+ * If the page has lingered beyond -l microseconds, non-observer threads will
+ * unmap the page, optionally calling into the kernel to unmap its alias, and
+ * repopulate the element.
+ * After this sequence, the thread will optionally usleep for -p microseconds,
+ * to allow for idle power management to engage if possible (errata might exist
+ * in those areas), or context switches to occur.
+ * Created Derek Kumar, 2011.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <string.h>
+#include <mach/mach_time.h>
+#include <libkern/OSAtomic.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
/* Per-mapping arena element.  One cpage tracks a single mmap()ed region:
 * the spinlock that serializes worker access to it, its base address, its
 * lifecycle state, and the timestamp of its creation. */
typedef struct {
	OSSpinLock tlock;	/* guards this element across worker threads */
	uintptr_t taddr;	/* base address returned by mmap() */
	unsigned tstate;	/* arenastates value (MTOUCHED/UNMAPPED/...) */
	uint64_t tctime;	/* creation time, mach_absolute_time() units */
} cpage;
+
cpage *parray;	/* the arena: arenasize elements, allocated in main() */

#define ARENASIZE (1024)		/* default element count (-s) */
#define NTHREADS (4)			/* default worker thread count (-t) */
#define PAGE_LINGER_TIME (2000000)	/* default -l; compared against raw
					 * mach_absolute_time() deltas --
					 * NOTE(review): header comment says
					 * microseconds; confirm units */
#define MAX_THREADS (512)		/* capacity of threads[]/roles[] */
#define MYSYS (215)			/* custom kernel syscall used for alias
					 * map/unmap/check (MDOMAP et al.) */
#define CONSISTENCY(...) fprintf(stderr, __VA_ARGS__ );	/* mismatch report */

unsigned arenasize = ARENASIZE, mapping_size;	/* -s count; -z size in bytes */
uint64_t page_linger_time = PAGE_LINGER_TIME;	/* -l */
enum arenastates {MTOUCHED = 1, UNMAPPED = 2, MAPPED = 4, WP =8};	/* cpage.tstate */
enum syscaction {MDOMAP = 1, MDOUNMAP = 2, MDOCHECK = 4};	/* MYSYS commands */
enum ttypes {OBSERVER = 1, LOOPER = 2};		/* thread roles */
/* NOTE(review): `bool`/`true` are used without a visible <stdbool.h>; this
 * presumably arrives via <libkern/OSAtomic.h> -- confirm. */
bool trymode = true;		/* use OSSpinLockTry() and skip contended elements */
bool all_stop = false;		/* set on mismatch under -f to halt all workers */
bool stop_on_failure = false;	/* -f */
bool reuse_addrs = true;	/* pass a fixed mmap() hint; cleared by -r */
bool dosyscall = false;		/* -y: exercise the kernel alias syscall */

pid_t cpid;	/* cached getpid(); low 12 bits mixed into the fill pattern */
int sleepus;	/* -p: usleep between arena elements */

pthread_t threads[MAX_THREADS];
uint32_t roles[MAX_THREADS];	/* OBSERVER or LOOPER, one per thread */
+
/* Print a brief usage summary to stderr and terminate with exit status 1.
 * a: argv from main(); a[0] is the program name.  (The original body exited
 * silently, leaving -h and bad invocations indistinguishable from crashes.) */
void usage(char **a) {
	fprintf(stderr,
	    "usage: %s [-l linger_ticks] [-t nthreads] [-s arena_elements]\n"
	    "       [-z npages] [-p sleep_usec] [-f] [-r] [-y]\n", a[0]);
	exit(1);
}
+
/* Toggle the kernel trace facility (kdebug) via sysctl.
 * val: nonzero enables tracing, zero disables it.  Failures are reported
 * but not fatal. */
void set_enable(int val)
{
	size_t needed;
	int mib[6] = { CTL_KERN, KERN_KDEBUG, KERN_KDENABLE, val, 0, 0 };

	/* Only the first four mib words are consumed by this request. */
	if (sysctl(mib, 4, NULL, &needed, NULL, 0) < 0) {
		printf("trace facility failure, KERN_KDENABLE\n");
	}
}
+
/* (Re)populate arena element i: create an anonymous R/W mapping, verify the
 * kernel zerofilled it, stamp every page's first word with the recognizable
 * pattern (base address | low 12 PID bits), and record the creation time.
 * Exits on mmap failure; under -f, a zerofill mismatch is fatal too. */
void initialize_arena_element(int i) {
	__unused int sysret;
	/* Reusing a fixed hint encourages the kernel to recycle the same VA,
	 * which is what stresses TLB/ASID invalidation. */
	void *hint = reuse_addrs ? (void *)0x1000 : NULL;
	parray[i].taddr = (uintptr_t)mmap(hint, mapping_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0);

	if (parray[i].taddr == (uintptr_t)MAP_FAILED) {
		perror("mmap");
		exit(2);
	}

#if	!defined(__LP64__)
	/* 32-bit: pattern is the (page-aligned) base address with the low 12
	 * bits replaced by the PID's low bits. */
	uint32_t pattern = parray[i].taddr;
	pattern |= cpid & 0xFFF;
//	memset_pattern4((void *)parray[i].taddr, &pattern, PAGE_SIZE); //
//	uncomment to fill the whole page, but a sufficiently unique first word
//	gets the job done without slowing down the test

#else
	uint64_t pattern = parray[i].taddr;
	pattern |= (cpid & 0xFFF);
//	memset_pattern8(parray[i].taddr, &pattern, PAGE_SIZE);
#endif

	/* Read BEFORE writing: this fault-in must observe the kernel's
	 * zerofill; stale non-zero data indicates a coherency failure. */
	uint64_t val =  (*(uintptr_t *)parray[i].taddr);

	if (val != 0) {
		CONSISTENCY("Mismatch, actual: 0x%llx, expected: 0x%llx\n", (unsigned long long)val, 0ULL);
		if (stop_on_failure) {
			set_enable(0);	/* stop kernel tracing at the failure point */
			exit(5);
		}
	}
	/* Stamp the first word of every page in the (possibly multi-page)
	 * mapping with the expected pattern. */
	for (int k = 0; k < (mapping_size >> PAGE_SHIFT); k++) {
		*(uintptr_t *)(parray[i].taddr + k * PAGE_SIZE) = pattern;
	}

	parray[i].tctime = mach_absolute_time();
	parray[i].tstate = MTOUCHED;

	if (dosyscall) {
		/* Ask the custom kernel to create its alias mapping of this page. */
		sysret = syscall(MYSYS, MDOMAP, parray[i].taddr, pattern, i, mapping_size);
	}
}
+
+void initialize_arena(void) {
+       for (int i = 0; i < arenasize; i++) {
+               initialize_arena_element(i);
+       }
+}
+
/* Worker thread body: sweep the arena forever, verifying each mapped
 * element's fill pattern.  targs points at this thread's role; only
 * non-OBSERVER (LOOPER) threads retire lingering mappings and repopulate
 * unmapped elements, so the OBSERVER purely reads.  Returns NULL when
 * all_stop is raised. */
void *tlbexerciser(void *targs) {
	uint32_t role = *(uint32_t *)targs;
	__unused int sysret;
	printf("Starting thread %p, role: %u\n", pthread_self(), role);

	for(;;) {
		for (int i = 0; i < arenasize; i++) {
			if (all_stop)
				return NULL;

			/* trymode: skip contended elements rather than spinning. */
			if (trymode) {
				if (OSSpinLockTry(&parray[i].tlock) == false)
					continue;
			} else {
				OSSpinLockLock(&parray[i].tlock);
			}

			if (parray[i].tstate != UNMAPPED) {
				uintptr_t ad;
				/* Expected first word: base address | low 12 PID bits. */
				ad = parray[i].taddr | (cpid & 0xFFF);
				uintptr_t val = *(uintptr_t *)parray[i].taddr;

				if (val != ad) {
					if (stop_on_failure)
						all_stop = true;
					/* Emit a kdebug tracepoint tagged with the failing
					 * address.  NOTE(review): (ad >> 32) is undefined
					 * when uintptr_t is 32 bits -- confirm intent. */
					syscall(180, 0x71BC0000, (ad >> 32), (ad & ~0), 0, 0, 0);
					CONSISTENCY("Mismatch, actual: 0x%llx, expected: 0x%llx\n", (unsigned long long)val, (unsigned long long)ad);
					if (stop_on_failure) {
						set_enable(0);	/* freeze the trace buffer */
						exit(5);
					}
				}

				if (dosyscall) {
					/* Have the kernel verify its alias mapping too. */
					sysret = syscall(MYSYS, MDOCHECK, parray[i].taddr, ad, i, 0);
				}

				/* Retire mappings that have lingered past the threshold.
				 * NOTE(review): this compares raw mach_absolute_time()
				 * ticks against page_linger_time, which -l documents as
				 * microseconds -- confirm units. */
				if ((role != OBSERVER) && ((mach_absolute_time() - parray[i].tctime) > page_linger_time)) {
					parray[i].tstate = UNMAPPED;
					if (munmap((void *)parray[i].taddr, mapping_size) != 0) {
						perror("munmap");
					}

					if (dosyscall) {
						sysret = syscall(MYSYS, MDOUNMAP, parray[i].taddr, ad, i, mapping_size);
					}
				}
			} else {
				if (role != OBSERVER) {
					/* Repopulate the slot we (or a peer) unmapped. */
					initialize_arena_element(i);
				}
			}

			parray[i].tlock = 0; //unlock

			if (sleepus)
				usleep(sleepus);	/* -p: allow idle PM / context switches */
		}
	}

	return NULL;
}
+
+int main(int argc, char **argv) {
+       extern char *optarg;
+       int arg;
+       unsigned nthreads = NTHREADS;
+
+       mapping_size = PAGE_SIZE;
+
+       while ((arg = getopt(argc, argv, "l:t:h:s:p:z:fry")) != -1) {
+               switch (arg) {
+               case 'l':
+                       page_linger_time = strtoull(optarg, NULL, 0);
+                       break;
+               case 't':
+                       nthreads = atoi(optarg);
+                       break;
+               case 's':
+                       arenasize = atoi(optarg); // we typically want this to
+                                                 // be sized < 2nd level TLB
+                       break;
+               case 'f':
+                       stop_on_failure = true;
+                       break;
+               case 'r':
+                       reuse_addrs = false;
+                       break;
+               case 'p':
+                       sleepus = atoi(optarg);
+                       break;
+               case 'y':
+                       dosyscall = true;
+                       break;
+               case 'z':
+                       mapping_size = atoi(optarg) * PAGE_SIZE;
+                       break;
+               case 'h':
+                       usage(argv);
+               }
+       }
+
+       if(optind != argc) {
+               usage(argv);
+       }
+
+       printf("page_linger_time: 0x%llx, nthreads: %u, arenasize: %u sleepus: %d reuse_addrs: %u, stop_on_failure: %u, dosyscall: %u, mappingsize: 0x%x\n", page_linger_time, nthreads, arenasize, sleepus, reuse_addrs, (unsigned) stop_on_failure, dosyscall, mapping_size);
+
+       parray = calloc(arenasize, sizeof(cpage));
+       cpid = getpid();
+
+       initialize_arena();
+
+       for (int dex = 0; dex < nthreads; dex++) {
+               roles[dex] = LOOPER;
+               if (dex == 0)
+                       roles[dex] = OBSERVER;
+               int result = pthread_create(&threads[dex], NULL, tlbexerciser, &roles[dex]);
+               if(result) {
+                       printf("pthread_create: %d starting worker thread; aborting.\n", result);
+                       return result;
+               }
+       }
+
+       for(int dex = 0; dex < nthreads; dex++) {
+               void *rtn;
+               int result = pthread_join(threads[dex], &rtn);
+
+               if(result) {
+                       printf("pthread_join(): %d, aborting\n", result);
+                       return result;
+               }
+
+               if(rtn) {
+                       printf("***Aborting on worker error\n");
+                       exit(1);
+               }
+       }
+       return 0;
+}
index 7a8f3165036a7e3172882337e1f514d78cceb33b..c9a940bd20db9e5fa541ce0401f075e622dfc9c6 100755 (executable)
@@ -9,6 +9,20 @@ RUN=run
 PRODUCT=`sw_vers -productName`
 COUNT=
 
# record_perf_data(metric, unit, value, description)
# Emit a single-line perfdata JSON record on stdout.
function record_perf_data() {
    local metric="$1"
    local unit="$2"
    local data="$3"
    local description="$4"
    printf '%s\n' "{ \"version\" : \"1.0\", \"measurements\" : {\"$metric\": {\"description\" : \"$description\", \"names\":[\"$metric\"],  \"units\" : [\"$unit\"], \"data\" : [$data] }}}"
}
+
# Where .perfdata files land: the BATS-provided tmp dir, or /tmp/ when
# BATS_TMP_DIR is unset or empty.
PERFDATA_DIR="${BATS_TMP_DIR:-/tmp/}"
+
 case "$PRODUCT" in
     "iPhone OS")
        COUNT=1000
@@ -22,7 +36,13 @@ for i in ${EXECUTABLES}; do
     echo "Running $i"
     for j in `jot $(sysctl -n hw.ncpu) 1`; do
        printf "\t%dx\t" $j
-       /usr/bin/time ./${RUN} $j $((${COUNT}/$j)) ./$i
+    METRIC_NAME="${i}_${j}x"
+    TIMEOUT=` /usr/bin/time ./${RUN} $j $((${COUNT}/$j)) ./$i 2>&1`
+    echo ${TIMEOUT}
+    REALTIME=`echo ${TIMEOUT} | awk '{ print $1 }'`
+    TOTALTIME=`echo ${TIMEOUT} | awk '{ print $3 + $5 }'`
+    record_perf_data "${METRIC_NAME}_real" "s" $REALTIME "Real time in seconds. Lower is better. This may have variance based on load on system" > ${PERFDATA_DIR}/${METRIC_NAME}_real.perfdata
+    record_perf_data "${METRIC_NAME}_sys" "s" $TOTALTIME "User + Sys time in seconds. Lower is better." > /tmp/${METRIC_NAME}_sys.perfdata
        if [ $? -ne 0 ]; then
            echo "Failed $i, exit status $?"
            exit 1