pax -rw . $(SRCROOT)
else ifeq ($(RC_ProjectName),xnu_quick_test)
+# This rule should be removed once rdar://22820602 is complete.
+default: install
+
+installhdrs:
+
+install: xnu_tests
+
+clean:
+
+installsrc:
+ pax -rw . $(SRCROOT)
+
+else ifeq ($(RC_ProjectName),xnu_tests)
default: install
installhdrs:
-install: xnu_quick_test
+install: xnu_tests
clean:
"SDKROOT=$(SDKROOT)"
-# "xnu_quick_test" and "testbots" are targets that can be invoked via a standalone
-# "make xnu_quick_test" or via buildit/XBS with the RC_ProjectName=xnu_quick_test.
+# "xnu_tests" and "testbots" are targets that can be invoked via a standalone
+# "make xnu_tests" or via buildit/XBS with the RC_ProjectName=xnu_tests.
# Define the target here in the outermost scope of the initial Makefile
-xnu_quick_test:
+xnu_tests xnu_quick_test:
$(MAKE) -C $(SRCROOT)/tools/tests \
SRCROOT=$(SRCROOT)/tools/tests
#include <sys/sysctl.h>
#include <i386/cpuid.h>
#include <i386/tsc.h>
+#include <i386/rtclock_protos.h>
#include <i386/machine_routines.h>
#include <i386/pal_routines.h>
#include <i386/ucode.h>
extern uint32_t deep_idle_rebase;
SYSCTL_UINT(_machdep_tsc, OID_AUTO, deep_idle_rebase,
- CTLFLAG_RW|CTLFLAG_KERN|CTLFLAG_LOCKED, &deep_idle_rebase, 0, "");
+ CTLFLAG_RD|CTLFLAG_LOCKED, &deep_idle_rebase, 0, "");
+SYSCTL_QUAD(_machdep_tsc, OID_AUTO, at_boot,
+ CTLFLAG_RD|CTLFLAG_LOCKED, &tsc_at_boot, "");
+SYSCTL_QUAD(_machdep_tsc, OID_AUTO, rebase_abs_time,
+ CTLFLAG_RD|CTLFLAG_LOCKED, &tsc_rebase_abs_time, "");
SYSCTL_NODE(_machdep_tsc, OID_AUTO, nanotime,
CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "TSC to ns conversion");
}
cnid = filep->fileID;
- /* Skip over journal files. */
- if (cnid == hfsmp->hfs_jnlfileid || cnid == hfsmp->hfs_jnlinfoblkid) {
+ /* Skip over journal files and the hotfiles B-Tree file. */
+ if (cnid == hfsmp->hfs_jnlfileid
+ || cnid == hfsmp->hfs_jnlinfoblkid
+ || cnid == VTOC(hfsmp->hfc_filevp)->c_fileid) {
continue;
}
/*
stage = HFC_ADOPTION;
break;
}
+
+ // Jump straight to delete for some files...
+ if (key->fileID == VTOC(hfsmp->hfc_filevp)->c_fileid
+ || key->fileID == hfsmp->hfs_jnlfileid
+ || key->fileID == hfsmp->hfs_jnlinfoblkid
+ || key->fileID < kHFSFirstUserCatalogNodeID) {
+ goto delete;
+ }
+
/*
	 * Acquire the vnode for this file.
*/
// XXXdbg
#include <sys/filedesc.h>
+static hfsmount_t *hfs_mount_from_cwd(vfs_context_t ctx)
+{
+ vnode_t vp = vfs_context_cwd(ctx);
+
+ if (!vp)
+ return NULL;
+
+ /*
+ * We could use vnode_tag, but it is probably more future proof to
+ * compare fstypename.
+ */
+ char fstypename[MFSNAMELEN];
+ vnode_vfsname(vp, fstypename);
+
+ if (strcmp(fstypename, "hfs"))
+ return NULL;
+
+ return VTOHFS(vp);
+}
+
/*
* HFS filesystem related variables.
*/
} else if (name[0] == HFS_ENABLE_JOURNALING) {
// make the file system journaled...
- vnode_t vp = vfs_context_cwd(context);
vnode_t jvp;
ExtendedVCB *vcb;
struct cat_attr jnl_attr;
if (!kauth_cred_issuser(kauth_cred_get())) {
return (EPERM);
}
- if (vp == NULLVP)
- return EINVAL;
- hfsmp = VTOHFS(vp);
+ hfsmp = hfs_mount_from_cwd(context);
+ if (!hfsmp)
+ return EINVAL;
+
if (hfsmp->hfs_flags & HFS_READ_ONLY) {
return EROFS;
}
}
if (hfsmp->jnl) {
- printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
+ printf("hfs: volume %s is already journaled!\n", hfsmp->vcbVN);
return EAGAIN;
}
vcb = HFSTOVCB(hfsmp);
return 0;
} else if (name[0] == HFS_DISABLE_JOURNALING) {
// clear the journaling bit
- vnode_t vp = vfs_context_cwd(context);
-
+
/* Only root can disable journaling */
if (!kauth_cred_issuser(kauth_cred_get())) {
return (EPERM);
}
- if (vp == NULLVP)
- return EINVAL;
- hfsmp = VTOHFS(vp);
+ hfsmp = hfs_mount_from_cwd(context);
+ if (!hfsmp)
+ return EINVAL;
/*
* Disabling journaling is disallowed on volumes with directory hard links
return EPERM;
}
- printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
+ printf("hfs: disabling journaling for %s\n", hfsmp->vcbVN);
hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
}
- return 0;
- } else if (name[0] == HFS_GET_JOURNAL_INFO) {
- vnode_t vp = vfs_context_cwd(context);
- off_t jnl_start, jnl_size;
-
- if (vp == NULLVP)
- return EINVAL;
-
- /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
- if (proc_is64bit(current_proc()))
- return EINVAL;
-
- hfsmp = VTOHFS(vp);
- if (hfsmp->jnl == NULL) {
- jnl_start = 0;
- jnl_size = 0;
- } else {
- jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, HFSTOVCB(hfsmp)->blockSize) + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
- jnl_size = hfsmp->jnl_size;
- }
-
- if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
- return error;
- }
- if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
- return error;
- }
-
return 0;
} else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
// Update to_cp's resource data if it has it
filefork_t *to_rfork = to_cp->c_rsrcfork;
if (to_rfork) {
- to_rfork->ff_invalidranges = from_rfork->ff_invalidranges;
- to_rfork->ff_data = from_rfork->ff_data;
+ TAILQ_SWAP(&to_rfork->ff_invalidranges,
+ &from_rfork->ff_invalidranges, rl_entry, rl_link);
+ to_rfork->ff_data = from_rfork->ff_data;
// Deal with ubc_setsize
hfs_rsrc_setsize(to_cp);
lck_grp_t * proc_lck_grp;
lck_grp_t * proc_slock_grp;
lck_grp_t * proc_fdmlock_grp;
+lck_grp_t * proc_ucred_mlock_grp;
lck_grp_t * proc_mlock_grp;
lck_grp_attr_t * proc_lck_grp_attr;
lck_attr_t * proc_lck_attr;
#if CONFIG_FINE_LOCK_GROUPS
proc_slock_grp = lck_grp_alloc_init("proc-slock", proc_lck_grp_attr);
proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock", proc_lck_grp_attr);
+ proc_ucred_mlock_grp = lck_grp_alloc_init("proc-ucred-mlock", proc_lck_grp_attr);
proc_mlock_grp = lck_grp_alloc_init("proc-mlock", proc_lck_grp_attr);
#endif
/* Allocate proc lock attribute */
proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
+ lck_mtx_init(&kernproc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
#else
proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr);
lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr);
+ lck_mtx_init(&kernproc->p_ucred_mlock, proc_lck_grp, proc_lck_attr);
lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr);
#endif
int kdbg_readthrmap(user_addr_t, size_t *, vnode_t, vfs_context_t);
int kdbg_readthrmap_v3(user_addr_t, size_t *, int);
int kdbg_readcurthrmap(user_addr_t, size_t *);
-int kdbg_getreg(kd_regtype *);
int kdbg_setreg(kd_regtype *);
int kdbg_setrtcdec(kd_regtype *);
int kdbg_setpidex(kd_regtype *);
return(ret);
}
-int
-kdbg_getreg(__unused kd_regtype * kdr)
-{
-#if 0
- int i,j, ret=0;
- unsigned int val_1, val_2, val;
-
- switch (kdr->type) {
- case KDBG_CLASSTYPE :
- val_1 = (kdr->value1 & 0xff);
- val_2 = val_1 + 1;
- kdlog_beg = (val_1<<24);
- kdlog_end = (val_2<<24);
- kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
- kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
- break;
- case KDBG_SUBCLSTYPE :
- val_1 = (kdr->value1 & 0xff);
- val_2 = (kdr->value2 & 0xff);
- val = val_2 + 1;
- kdlog_beg = ((val_1<<24) | (val_2 << 16));
- kdlog_end = ((val_1<<24) | (val << 16));
- kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
- kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
- break;
- case KDBG_RANGETYPE :
- kdlog_beg = (kdr->value1);
- kdlog_end = (kdr->value2);
- kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
- kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
- break;
- case KDBG_TYPENONE :
- kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
- kdlog_beg = 0;
- kdlog_end = 0;
- break;
- default :
- ret = EINVAL;
- break;
- }
-#endif /* 0 */
- return(EINVAL);
-}
-
static int
kdbg_write_to_vnode(caddr_t buffer, size_t size, vnode_t vp, vfs_context_t ctx, off_t file_offset)
{
ret = kdbg_setreg(&kd_Reg);
break;
case KERN_KDGETREG:
- if (size < sizeof(kd_regtype)) {
- ret = EINVAL;
- break;
- }
- ret = kdbg_getreg(&kd_Reg);
- if (copyout(&kd_Reg, where, sizeof(kd_regtype))) {
- ret = EINVAL;
- }
kdbg_disable_bg_trace();
-
+ ret = EINVAL;
break;
case KERN_KDREADTR:
ret = kdbg_read(where, sizep, NULL, NULL, RAW_VERSION1);
* Returns: (kauth_cred_t) Pointer to the process's
* newly referenced credential
*
- * Locks: PROC_LOCK is held before taking the reference and released
+ * Locks: PROC_UCRED_LOCK is held before taking the reference and released
 * after the reference is taken to protect the p_ucred field of
* the process referred to by procp.
*
{
kauth_cred_t cred;
- proc_lock(procp);
+ proc_ucred_lock(procp);
cred = proc_ucred(procp);
kauth_cred_ref(cred);
- proc_unlock(procp);
+ proc_ucred_unlock(procp);
return(cred);
}
DEBUG_CRED_CHANGE("kauth_proc_setlabel_unlocked CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
- proc_lock(p);
+ proc_ucred_lock(p);
/*
* We need to protect for a race where another thread
* also changed the credential after we took our
* restart this again with the new cred.
*/
if (p->p_ucred != my_cred) {
- proc_unlock(p);
+ proc_ucred_unlock(p);
kauth_cred_unref(&my_new_cred);
my_cred = kauth_cred_proc_ref(p);
/* try again */
PROC_UPDATE_CREDS_ONPROC(p);
mac_proc_set_enforce(p, MAC_ALL_ENFORCE);
- proc_unlock(p);
+ proc_ucred_unlock(p);
}
break;
}
DEBUG_CRED_CHANGE("kauth_proc_label_update_execve_unlocked CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
- proc_lock(p);
+ proc_ucred_lock(p);
/*
* We need to protect for a race where another thread
* also changed the credential after we took our
* restart this again with the new cred.
*/
if (p->p_ucred != my_cred) {
- proc_unlock(p);
+ proc_ucred_unlock(p);
kauth_cred_unref(&my_new_cred);
my_cred = kauth_cred_proc_ref(p);
/* try again */
/* update cred on proc */
PROC_UPDATE_CREDS_ONPROC(p);
mac_proc_set_enforce(p, MAC_ALL_ENFORCE);
- proc_unlock(p);
+ proc_ucred_unlock(p);
}
break;
}
#include <mach/shared_region.h>
+#include <libkern/section_keywords.h>
+
unsigned long cs_procs_killed = 0;
unsigned long cs_procs_invalidated = 0;
int cs_enforcement_panic=0;
#if CONFIG_ENFORCE_SIGNED_CODE
-int cs_enforcement_enable = 1;
+#define DEFAULT_CS_ENFORCEMENT_ENABLE 1
#else
-int cs_enforcement_enable = 0;
+#define DEFAULT_CS_ENFORCEMENT_ENABLE 0
#endif
+SECURITY_READ_ONLY_LATE(int) cs_enforcement_enable = DEFAULT_CS_ENFORCEMENT_ENABLE;
#if CONFIG_ENFORCE_LIBRARY_VALIDATION
-int cs_library_val_enable = 1;
+#define DEFAULT_CS_LIBRARY_VA_ENABLE 1
#else
-int cs_library_val_enable = 0;
+#define DEFAULT_CS_LIBRARY_VA_ENABLE 0
#endif
+SECURITY_READ_ONLY_LATE(int) cs_library_val_enable = DEFAULT_CS_LIBRARY_VA_ENABLE;
#endif /* !SECURE_KERNEL */
int cs_all_vnodes = 0;
#if CONFIG_FINE_LOCK_GROUPS
lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
+ lck_mtx_init(&child_proc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
#if CONFIG_DTRACE
lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
#else /* !CONFIG_FINE_LOCK_GROUPS */
lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
+ lck_mtx_init(&child_proc->p_ucred_mlock, proc_lck_grp, proc_lck_attr);
#if CONFIG_DTRACE
lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
void
proc_lock(proc_t p)
{
+ lck_mtx_assert(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
lck_mtx_lock(&p->p_mlock);
}
lck_mtx_unlock(proc_list_mlock);
}
+void
+proc_ucred_lock(proc_t p)
+{
+ lck_mtx_lock(&p->p_ucred_mlock);
+}
+
+void
+proc_ucred_unlock(proc_t p)
+{
+ lck_mtx_unlock(&p->p_ucred_mlock);
+}
+
#include <kern/zalloc.h>
struct zone *uthread_zone;
uthread_t uth = (uthread_t)uthread;
proc_t p = (proc_t)bsd_info;
+#if PROC_REF_DEBUG
+ if (__improbable(uthread_get_proc_refcount(uthread) != 0)) {
+ panic("uthread_cleanup called for uthread %p with uu_proc_refcount != 0", uthread);
+ }
+#endif
+
if (uth->uu_lowpri_window || uth->uu_throttle_info) {
/*
* task is marked as a low priority I/O type
proc_t p;
/* TODO - add a victim queue and push this into the main jetsam thread */
-
p = proc_find(victim_pid);
if (!p) {
return FALSE;
}
- printf("memorystatus: specifically killing pid %d [%s] (%s %d) - memorystatus_available_pages: %d\n",
- victim_pid, (p->p_comm ? p->p_comm : "(unknown)"),
- jetsam_kill_cause_name[cause], p->p_memstat_effectivepriority, memorystatus_available_pages);
-
proc_list_lock();
+ if ((p->p_memstat_state & P_MEMSTAT_TERMINATED) ||
+ (p->p_listflag & P_LIST_EXITED) ||
+ (p->p_memstat_state & P_MEMSTAT_ERROR)) {
+ proc_list_unlock();
+ proc_rele(p);
+ return FALSE;
+ }
+
+ p->p_memstat_state |= P_MEMSTAT_TERMINATED;
+
if (memorystatus_jetsam_snapshot_count == 0) {
memorystatus_init_jetsam_snapshot_locked(NULL,0);
}
memorystatus_update_jetsam_snapshot_entry_locked(p, cause);
proc_list_unlock();
+
+ printf("memorystatus: specifically killing pid %d [%s] (%s %d) - memorystatus_available_pages: %d\n",
+ victim_pid, (p->p_comm ? p->p_comm : "(unknown)"),
+ jetsam_kill_cause_name[cause], p->p_memstat_effectivepriority, memorystatus_available_pages);
+
killed = memorystatus_do_kill(p, cause);
proc_rele(p);
(void)chgproccnt(ruid, 1);
}
- proc_lock(p);
+ proc_ucred_lock(p);
/*
* We need to protect for a race where another thread
* also changed the credential after we took our
	 * Note: the kauth_cred_setresuid has consumed a reference to my_cred; if p_ucred != my_cred, then my_cred must not be dereferenced!
*/
if (p->p_ucred != my_cred) {
- proc_unlock(p);
+ proc_ucred_unlock(p);
/*
* We didn't successfully switch to the new ruid, so decrement
* the procs/uid count that we incremented above.
PROC_UPDATE_CREDS_ONPROC(p);
OSBitOrAtomic(P_SUGID, &p->p_flag);
- proc_unlock(p);
+ proc_ucred_unlock(p);
/*
* If we've updated the ruid, decrement the count of procs running
* under the previous ruid
DEBUG_CRED_CHANGE("seteuid CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_pcred->cr_flags, my_new_cred, posix_cred_get(my_new_cred)->cr_flags);
- proc_lock(p);
+ proc_ucred_lock(p);
/*
* We need to protect for a race where another thread
* also changed the credential after we took our
* should restart this again with the new cred.
*/
if (p->p_ucred != my_cred) {
- proc_unlock(p);
+ proc_ucred_unlock(p);
kauth_cred_unref(&my_new_cred);
my_cred = kauth_cred_proc_ref(p);
my_pcred = posix_cred_get(my_cred);
/* update cred on proc */
PROC_UPDATE_CREDS_ONPROC(p);
OSBitOrAtomic(P_SUGID, &p->p_flag);
- proc_unlock(p);
+ proc_ucred_unlock(p);
}
break;
}
(void)chgproccnt(ruid, 1);
}
- proc_lock(p);
+ proc_ucred_lock(p);
/*
* We need to protect for a race where another thread
* also changed the credential after we took our
	 * Note: the kauth_cred_setresuid has consumed a reference to my_cred; if p_ucred != my_cred, then my_cred must not be dereferenced!
*/
if (p->p_ucred != my_cred) {
- proc_unlock(p);
+ proc_ucred_unlock(p);
if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) {
/*
* We didn't successfully switch to the new ruid, so decrement
/* update cred on proc */
PROC_UPDATE_CREDS_ONPROC(p);
OSBitOrAtomic(P_SUGID, &p->p_flag);
- proc_unlock(p);
+ proc_ucred_unlock(p);
if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) {
/*
DEBUG_CRED_CHANGE("setgid(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
- proc_lock(p);
+ proc_ucred_lock(p);
/*
* We need to protect for a race where another thread
* also changed the credential after we took our
* should restart this again with the new cred.
*/
if (p->p_ucred != my_cred) {
- proc_unlock(p);
+ proc_ucred_unlock(p);
kauth_cred_unref(&my_new_cred);
/* try again */
my_cred = kauth_cred_proc_ref(p);
/* update cred on proc */
PROC_UPDATE_CREDS_ONPROC(p);
OSBitOrAtomic(P_SUGID, &p->p_flag);
- proc_unlock(p);
+ proc_ucred_unlock(p);
}
break;
}
DEBUG_CRED_CHANGE("setegid(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_pcred->cr_flags, my_new_cred, posix_cred_get(my_new_cred)->cr_flags);
- proc_lock(p);
+ proc_ucred_lock(p);
/*
* We need to protect for a race where another thread
* also changed the credential after we took our
* should restart this again with the new cred.
*/
if (p->p_ucred != my_cred) {
- proc_unlock(p);
+ proc_ucred_unlock(p);
kauth_cred_unref(&my_new_cred);
/* try again */
my_cred = kauth_cred_proc_ref(p);
/* update cred on proc */
PROC_UPDATE_CREDS_ONPROC(p);
OSBitOrAtomic(P_SUGID, &p->p_flag);
- proc_unlock(p);
+ proc_ucred_unlock(p);
}
break;
}
DEBUG_CRED_CHANGE("setregid(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_pcred->cr_flags, my_new_cred, posix_cred_get(my_new_cred)->cr_flags);
- proc_lock(p);
+ proc_ucred_lock(p);
/* need to protect for a race where another thread
* also changed the credential after we took our
* reference. If p_ucred has changed then we
* should restart this again with the new cred.
*/
if (p->p_ucred != my_cred) {
- proc_unlock(p);
+ proc_ucred_unlock(p);
kauth_cred_unref(&my_new_cred);
/* try again */
my_cred = kauth_cred_proc_ref(p);
/* update cred on proc */
PROC_UPDATE_CREDS_ONPROC(p);
OSBitOrAtomic(P_SUGID, &p->p_flag); /* XXX redundant? */
- proc_unlock(p);
+ proc_ucred_unlock(p);
}
break;
}
DEBUG_CRED_CHANGE("setgroups1(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
- proc_lock(p);
+ proc_ucred_lock(p);
/*
* We need to protect for a race where another
* thread also changed the credential after we
* with the new cred.
*/
if (p->p_ucred != my_cred) {
- proc_unlock(p);
+ proc_ucred_unlock(p);
kauth_cred_unref(&my_new_cred);
my_cred = kauth_cred_proc_ref(p);
/* try again */
/* update cred on proc */
PROC_UPDATE_CREDS_ONPROC(p);
OSBitOrAtomic(P_SUGID, &p->p_flag);
- proc_unlock(p);
+ proc_ucred_unlock(p);
}
break;
}
*/
unsigned sigrestrict_arg = 0;
-#if PLATFORM_WatchOS || PLATFORM_AppleTVOS
+#if PLATFORM_WatchOS
static int
sigrestrictmask(void)
{
(void)signum;
return 0;
}
-#endif /* !(PLATFORM_WatchOS || PLATFORM_AppleTVOS) */
+#endif /* !PLATFORM_WatchOS */
/*
* Returns: 0 Success
signum == SIGKILL || signum == SIGSTOP)
return (EINVAL);
- if ((error = signal_is_restricted(p, signum))) {
- if (error == ENOTSUP) {
- printf("%s(%d): denied attempt to register action for signal %d\n",
- proc_name_address(p), proc_pid(p), signum);
+ if (uap->nsa) {
+ if (IS_64BIT_PROCESS(p)) {
+ struct __user64_sigaction __vec64;
+ error = copyin(uap->nsa, &__vec64, sizeof(__vec64));
+ __sigaction_user64_to_kern(&__vec64, &__vec);
+ } else {
+ struct __user32_sigaction __vec32;
+ error = copyin(uap->nsa, &__vec32, sizeof(__vec32));
+ __sigaction_user32_to_kern(&__vec32, &__vec);
+ }
+ if (error)
+ return (error);
+ __vec.sa_flags &= SA_USERSPACE_MASK; /* Only pass on valid sa_flags */
+
+ if ((__vec.sa_flags & SA_SIGINFO) || __vec.sa_handler != SIG_DFL) {
+ if ((error = signal_is_restricted(p, signum))) {
+ if (error == ENOTSUP) {
+ printf("%s(%d): denied attempt to register action for signal %d\n",
+ proc_name_address(p), proc_pid(p), signum);
+ }
+ return error;
+ }
}
- return error;
}
if (uap->osa) {
if (IS_64BIT_PROCESS(p)) {
struct user64_sigaction vec64;
-
sigaction_kern_to_user64(sa, &vec64);
error = copyout(&vec64, uap->osa, sizeof(vec64));
} else {
struct user32_sigaction vec32;
-
sigaction_kern_to_user32(sa, &vec32);
error = copyout(&vec32, uap->osa, sizeof(vec32));
}
if (error)
return (error);
}
+
if (uap->nsa) {
- if (IS_64BIT_PROCESS(p)) {
- struct __user64_sigaction __vec64;
-
- error = copyin(uap->nsa, &__vec64, sizeof(__vec64));
- __sigaction_user64_to_kern(&__vec64, &__vec);
- } else {
- struct __user32_sigaction __vec32;
-
- error = copyin(uap->nsa, &__vec32, sizeof(__vec32));
- __sigaction_user32_to_kern(&__vec32, &__vec);
- }
- if (error)
- return (error);
- __vec.sa_flags &= SA_USERSPACE_MASK; /* Only pass on valid sa_flags */
error = setsigvec(p, current_thread(), signum, &__vec, FALSE);
}
+
return (error);
}
signal_setast(sig_actthread);
}
+/*
+ * get_signalthread
+ *
+ * Picks an appropriate thread from a process to target with a signal.
+ *
+ * Called with proc locked.
+ * Returns thread with BSD ast set.
+ *
+ * We attempt to deliver a proc-wide signal to the first thread in the task.
+ * This allows single-threaded applications that use signals to
+ * be linked with multithreaded libraries.
+ */
static kern_return_t
get_signalthread(proc_t p, int signum, thread_t * thr)
{
return(KERN_FAILURE);
}
- proc_lock(p);
-
TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) {
if(((uth->uu_flag & UT_NO_SIGMASK)== 0) &&
(((uth->uu_sigmask & mask) == 0) || (uth->uu_sigwait & mask))) {
if (check_actforsig(p->task, uth->uu_context.vc_thread, 1) == KERN_SUCCESS) {
*thr = uth->uu_context.vc_thread;
- proc_unlock(p);
return(KERN_SUCCESS);
}
}
}
- proc_unlock(p);
if (get_signalact(p->task, thr, 1) == KERN_SUCCESS) {
return(KERN_SUCCESS);
}
user_addr_t action = USER_ADDR_NULL;
proc_t sig_proc;
thread_t sig_thread;
- register task_t sig_task;
+ task_t sig_task;
int mask;
struct uthread *uth;
kern_return_t kret;
kauth_cred_t my_cred;
if ((u_int)signum >= NSIG || signum == 0)
- panic("psignal signal number");
+ panic("psignal: bad signal number %d", signum);
+
mask = sigmask(signum);
prop = sigprop[signum];
sig_thread = thread;
sig_proc = (proc_t)get_bsdtask_info(sig_task);
} else if (flavor & PSIG_TRY_THREAD) {
+ assert((thread == current_thread()) && (p == current_proc()));
sig_task = p->task;
sig_thread = thread;
sig_proc = p;
} else {
sig_task = p->task;
- sig_thread = (struct thread *)0;
+ sig_thread = THREAD_NULL;
sig_proc = p;
}
* also no need to send a signal to a process that is in the middle
* of being torn down.
*/
- if (ISSET(sig_proc->p_flag, P_REBOOT) ||
- ISSET(sig_proc->p_lflag, P_LEXIT))
+ if (ISSET(sig_proc->p_flag, P_REBOOT) || ISSET(sig_proc->p_lflag, P_LEXIT)) {
+ DTRACE_PROC3(signal__discard, thread_t, sig_thread, proc_t, sig_proc, int, signum);
return;
+ }
if( (flavor & (PSIG_VFORK | PSIG_THREAD)) == 0) {
proc_knote(sig_proc, NOTE_SIGNAL | signum);
if ((flavor & PSIG_LOCKED)== 0)
proc_signalstart(sig_proc, 0);
- /*
- * Deliver the signal to the first thread in the task. This
- * allows single threaded applications which use signals to
- * be able to be linked with multithreaded libraries. We have
- * an implicit reference to the current thread, but need
- * an explicit one otherwise. The thread reference keeps
- * the corresponding task data structures around too. This
- * reference is released by thread_deallocate.
- */
-
-
+ /* Don't send signals to a process that has ignored them. */
if (((flavor & PSIG_VFORK) == 0) && ((sig_proc->p_lflag & P_LTRACED) == 0) && (sig_proc->p_sigignore & mask)) {
DTRACE_PROC3(signal__discard, thread_t, sig_thread, proc_t, sig_proc, int, signum);
- goto psigout;
+ goto sigout_unlocked;
}
+ /*
+ * The proc_lock prevents the targeted thread from being deallocated
+ * or handling the signal until we're done signaling it.
+ *
+ * Once the proc_lock is dropped, we have no guarantee the thread or uthread exists anymore.
+ *
+ * XXX: What if the thread goes inactive after the thread passes bsd ast point?
+ */
+ proc_lock(sig_proc);
+
if (flavor & PSIG_VFORK) {
action = SIG_DFL;
act_set_astbsd(sig_thread);
/* If successful return with ast set */
kret = get_signalthread(sig_proc, signum, &sig_thread);
}
+
if (kret != KERN_SUCCESS) {
-#if SIGNAL_DEBUG
- ram_printf(1);
-#endif /* SIGNAL_DEBUG */
- goto psigout;
+ DTRACE_PROC3(signal__discard, thread_t, sig_thread, proc_t, sig_proc, int, signum);
+ proc_unlock(sig_proc);
+ goto sigout_unlocked;
}
uth = get_bsdthread_info(sig_thread);
* action will be SIG_DFL here.)
*/
if (sig_proc->p_sigignore & mask)
- goto psigout;
+ goto sigout_locked;
+
if (uth->uu_sigwait & mask)
action = KERN_SIG_WAIT;
else if (uth->uu_sigmask & mask)
}
}
- proc_lock(sig_proc);
-
+ /* TODO: p_nice isn't hooked up to the scheduler... */
if (sig_proc->p_nice > NZERO && action == SIG_DFL && (prop & SA_KILL) &&
(sig_proc->p_lflag & P_LTRACED) == 0)
sig_proc->p_nice = NZERO;
* is default; don't stop the process below if sleeping,
* and don't clear any pending SIGCONT.
*/
- proc_unlock(sig_proc);
pg = proc_pgrp(sig_proc);
if (prop & SA_TTYSTOP && pg->pg_jobc == 0 &&
action == SIG_DFL) {
pg_rele(pg);
- goto psigout;
+ goto sigout_locked;
}
pg_rele(pg);
- proc_lock(sig_proc);
uth->uu_siglist &= ~contsigmask;
}
uth->uu_siglist |= mask;
- /*
- * Repost AST incase sigthread has processed
- * ast and missed signal post.
- */
- if (action == KERN_SIG_CATCH)
- act_set_astbsd(sig_thread);
-
/*
* Defer further processing for signals which are held,
* except that stopped processes must be continued by SIGCONT.
*/
/* vfork will not go thru as action is SIG_DFL */
- if ((action == KERN_SIG_HOLD) && ((prop & SA_CONT) == 0 || sig_proc->p_stat != SSTOP)) {
- proc_unlock(sig_proc);
- goto psigout;
- }
+ if ((action == KERN_SIG_HOLD) && ((prop & SA_CONT) == 0 || sig_proc->p_stat != SSTOP))
+ goto sigout_locked;
+
/*
* SIGKILL priority twiddling moved here from above because
* it needs sig_thread. Could merge it into large switch
* below if we didn't care about priority for tracing
* as SIGKILL's action is always SIG_DFL.
+ *
+ * TODO: p_nice isn't hooked up to the scheduler...
*/
if ((signum == SIGKILL) && (sig_proc->p_nice > NZERO)) {
sig_proc->p_nice = NZERO;
if (sig_proc->p_lflag & P_LTRACED) {
if (sig_proc->p_stat != SSTOP)
goto runlocked;
- else {
- proc_unlock(sig_proc);
- goto psigout;
- }
+ else
+ goto sigout_locked;
}
+
if ((flavor & PSIG_VFORK) != 0)
goto runlocked;
if (prop & SA_CONT) {
OSBitOrAtomic(P_CONTINUED, &sig_proc->p_flag);
sig_proc->p_contproc = current_proc()->p_pid;
-
- proc_unlock(sig_proc);
(void) task_resume_internal(sig_task);
- goto psigout;
}
- proc_unlock(sig_proc);
- goto psigout;
+ goto sigout_locked;
}
if (action != SIG_DFL) {
*/
if (prop & SA_CONT) {
OSBitOrAtomic(P_CONTINUED, &sig_proc->p_flag);
- proc_unlock(sig_proc);
(void) task_resume_internal(sig_task);
- proc_lock(sig_proc);
sig_proc->p_stat = SRUN;
} else if (sig_proc->p_stat == SSTOP) {
- proc_unlock(sig_proc);
- goto psigout;
+ goto sigout_locked;
}
/*
* Fill out siginfo structure information to pass to the
* Note: Avoid the SIGCHLD recursion case!
*/
if (signum != SIGCHLD) {
- proc_unlock(sig_proc);
r_uid = kauth_getruid();
- proc_lock(sig_proc);
sig_proc->si_pid = current_proc()->p_pid;
sig_proc->si_status = W_EXITCODE(signum, 0);
* stopped from the keyboard.
*/
if (!(prop & SA_STOP) && sig_proc->p_pptr == initproc) {
- proc_unlock(sig_proc);
- psignal_locked(sig_proc, SIGKILL);
- proc_lock(sig_proc);
uth->uu_siglist &= ~mask;
proc_unlock(sig_proc);
- goto psigout;
+ /* siglock still locked, proc_lock not locked */
+ psignal_locked(sig_proc, SIGKILL);
+ goto sigout_unlocked;
}
-
+
/*
* Stop the task
* if task hasn't already been stopped by
psignal(pp, SIGCHLD);
}
- if (pp != PROC_NULL)
+ if (pp != PROC_NULL) {
proc_parentdropref(pp, 0);
- } else
- proc_unlock(sig_proc);
- goto psigout;
+ }
+
+ goto sigout_unlocked;
+ }
+
+ goto sigout_locked;
}
DTRACE_PROC3(signal__send, thread_t, sig_thread, proc_t, p, int, signum);
- /*
- * enters switch with sig_proc lock held but dropped when
- * gets out of switch
- */
switch (signum) {
/*
* Signals ignored by default have been dealt
*/
act_set_astbsd(sig_thread);
thread_abort(sig_thread);
- proc_unlock(sig_proc);
- goto psigout;
+ goto sigout_locked;
case SIGCONT:
/*
OSBitOrAtomic(P_CONTINUED, &sig_proc->p_flag);
sig_proc->p_contproc = sig_proc->p_pid;
- proc_unlock(sig_proc);
(void) task_resume_internal(sig_task);
- proc_lock(sig_proc);
+
/*
* When processing a SIGCONT, we need to check
* to see if there are signals pending that
uth->uu_siglist &= ~mask;
sig_proc->p_stat = SRUN;
- proc_unlock(sig_proc);
- goto psigout;
+ goto sigout_locked;
default:
/*
*/
if (((flavor & (PSIG_VFORK|PSIG_THREAD)) == 0) && (action == SIG_DFL) && (prop & SA_KILL)) {
sig_proc->p_stat = SRUN;
- proc_unlock(sig_proc);
thread_abort(sig_thread);
- goto psigout;
+ goto sigout_locked;
}
/*
* resume it.
*/
if (sig_proc->p_stat == SSTOP) {
- proc_unlock(sig_proc);
- goto psigout;
+ goto sigout_locked;
}
goto runlocked;
}
*/
if (sig_proc->p_stat == SSTOP) {
if ((sig_proc->p_lflag & P_LTRACED) != 0 && sig_proc->p_xstat != 0)
- uth->uu_siglist |= sigmask(sig_proc->p_xstat);
+ uth->uu_siglist |= sigmask(sig_proc->p_xstat);
+
if ((flavor & PSIG_VFORK) != 0) {
sig_proc->p_stat = SRUN;
}
- proc_unlock(sig_proc);
} else {
/*
* setrunnable(p) in BSD and
* Wake up the thread if it is interruptible.
*/
sig_proc->p_stat = SRUN;
- proc_unlock(sig_proc);
if ((flavor & PSIG_VFORK) == 0)
thread_abort_safely(sig_thread);
}
-psigout:
+
+sigout_locked:
+ proc_unlock(sig_proc);
+
+sigout_unlocked:
if ((flavor & PSIG_LOCKED)== 0) {
proc_signalend(sig_proc, 0);
}
return (0);
}
+int
+timespec_is_valid(const struct timespec *ts)
+{
+ /* The INT32_MAX limit ensures the timespec is safe for clock_*() functions
+ * which accept 32-bit ints. */
+ if (ts->tv_sec < 0 || ts->tv_sec > INT32_MAX ||
+ ts->tv_nsec < 0 || (unsigned long long)ts->tv_nsec > NSEC_PER_SEC) {
+ return 0;
+ }
+ return 1;
+}
+
/*
* Decrement an interval timer by a specified number
* of microseconds, which must be less than a second,
return (result + usresult);
}
+uint64_t
+tstoabstime(struct timespec *ts)
+{
+ uint64_t abstime_s, abstime_ns;
+ clock_interval_to_absolutetime_interval(ts->tv_sec, NSEC_PER_SEC, &abstime_s);
+ clock_interval_to_absolutetime_interval(ts->tv_nsec, 1, &abstime_ns);
+ return abstime_s + abstime_ns;
+}
+
#if NETWORKING
/*
* ratecheck(): simple time-based rate-limit checking.
int __attribute__ ((noinline)) proc_pid_rusage(int pid, int flavor, user_addr_t buffer, int32_t * retval);
int __attribute__ ((noinline)) proc_pidoriginatorinfo(int pid, int flavor, user_addr_t buffer, uint32_t buffersize, int32_t * retval);
int __attribute__ ((noinline)) proc_listcoalitions(int flavor, int coaltype, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
+int __attribute__ ((noinline)) proc_can_use_foreground_hw(int pid, user_addr_t reason, uint32_t reasonsize, int32_t *retval);
/* protos for procpidinfo calls */
int __attribute__ ((noinline)) proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
case PROC_INFO_CALL_LISTCOALITIONS:
return proc_listcoalitions(pid /* flavor */, flavor /* coaltype */, buffer,
buffersize, retval);
+ case PROC_INFO_CALL_CANUSEFGHW:
+ return proc_can_use_foreground_hw(pid, buffer, buffersize, retval);
default:
return(EINVAL);
}
}
+/*************************** proc_can_use_foreground_hw **************************/
+int proc_can_use_foreground_hw(int pid, user_addr_t u_reason, uint32_t reasonsize, int32_t *retval)
+{
+ proc_t p = PROC_NULL;
+ int error = 0;
+ uint32_t reason = PROC_FGHW_ERROR;
+ uint32_t isBG = 0;
+ task_t task = TASK_NULL;
+#if CONFIG_COALITIONS
+ coalition_t coal = COALITION_NULL;
+#endif
+
+ *retval = 0;
+
+ if (pid <= 0) {
+ error = EINVAL;
+ reason = PROC_FGHW_ERROR;
+ goto out;
+ }
+
+ p = proc_find(pid);
+ if (p == PROC_NULL) {
+ error = ESRCH;
+ reason = PROC_FGHW_ERROR;
+ goto out;
+ }
+
+#if CONFIG_COALITIONS
+ if (p != current_proc() &&
+ !kauth_cred_issuser(kauth_cred_get())) {
+ error = EPERM;
+ reason = PROC_FGHW_ERROR;
+ goto out;
+ }
+
+ task = p->task;
+ task_reference(task);
+ if (coalition_is_leader(task, COALITION_TYPE_JETSAM, &coal) == FALSE) {
+ /* current task is not a coalition leader: find the leader */
+ task_deallocate(task);
+ task = coalition_get_leader(coal);
+ }
+
+ if (task != TASK_NULL) {
+ /*
+ * If task is non-null, then it is the coalition leader of the
+ * current process' coalition. This could be the same task as
+ * the current_task, and that's OK.
+ */
+ uint32_t flags = 0;
+ int role;
+
+ proc_get_darwinbgstate(task, &flags);
+ if ((flags & PROC_FLAG_APPLICATION) != PROC_FLAG_APPLICATION) {
+ /*
+ * Coalition leader is not an application, continue
+ * searching for other ways this task could gain
+ * access to HW
+ */
+ reason = PROC_FGHW_DAEMON_LEADER;
+ goto no_leader;
+ }
+
+ if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG)) {
+ /*
+ * If the leader of the current process' coalition has
+ * been marked as DARWIN_BG, then it definitely should
+ * not be using foreground hardware resources.
+ */
+ reason = PROC_FGHW_LEADER_BACKGROUND;
+ goto out;
+ }
+
+ role = proc_get_effective_task_policy(task, TASK_POLICY_ROLE);
+ switch (role) {
+ case TASK_FOREGROUND_APPLICATION: /* DARWIN_ROLE_UI_FOCAL */
+ case TASK_BACKGROUND_APPLICATION: /* DARWIN_ROLE_UI */
+ /*
+ * The leader of this coalition is a focal, UI app:
+ * access granted
+ * TODO: should extensions/plugins be allowed to use
+ * this hardware?
+ */
+ *retval = 1;
+ reason = PROC_FGHW_OK;
+ goto out;
+ case TASK_DEFAULT_APPLICATION: /* DARWIN_ROLE_UI_NON_FOCAL */
+ case TASK_NONUI_APPLICATION: /* DARWIN_ROLE_NON_UI */
+ case TASK_THROTTLE_APPLICATION:
+ case TASK_UNSPECIFIED:
+ default:
+ /* non-focal, non-ui apps don't get access */
+ reason = PROC_FGHW_LEADER_NONUI;
+ goto out;
+ }
+ }
+
+no_leader:
+ if (task != TASK_NULL) {
+ task_deallocate(task);
+ task = TASK_NULL;
+ }
+#endif /* CONFIG_COALITIONS */
+
+ /*
+ * There is no reasonable semantic to investigate the currently
+ * adopted voucher of an arbitrary thread in a non-current process.
+ * We return '0'
+ */
+ if (p != current_proc()) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * In the absence of coalitions, fall back to a voucher-based lookup
+	 * where a daemon can use foreground HW if it's operating on behalf
+ * of a foreground application.
+ * NOTE: this is equivalent to a call to
+ * proc_pidoriginatorinfo(PROC_PIDORIGINATOR_BGSTATE, &isBG, sizeof(isBG))
+ */
+ isBG = 1;
+ error = proc_get_originatorbgstate(&isBG);
+ switch (error) {
+ case 0:
+ break;
+ case ESRCH:
+ reason = PROC_FGHW_NO_ORIGINATOR;
+ error = 0;
+ goto out;
+ case ENOATTR:
+ reason = PROC_FGHW_NO_VOUCHER_ATTR;
+ error = 0;
+ goto out;
+ case EINVAL:
+ reason = PROC_FGHW_DAEMON_NO_VOUCHER;
+ error = 0;
+ goto out;
+ default:
+ /* some other error occurred: report that to the caller */
+ reason = PROC_FGHW_VOUCHER_ERROR;
+ goto out;
+ }
+
+ if (isBG) {
+ reason = PROC_FGHW_ORIGINATOR_BACKGROUND;
+ error = 0;
+ } else {
+ /*
+ * The process itself is either a foreground app, or has
+ * adopted a voucher originating from an app that's still in
+ * the foreground
+ */
+ reason = PROC_FGHW_DAEMON_OK;
+ *retval = 1;
+ }
+
+out:
+ if (task != TASK_NULL)
+ task_deallocate(task);
+ if (p != PROC_NULL)
+ proc_rele(p);
+ if (reasonsize >= sizeof(reason) && u_reason != (user_addr_t)0)
+ (void)copyout(&reason, u_reason, sizeof(reason));
+ return error;
+}
+
+
/********************************** proc_pidinfo ********************************/
kern_return_t kr;
struct coalition_resource_usage cru;
- if (bufsize != sizeof(cru)) {
- return EINVAL;
- }
-
kr = coalition_resource_usage_internal(coal, &cru);
switch (kr) {
return EIO; /* shrug */
}
- return copyout(&cru, buffer, bufsize);
+ return copyout(&cru, buffer, MIN(bufsize, sizeof(cru)));
}
int coalition_info(proc_t p, struct coalition_info_args *uap, __unused int32_t *retval)
static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount);
static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
+static int select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval);
/*
* Select system call.
* Returns: 0 Success
* EINVAL Invalid argument
* EAGAIN Nonconformant error if allocation fails
- * selprocess:???
*/
int
select(struct proc *p, struct select_args *uap, int32_t *retval)
{
__pthread_testcancel(1);
- return(select_nocancel(p, (struct select_nocancel_args *)uap, retval));
+ return select_nocancel(p, (struct select_nocancel_args *)uap, retval);
}
int
select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
+{
+ uint64_t timeout = 0;
+
+ if (uap->tv) {
+ int err;
+ struct timeval atv;
+ if (IS_64BIT_PROCESS(p)) {
+ struct user64_timeval atv64;
+ err = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
+ /* Loses resolution - assume timeout < 68 years */
+ atv.tv_sec = atv64.tv_sec;
+ atv.tv_usec = atv64.tv_usec;
+ } else {
+ struct user32_timeval atv32;
+ err = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
+ atv.tv_sec = atv32.tv_sec;
+ atv.tv_usec = atv32.tv_usec;
+ }
+ if (err)
+ return err;
+
+ if (itimerfix(&atv)) {
+ err = EINVAL;
+ return err;
+ }
+
+ clock_absolutetime_interval_to_deadline(tvtoabstime(&atv), &timeout);
+ }
+
+ return select_internal(p, uap, timeout, retval);
+}
+
+int
+pselect(struct proc *p, struct pselect_args *uap, int32_t *retval)
+{
+ __pthread_testcancel(1);
+ return pselect_nocancel(p, (struct pselect_nocancel_args *)uap, retval);
+}
+
+int
+pselect_nocancel(struct proc *p, struct pselect_nocancel_args *uap, int32_t *retval)
+{
+ int err;
+ struct uthread *ut;
+ uint64_t timeout = 0;
+
+ if (uap->ts) {
+ struct timespec ts;
+
+ if (IS_64BIT_PROCESS(p)) {
+ struct user64_timespec ts64;
+ err = copyin(uap->ts, (caddr_t)&ts64, sizeof(ts64));
+ ts.tv_sec = ts64.tv_sec;
+ ts.tv_nsec = ts64.tv_nsec;
+ } else {
+ struct user32_timespec ts32;
+ err = copyin(uap->ts, (caddr_t)&ts32, sizeof(ts32));
+ ts.tv_sec = ts32.tv_sec;
+ ts.tv_nsec = ts32.tv_nsec;
+ }
+ if (err) {
+ return err;
+ }
+
+ if (!timespec_is_valid(&ts)) {
+ return EINVAL;
+ }
+ clock_absolutetime_interval_to_deadline(tstoabstime(&ts), &timeout);
+ }
+
+ ut = get_bsdthread_info(current_thread());
+
+ if (uap->mask != USER_ADDR_NULL) {
+ /* save current mask, then copyin and set new mask */
+ sigset_t newset;
+ err = copyin(uap->mask, &newset, sizeof(sigset_t));
+ if (err) {
+ return err;
+ }
+ ut->uu_oldmask = ut->uu_sigmask;
+ ut->uu_flag |= UT_SAS_OLDMASK;
+ ut->uu_sigmask = (newset & ~sigcantmask);
+ }
+
+ err = select_internal(p, (struct select_nocancel_args *)uap, timeout, retval);
+
+ if (err != EINTR && ut->uu_flag & UT_SAS_OLDMASK) {
+ /*
+ * Restore old mask (direct return case). NOTE: EINTR can also be returned
+ * if the thread is cancelled. In that case, we don't reset the signal
+ * mask to its original value (which usually happens in the signal
+ * delivery path). This behavior is permitted by POSIX.
+ */
+ ut->uu_sigmask = ut->uu_oldmask;
+ ut->uu_oldmask = 0;
+ ut->uu_flag &= ~UT_SAS_OLDMASK;
+ }
+
+ return err;
+}
+
+/*
+ * Generic implementation of {,p}select. Care: we type-pun uap across the two
+ * syscalls, which differ slightly. The first 4 arguments (nfds and the fd sets)
+ * are identical. The 5th (timeout) argument points to different types, so we
+ * unpack in the syscall-specific code, but the generic code still does a null
+ * check on this argument to determine if a timeout was specified.
+ */
+static int
+select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval)
{
int error = 0;
u_int ni, nw;
getbits(ex, 2);
#undef getbits
- if (uap->tv) {
- struct timeval atv;
- if (IS_64BIT_PROCESS(p)) {
- struct user64_timeval atv64;
- error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
- /* Loses resolution - assume timeout < 68 years */
- atv.tv_sec = atv64.tv_sec;
- atv.tv_usec = atv64.tv_usec;
- } else {
- struct user32_timeval atv32;
- error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
- atv.tv_sec = atv32.tv_sec;
- atv.tv_usec = atv32.tv_usec;
- }
- if (error)
- goto continuation;
- if (itimerfix(&atv)) {
- error = EINVAL;
- goto continuation;
- }
-
- clock_absolutetime_interval_to_deadline(
- tvtoabstime(&atv), &seldata->abstime);
- }
- else
- seldata->abstime = 0;
+ seldata->abstime = timeout;
if ( (error = selcount(p, sel->ibits, uap->nd, &count)) ) {
goto continuation;
putbits(ex, 2);
#undef putbits
}
+
+ if (error != EINTR && sel_pass == SEL_SECONDPASS && uth->uu_flag & UT_SAS_OLDMASK) {
+ /* restore signal mask - continuation case */
+ uth->uu_sigmask = uth->uu_oldmask;
+ uth->uu_oldmask = 0;
+ uth->uu_flag &= ~UT_SAS_OLDMASK;
+ }
+
return(error);
}
391 AUE_NULL ALL { int enosys(void); }
392 AUE_NULL ALL { int enosys(void); }
393 AUE_NULL ALL { int enosys(void); }
-394 AUE_NULL ALL { int enosys(void); }
-395 AUE_NULL ALL { int enosys(void); }
+394 AUE_SELECT ALL { int pselect(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, const struct timespec *ts, const struct sigset_t *mask) NO_SYSCALL_STUB; }
+395 AUE_SELECT ALL { int pselect_nocancel(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, const struct timespec *ts, const struct sigset_t *mask) NO_SYSCALL_STUB; }
396 AUE_NULL ALL { user_ssize_t read_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; }
397 AUE_NULL ALL { user_ssize_t write_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; }
398 AUE_OPEN_RWTC ALL { int open_nocancel(user_addr_t path, int flags, int mode) NO_SYSCALL_STUB; }
0x130048C MACH_vm_page_sleep
0x1300490 MACH_vm_page_expedite
0x13004c0 MACH_vm_pressure_event
+0x1300500 MACH_vm_data_write
0x1400000 MACH_SCHED
0x1400004 MACH_STKATTACH
0x1400008 MACH_STKHANDOFF
0x1a30004 ENERGY_PERF_GPU_DESCRIPTION
0x1a30008 ENERGY_PERF_GPU_TIME
0x1a40000 SYSDIAGNOSE_notify_user
+0x1a50000 ZALLOC_ZCRAM
0x2010000 L_IP_In_Beg
0x2010004 L_IP_Out_Beg
0x2010008 L_IP_In_End
error = cs_validate_csblob((const uint8_t *)addr, size, &cd);
if (error) {
- if (cs_debug)
+
+ if (cs_debug)
printf("CODESIGNING: csblob invalid: %d\n", error);
- blob->csb_flags = 0;
- blob->csb_start_offset = 0;
- blob->csb_end_offset = 0;
- memset(blob->csb_cdhash, 0, sizeof(blob->csb_cdhash));
- /* let the vnode checker determine if the signature is valid or not */
+ /* The vnode checker can't make the rest of this function succeed if csblob validation failed, so bail */
+ goto out;
+
} else {
const unsigned char *md_base;
uint8_t hash[CS_HASH_MAX_SIZE];
dst_se = sockaddrentry_alloc(how);
if (dst_se != NULL) {
int len = src_se->se_addr->sa_len;
+ /*
+ * Workaround for rdar://23362120
+		 * Always allocate a buffer that can hold an IPv6 socket address
+ */
+ size_t alloclen = MAX(len, sizeof(struct sockaddr_in6));
MALLOC(dst_se->se_addr, struct sockaddr *,
- len, M_SONAME, how | M_ZERO);
+ alloclen, M_SONAME, how | M_ZERO);
if (dst_se->se_addr != NULL) {
bcopy(src_se->se_addr, dst_se->se_addr, len);
} else {
{
struct sockaddr *sa;
int error;
+ size_t alloclen;
if (len > SOCK_MAXADDRLEN)
return (ENAMETOOLONG);
if (len < offsetof(struct sockaddr, sa_data[0]))
return (EINVAL);
- MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
+ /*
+ * Workaround for rdar://23362120
+	 * Always allocate a buffer that can hold an IPv6 socket address
+ */
+ alloclen = MAX(len, sizeof(struct sockaddr_in6));
+ MALLOC(sa, struct sockaddr *, alloclen, M_SONAME, M_WAITOK | M_ZERO);
if (sa == NULL) {
return (ENOMEM);
}
posix_madvise.2 \
pread.2 \
profil.2 \
+ pselect.2 \
pthread_setugid_np.2 \
ptrace.2 \
pwrite.2 \
--- /dev/null
+.\"
+.\" Copyright 2002 Massachusetts Institute of Technology
+.\"
+.\" Permission to use, copy, modify, and distribute this software and
+.\" its documentation for any purpose and without fee is hereby
+.\" granted, provided that both the above copyright notice and this
+.\" permission notice appear in all copies, that both the above
+.\" copyright notice and this permission notice appear in all
+.\" supporting documentation, and that the name of M.I.T. not be used
+.\" in advertising or publicity pertaining to distribution of the
+.\" software without specific, written prior permission. M.I.T. makes
+.\" no representations about the suitability of this software for any
+.\" purpose. It is provided "as is" without express or implied
+.\" warranty.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
+.\" ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+.\" SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+.\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+.\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD: src/lib/libc/gen/pselect.3,v 1.4 2002/12/18 10:13:54 ru Exp $
+.\"
+.Dd June 16, 2002
+.Dt PSELECT 2
+.Os
+.Sh NAME
+.Nm pselect
+.Nd synchronous I/O multiplexing a la POSIX.1g
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/select.h
+.Ft int
+.Fo pselect
+.Fa "int nfds"
+.Fa "fd_set *restrict readfds"
+.Fa "fd_set *restrict writefds"
+.Fa "fd_set *restrict errorfds"
+.Fa "const struct timespec *restrict timeout"
+.Fa "const sigset_t *restrict sigmask"
+.Fc
+.Sh DESCRIPTION
+The
+.Fn pselect
+function was introduced by
+.St -p1003.1g-2000
+as a slightly stronger version of
+.Xr select 2 .
+The
+.Fa nfds , readfds , writefds ,
+and
+.Fa errorfds
+arguments are all identical to the analogous arguments of
+.Fn select .
+The
+.Fa timeout
+argument in
+.Fn pselect
+points to a
+.Vt "const struct timespec" ,
+rather than the (modifiable)
+.Vt "struct timeval"
+used by
+.Fn select ;
+as in
+.Fn select ,
+a null pointer may be passed to indicate that
+.Fn pselect
+should wait indefinitely.
+Finally,
+.Fa sigmask
+specifies a signal mask which is set while waiting for input.
+When
+.Fn pselect
+returns, the original signal mask is restored.
+.Pp
+See
+.Xr select 2
+for a more detailed discussion of the semantics of this interface, and
+for macros used to manipulate the
+.Vt "fd_set"
+data type.
+.Sh RETURN VALUES
+The
+.Fn pselect
+function returns the same values and under the same conditions as
+.Fn select .
+.Sh ERRORS
+The
+.Fn pselect
+function may fail for any of the reasons documented for
+.Xr select 2
+and (if a signal mask is provided)
+.Xr sigprocmask 2 .
+.Sh SEE ALSO
+.Xr kqueue 2 ,
+.Xr poll 2 ,
+.Xr select 2 ,
+.Xr sigprocmask 2
+.Sh STANDARDS
+The
+.Fn pselect
+function conforms to
+.St -p1003.1-2001 .
+.Sh HISTORY
+The
+.Fn pselect
+function first appeared in
+.Fx 5.0 .
+.Sh AUTHORS
+The
+.Fn pselect
+manual page was written by
+.An Garrett Wollman Aq wollman@FreeBSD.org .
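The mask swap this page describes is easiest to see from userland. Below is a minimal sketch, not part of the patch itself; the choice of SIGUSR1, the five-second timeout, and the handler body are all illustrative.

/*
 * Illustrative userland use of pselect(): keep SIGUSR1 blocked except while
 * parked in pselect(), so the signal can only arrive at a well-defined point,
 * and the original mask is restored when the call returns.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/select.h>
#include <time.h>
#include <unistd.h>

static volatile sig_atomic_t got_usr1;

static void
on_usr1(int sig)
{
	(void)sig;
	got_usr1 = 1;
}

int
main(void)
{
	struct sigaction sa;
	sigset_t blocked, waitmask;
	fd_set readfds;
	struct timespec timeout = { 5, 0 };

	sa.sa_handler = on_usr1;
	sa.sa_flags = 0;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, NULL);

	/* Block SIGUSR1 everywhere except inside pselect() itself. */
	sigemptyset(&blocked);
	sigaddset(&blocked, SIGUSR1);
	sigprocmask(SIG_BLOCK, &blocked, &waitmask);
	sigdelset(&waitmask, SIGUSR1);

	FD_ZERO(&readfds);
	FD_SET(STDIN_FILENO, &readfds);

	int n = pselect(STDIN_FILENO + 1, &readfds, NULL, NULL, &timeout, &waitmask);
	if (n > 0)
		printf("stdin is readable\n");
	else if (n == 0)
		printf("timed out\n");
	else if (errno == EINTR && got_usr1)
		printf("woken by SIGUSR1\n");
	return 0;
}

Because the kernel installs waitmask only for the duration of the wait, a SIGUSR1 arriving at any other moment stays pending until the next pselect() call; closing that window atomically is the point of this interface over a separate sigprocmask()/select() pair.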
*/
if (droptype == DTYPE_NODROP && qlen(q) >= maxqsize) {
if (pkt->pkt_proto == IPPROTO_TCP &&
+ qlen(q) < (maxqsize + (maxqsize >> 1)) &&
((pkt->pkt_flags & PKTF_TCP_REXMT) ||
(sp->sfb_flags & SFBF_LAST_PKT_DROPPED))) {
/*
ret = 0;
}
+ if (ifp->if_ipv4_stat == NULL) {
+ MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
+ sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
+ if (ifp->if_ipv4_stat == NULL) {
+ ret = ENOMEM;
+ goto end;
+ }
+ }
+
+ if (ifp->if_ipv6_stat == NULL) {
+ MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
+ sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
+ if (ifp->if_ipv6_stat == NULL) {
+ ret = ENOMEM;
+ goto end;
+ }
+ }
end:
if (ret != 0) {
if (ifp->if_tcp_stat != NULL) {
zfree(dlif_udpstat_zone, *pbuf);
ifp->if_udp_stat = NULL;
}
+ if (ifp->if_ipv4_stat != NULL) {
+ FREE(ifp->if_ipv4_stat, M_TEMP);
+ ifp->if_ipv4_stat = NULL;
+ }
+ if (ifp->if_ipv6_stat != NULL) {
+ FREE(ifp->if_ipv6_stat, M_TEMP);
+ ifp->if_ipv6_stat = NULL;
+ }
}
return (ret);
}
extern int if_next_index(void);
+extern int tcp_ecn_outbound;
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
} else {
ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
}
+
+ /*
+ * Enable ECN capability on this interface depending on the
+	 * value of the ECN global setting
+ */
+ if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
+ ifp->if_eflags |= IFEF_ECN_ENABLE;
+ ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+ }
+
ifnet_lock_done(ifp);
ifnet_head_done();
if (ifp->if_udp_stat != NULL)
bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
+ /* Reset ifnet IPv4 stats */
+ if (ifp->if_ipv4_stat != NULL)
+ bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
+
+ /* Reset ifnet IPv6 stats */
+ if (ifp->if_ipv6_stat != NULL)
+ bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
+
/* Release memory held for interface link status report */
if (ifp->if_link_status != NULL) {
FREE(ifp->if_link_status, M_TEMP);
case SIOCGIFINTERFACESTATE: /* struct ifreq */
case SIOCSIFPROBECONNECTIVITY: /* struct ifreq */
case SIOCGIFPROBECONNECTIVITY: /* struct ifreq */
- case SIOCGSTARTDELAY: { /* struct ifreq */
+ case SIOCGSTARTDELAY: /* struct ifreq */
+ case SIOCGECNMODE: /* struct ifreq */
+ case SIOCSECNMODE: { /* struct ifreq */
struct ifreq ifr;
bcopy(data, &ifr, sizeof (ifr));
ifr.ifr_name[IFNAMSIZ - 1] = '\0';
else
ifr->ifr_probe_connectivity = 0;
break;
+ case SIOCGECNMODE:
+ if ((ifp->if_eflags & (IFEF_ECN_ENABLE|IFEF_ECN_DISABLE)) ==
+ IFEF_ECN_ENABLE)
+ ifr->ifr_ecn_mode = IFRTYPE_ECN_ENABLE;
+ else if ((ifp->if_eflags & (IFEF_ECN_ENABLE|IFEF_ECN_DISABLE)) ==
+ IFEF_ECN_DISABLE)
+ ifr->ifr_ecn_mode = IFRTYPE_ECN_DISABLE;
+ else
+ ifr->ifr_ecn_mode = IFRTYPE_ECN_DEFAULT;
+ break;
+ case SIOCSECNMODE:
+ if (ifr->ifr_ecn_mode == IFRTYPE_ECN_DEFAULT) {
+ ifp->if_eflags &= ~(IFEF_ECN_ENABLE|IFEF_ECN_DISABLE);
+ } else if (ifr->ifr_ecn_mode == IFRTYPE_ECN_ENABLE) {
+ ifp->if_eflags |= IFEF_ECN_ENABLE;
+ ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+ } else if (ifr->ifr_ecn_mode == IFRTYPE_ECN_DISABLE) {
+ ifp->if_eflags |= IFEF_ECN_DISABLE;
+ ifp->if_eflags &= ~IFEF_ECN_ENABLE;
+ } else
+ error = EINVAL;
+ break;
default:
VERIFY(0);
/* NOTREACHED */
case SIOCGIFAGENTDATA64:
case SIOCSIFINTERFACESTATE:
case SIOCGIFINTERFACESTATE:
+ case SIOCSIFPROBECONNECTIVITY:
+ case SIOCGIFPROBECONNECTIVITY:
+ case SIOCGECNMODE:
+ case SIOCSECNMODE:
;
}
}
#define IFEF_NOACKPRI 0x00200000 /* No TCP ACK prioritization */
#define IFEF_AWDL_RESTRICTED 0x00400000 /* Restricted AWDL mode */
#define IFEF_2KCL 0x00800000 /* prefers 2K cluster (socket based tunnel) */
+#define IFEF_ECN_ENABLE 0x01000000 /* use ECN for TCP connections on the interface */
+#define IFEF_ECN_DISABLE 0x02000000 /* do not use ECN for TCP connections on the interface */
#define IFEF_SENDLIST 0x10000000 /* Supports tx packet lists */
#define IFEF_DIRECTLINK 0x20000000 /* point-to-point topology */
#define _IFEF_INUSE 0x40000000 /* deprecated */
} ifru_start_delay;
struct if_interface_state ifru_interface_state;
u_int32_t ifru_probe_connectivity;
+ u_int32_t ifru_ecn_mode;
+#define IFRTYPE_ECN_DEFAULT 0
+#define IFRTYPE_ECN_ENABLE 1
+#define IFRTYPE_ECN_DISABLE 2
#endif /* PRIVATE */
} ifr_ifru;
#define ifr_addr ifr_ifru.ifru_addr /* address */
#define ifr_start_delay_timeout ifr_ifru.ifru_start_delay.timeout
#define ifr_interface_state ifr_ifru.ifru_interface_state
#define ifr_probe_connectivity ifr_ifru.ifru_probe_connectivity
+#define ifr_ecn_mode ifr_ifru.ifru_ecn_mode
#endif /* PRIVATE */
};
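As a companion to the ioctl handling added above, here is a hedged userspace sketch of how the new ifr_ecn_mode field could be driven. It assumes a build where the PRIVATE ifreq members and the SIOCSECNMODE/SIOCGECNMODE ioctls are visible to the caller; the helper name and the use of an AF_INET datagram socket are just the usual conventions for issuing interface ioctls.

/*
 * Hypothetical sketch: query or set the per-interface ECN mode.
 * Requires the PRIVATE definitions of ifr_ecn_mode, IFRTYPE_ECN_* and
 * SIOC[GS]ECNMODE; not buildable against the public SDK.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>

static int
ecn_mode(const char *ifname, const uint32_t *set, uint32_t *get)
{
	struct ifreq ifr;
	int s, err;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));

	if (set != NULL) {
		/* IFRTYPE_ECN_DEFAULT, IFRTYPE_ECN_ENABLE or IFRTYPE_ECN_DISABLE */
		ifr.ifr_ecn_mode = *set;
		err = ioctl(s, SIOCSECNMODE, &ifr);
	} else {
		err = ioctl(s, SIOCGECNMODE, &ifr);
		if (err == 0 && get != NULL)
			*get = ifr.ifr_ecn_mode;
	}
	close(s);
	return err;
}

Passing IFRTYPE_ECN_ENABLE reaches the SIOCSECNMODE case above, which sets IFEF_ECN_ENABLE and clears IFEF_ECN_DISABLE on the ifnet; IFRTYPE_ECN_DEFAULT clears both flags, leaving ECN policy to the global tcp_ecn_outbound setting.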
u_int64_t ifi_poll_interval_time; /* poll interval (nsec) */
};
+struct if_tcp_ecn_perf_stat {
+ u_int64_t rtt_avg;
+ u_int64_t rtt_var;
+ u_int64_t oo_percent;
+ u_int64_t sack_episodes;
+ u_int64_t reorder_percent;
+ u_int64_t rxmit_percent;
+ u_int64_t rxmit_drop;
+};
+
+struct if_tcp_ecn_stat {
+ u_int64_t timestamp;
+ u_int64_t ecn_client_setup;
+ u_int64_t ecn_server_setup;
+ u_int64_t ecn_client_success;
+ u_int64_t ecn_server_success;
+ u_int64_t ecn_peer_nosupport;
+ u_int64_t ecn_syn_lost;
+ u_int64_t ecn_synack_lost;
+ u_int64_t ecn_recv_ce;
+ u_int64_t ecn_recv_ece;
+ u_int64_t ecn_conn_recv_ce;
+ u_int64_t ecn_conn_recv_ece;
+ u_int64_t ecn_conn_plnoce;
+ u_int64_t ecn_conn_plce;
+ u_int64_t ecn_conn_noplce;
+ u_int64_t ecn_fallback_synloss;
+ u_int64_t ecn_fallback_reorder;
+ u_int64_t ecn_fallback_ce;
+ struct if_tcp_ecn_perf_stat ecn_on;
+ struct if_tcp_ecn_perf_stat ecn_off;
+};
+
/*
* Interface link status report -- includes statistics related to
* the link layer technology sent by the driver. The driver will monitor
decl_lck_rw_data(, if_link_status_lock);
struct if_link_status *if_link_status;
struct if_interface_state if_interface_state;
+ struct if_tcp_ecn_stat *if_ipv4_stat;
+ struct if_tcp_ecn_stat *if_ipv6_stat;
};
#define IF_TCP_STATINC(_ifp, _s) do { \
((_ifp)->if_type == IFT_CELLULAR || \
(_ifp)->if_delegated.type == IFT_CELLULAR)
+/*
+ * Indicate whether or not the immediate interface, or the interface delegated
+ * by it, is an ETHERNET interface.
+ */
+#define IFNET_IS_ETHERNET(_ifp) \
+ ((_ifp)->if_family == IFNET_FAMILY_ETHERNET || \
+ (_ifp)->if_delegated.family == IFNET_FAMILY_ETHERNET)
/*
* Indicate whether or not the immediate interface, or the interface delegated
* by it, is a Wi-Fi interface (IFNET_SUBFAMILY_WIFI). Delegated interface
return (error);
}
+ u_int32_t total_len = m_length2(packet, NULL);
+ if (total_len < (tlv_offset + sizeof(u_int8_t) + sizeof(length) + length)) {
+		NECPLOG(LOG_ERR, "Got a bad TLV, length (%u) + offset (%d) > total length (%u)",
+ length, (tlv_offset + sizeof(u_int8_t) + sizeof(length)), total_len);
+ return (EINVAL);
+ }
+
if (value_size != NULL) {
*value_size = length;
}
goto done;
}
// Copy parameters in
- copyin(uap->parameters, parameters, uap->parameters_size);
+ error = copyin(uap->parameters, parameters, uap->parameters_size);
+ if (error) {
+ goto done;
+ }
error = necp_application_find_policy_match_internal(parameters, uap->parameters_size, &returned_result);
if (error) {
}
// Copy return value back
- copyout(&returned_result, uap->returned_result, sizeof(struct necp_aggregate_result));
+ error = copyout(&returned_result, uap->returned_result, sizeof(struct necp_aggregate_result));
+ if (error) {
+ goto done;
+ }
done:
if (parameters != NULL) {
FREE(parameters, M_NECP);
SYSCTL_STRUCT(_net_stats, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
&nstat_stats, nstat_stats, "");
-
enum
{
NSTAT_FLAG_CLEANUP = (1 << 0),
static bool nstat_control_reporting_allowed(nstat_control_state *state, nstat_src *src);
static boolean_t nstat_control_begin_query(nstat_control_state *state, const nstat_msg_hdr *hdrp);
static u_int16_t nstat_control_end_query(nstat_control_state *state, nstat_src *last_src, boolean_t partial);
+static void nstat_ifnet_report_ecn_stats(void);
static u_int32_t nstat_udp_watchers = 0;
static u_int32_t nstat_tcp_watchers = 0;
lck_rw_done(&ifp->if_link_status_lock);
}
+static u_int64_t nstat_ifnet_last_report_time = 0;
+extern int tcp_report_stats_interval;
+
+void
+nstat_ifnet_report_ecn_stats(void)
+{
+ u_int64_t uptime, last_report_time;
+ struct nstat_sysinfo_data data;
+ struct nstat_sysinfo_ifnet_ecn_stats *st;
+ struct ifnet *ifp;
+
+ uptime = net_uptime();
+
+ if ((int)(uptime - nstat_ifnet_last_report_time) <
+ tcp_report_stats_interval)
+ return;
+
+ last_report_time = nstat_ifnet_last_report_time;
+ nstat_ifnet_last_report_time = uptime;
+ data.flags = NSTAT_SYSINFO_IFNET_ECN_STATS;
+ st = &data.u.ifnet_ecn_stats;
+
+ ifnet_head_lock_shared();
+ TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+ if (ifp->if_ipv4_stat == NULL || ifp->if_ipv6_stat == NULL)
+ continue;
+
+ if ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) !=
+ IFRF_ATTACHED)
+ continue;
+
+ /* Limit reporting to Wifi, Ethernet and cellular. */
+ if (!(IFNET_IS_ETHERNET(ifp) || IFNET_IS_CELLULAR(ifp)))
+ continue;
+
+ bzero(st, sizeof(*st));
+ if (IFNET_IS_CELLULAR(ifp)) {
+ st->ifnet_type = NSTAT_IFNET_ECN_TYPE_CELLULAR;
+ } else if (IFNET_IS_WIFI(ifp)) {
+ st->ifnet_type = NSTAT_IFNET_ECN_TYPE_WIFI;
+ } else {
+ st->ifnet_type = NSTAT_IFNET_ECN_TYPE_ETHERNET;
+ }
+
+ /* skip if there was no update since last report */
+ if (ifp->if_ipv4_stat->timestamp <= 0 ||
+ ifp->if_ipv4_stat->timestamp < last_report_time)
+ goto v6;
+ st->ifnet_proto = NSTAT_IFNET_ECN_PROTO_IPV4;
+ bcopy(ifp->if_ipv4_stat, &st->ecn_stat,
+ sizeof(st->ecn_stat));
+ nstat_sysinfo_send_data(&data);
+ bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
+
+v6:
+ /* skip if there was no update since last report */
+ if (ifp->if_ipv6_stat->timestamp <= 0 ||
+ ifp->if_ipv6_stat->timestamp < last_report_time)
+ continue;
+ st->ifnet_proto = NSTAT_IFNET_ECN_PROTO_IPV6;
+ bcopy(ifp->if_ipv6_stat, &st->ecn_stat,
+ sizeof(st->ecn_stat));
+ nstat_sysinfo_send_data(&data);
+
+ /* Zero the stats in ifp */
+ bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
+ }
+ ifnet_head_done();
+
+}
+
static errno_t
nstat_ifnet_copy_descriptor(
nstat_provider_cookie_t cookie,
nkeyvals = sizeof(struct nstat_sysinfo_tcp_stats) /
sizeof(u_int32_t);
break;
+ case NSTAT_SYSINFO_IFNET_ECN_STATS:
+ nkeyvals = (sizeof(struct if_tcp_ecn_stat) /
+ sizeof(u_int64_t));
+ /* One less because we are not going to send timestamp */
+ nkeyvals -= 1;
+ /* Two more keys for ifnet type and proto */
+ nkeyvals += 2;
+ break;
default:
return;
}
nstat_set_keyval_scalar(&kv[i++],
NSTAT_SYSINFO_ECN_CONN_NOPL_CE,
data->u.tcp_stats.ecn_conn_nopl_ce);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_FALLBACK_SYNLOSS,
+ data->u.tcp_stats.ecn_fallback_synloss);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_FALLBACK_REORDER,
+ data->u.tcp_stats.ecn_fallback_reorder);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_FALLBACK_CE,
+ data->u.tcp_stats.ecn_fallback_ce);
nstat_set_keyval_scalar(&kv[i++],
NSTAT_SYSINFO_TFO_SYN_DATA_RCV,
data->u.tcp_stats.tfo_syn_data_rcv);
VERIFY(i == nkeyvals);
break;
}
+ case NSTAT_SYSINFO_IFNET_ECN_STATS:
+ {
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_TYPE,
+ data->u.ifnet_ecn_stats.ifnet_type);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_PROTO,
+ data->u.ifnet_ecn_stats.ifnet_proto);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_CLIENT_SETUP,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_client_setup);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_SERVER_SETUP,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_server_setup);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_CLIENT_SUCCESS,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_client_success);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_SERVER_SUCCESS,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_server_success);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_PEER_NOSUPPORT,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_peer_nosupport);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_SYN_LOST,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_syn_lost);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_SYNACK_LOST,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_synack_lost);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_RECV_CE,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_recv_ce);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_RECV_ECE,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_recv_ece);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_CONN_RECV_CE,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_conn_recv_ce);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_CONN_RECV_ECE,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_conn_recv_ece);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_CONN_PLNOCE,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_conn_plnoce);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_CONN_PLCE,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_conn_plce);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_CONN_NOPLCE,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_conn_noplce);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_FALLBACK_SYNLOSS,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_fallback_synloss);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_FALLBACK_REORDER,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_fallback_reorder);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_FALLBACK_CE,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_fallback_ce);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_ON_RTT_AVG,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_on.rtt_avg);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_ON_RTT_VAR,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_on.rtt_var);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_ON_OOPERCENT,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_on.oo_percent);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_ON_SACK_EPISODE,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_on.sack_episodes);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_ON_REORDER_PERCENT,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_on.reorder_percent);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_ON_RXMIT_PERCENT,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_on.rxmit_percent);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_ON_RXMIT_DROP,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_on.rxmit_drop);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_OFF_RTT_AVG,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_off.rtt_avg);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_OFF_RTT_VAR,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_off.rtt_var);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_OFF_OOPERCENT,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_off.oo_percent);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_OFF_SACK_EPISODE,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_off.sack_episodes);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_OFF_REORDER_PERCENT,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_off.reorder_percent);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_OFF_RXMIT_PERCENT,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_off.rxmit_percent);
+ nstat_set_keyval_scalar(&kv[i++],
+ NSTAT_SYSINFO_ECN_IFNET_OFF_RXMIT_DROP,
+ data->u.ifnet_ecn_stats.ecn_stat.ecn_off.rxmit_drop);
+ VERIFY(i == nkeyvals);
+ break;
+ }
}
if (syscnt != NULL)
{
mbuf_report_peak_usage();
tcp_report_stats();
+ nstat_ifnet_report_ecn_stats();
}
#pragma mark -- Kernel Control Socket --
,NSTAT_SYSINFO_TFO_SYN_DATA_ACKED = 41
,NSTAT_SYSINFO_TFO_SYN_LOSS = 42
,NSTAT_SYSINFO_TFO_BLACKHOLE = 43
+ ,NSTAT_SYSINFO_ECN_FALLBACK_SYNLOSS = 44
+ ,NSTAT_SYSINFO_ECN_FALLBACK_REORDER = 45
+ ,NSTAT_SYSINFO_ECN_FALLBACK_CE = 46
+ ,NSTAT_SYSINFO_ECN_IFNET_TYPE = 47
+ ,NSTAT_SYSINFO_ECN_IFNET_PROTO = 48
+ ,NSTAT_SYSINFO_ECN_IFNET_CLIENT_SETUP = 49
+ ,NSTAT_SYSINFO_ECN_IFNET_SERVER_SETUP = 50
+ ,NSTAT_SYSINFO_ECN_IFNET_CLIENT_SUCCESS = 51
+ ,NSTAT_SYSINFO_ECN_IFNET_SERVER_SUCCESS = 52
+ ,NSTAT_SYSINFO_ECN_IFNET_PEER_NOSUPPORT = 53
+ ,NSTAT_SYSINFO_ECN_IFNET_SYN_LOST = 54
+ ,NSTAT_SYSINFO_ECN_IFNET_SYNACK_LOST = 55
+ ,NSTAT_SYSINFO_ECN_IFNET_RECV_CE = 56
+ ,NSTAT_SYSINFO_ECN_IFNET_RECV_ECE = 57
+ ,NSTAT_SYSINFO_ECN_IFNET_SENT_ECE = 58
+ ,NSTAT_SYSINFO_ECN_IFNET_CONN_RECV_CE = 59
+ ,NSTAT_SYSINFO_ECN_IFNET_CONN_RECV_ECE = 60
+ ,NSTAT_SYSINFO_ECN_IFNET_CONN_PLNOCE = 61
+ ,NSTAT_SYSINFO_ECN_IFNET_CONN_PLCE = 62
+ ,NSTAT_SYSINFO_ECN_IFNET_CONN_NOPLCE = 63
+ ,NSTAT_SYSINFO_ECN_IFNET_FALLBACK_SYNLOSS = 64
+ ,NSTAT_SYSINFO_ECN_IFNET_FALLBACK_REORDER = 65
+ ,NSTAT_SYSINFO_ECN_IFNET_FALLBACK_CE = 66
+ ,NSTAT_SYSINFO_ECN_IFNET_ON_RTT_AVG = 67
+ ,NSTAT_SYSINFO_ECN_IFNET_ON_RTT_VAR = 68
+ ,NSTAT_SYSINFO_ECN_IFNET_ON_OOPERCENT = 69
+ ,NSTAT_SYSINFO_ECN_IFNET_ON_SACK_EPISODE = 70
+ ,NSTAT_SYSINFO_ECN_IFNET_ON_REORDER_PERCENT = 71
+ ,NSTAT_SYSINFO_ECN_IFNET_ON_RXMIT_PERCENT = 72
+ ,NSTAT_SYSINFO_ECN_IFNET_ON_RXMIT_DROP = 73
+ ,NSTAT_SYSINFO_ECN_IFNET_OFF_RTT_AVG = 74
+ ,NSTAT_SYSINFO_ECN_IFNET_OFF_RTT_VAR = 75
+ ,NSTAT_SYSINFO_ECN_IFNET_OFF_OOPERCENT = 76
+ ,NSTAT_SYSINFO_ECN_IFNET_OFF_SACK_EPISODE = 77
+ ,NSTAT_SYSINFO_ECN_IFNET_OFF_REORDER_PERCENT = 78
+ ,NSTAT_SYSINFO_ECN_IFNET_OFF_RXMIT_PERCENT = 79
+ ,NSTAT_SYSINFO_ECN_IFNET_OFF_RXMIT_DROP = 80
};
#pragma mark -- Network Statistics Providers --
#define NSTAT_SYSINFO_MBUF_STATS 0x0001
#define NSTAT_SYSINFO_TCP_STATS 0x0002
+#define NSTAT_SYSINFO_IFNET_ECN_STATS 0x0003
#pragma mark -- Network Statistics User Client --
u_int32_t ecn_conn_plnoce; /* Number of connections using ECN seen packet loss but never received CE */
u_int32_t ecn_conn_pl_ce; /* Number of connections using ECN seen packet loss and CE */
u_int32_t ecn_conn_nopl_ce; /* Number of connections using ECN with no packet loss but received CE */
+ u_int32_t ecn_fallback_synloss; /* Number of times we fell back to non-ECN due to SYN loss */
+ u_int32_t ecn_fallback_reorder; /* Number of times we fell back after detecting a PAWS/reordering issue */
+ u_int32_t ecn_fallback_ce; /* Number of times we fell back because we received too many CE marks */
u_int32_t tfo_syn_data_rcv; /* Number of SYN+data received with valid cookie */
u_int32_t tfo_cookie_req_rcv;/* Number of TFO cookie-requests received */
u_int32_t tfo_cookie_sent; /* Number of TFO-cookies offered to the client */
u_int32_t tfo_blackhole; /* Number of times SYN+TFO has been lost and we fallback */
} nstat_sysinfo_tcp_stats;
+enum {
+ NSTAT_IFNET_ECN_PROTO_IPV4 = 1
+ ,NSTAT_IFNET_ECN_PROTO_IPV6
+};
+
+enum {
+ NSTAT_IFNET_ECN_TYPE_CELLULAR = 1
+ ,NSTAT_IFNET_ECN_TYPE_WIFI
+ ,NSTAT_IFNET_ECN_TYPE_ETHERNET
+};
+
+typedef struct nstat_sysinfo_ifnet_ecn_stats {
+ u_int32_t ifnet_proto;
+ u_int32_t ifnet_type;
+ struct if_tcp_ecn_stat ecn_stat;
+} nstat_sysinfo_ifnet_ecn_stats;
+
typedef struct nstat_sysinfo_data
{
u_int32_t flags;
union {
nstat_sysinfo_mbuf_stats mb_stats;
nstat_sysinfo_tcp_stats tcp_stats;
+ nstat_sysinfo_ifnet_ecn_stats ifnet_ecn_stats;
} u;
} nstat_sysinfo_data;
if (p == NULL)
return;
- if (pfs) {
+ if (pfs != NULL) {
bzero(pfs->pcounters, sizeof (pfs->pcounters));
bzero(pfs->bcounters, sizeof (pfs->bcounters));
- }
- /* just clear statistics */
- if (pfs == NULL) {
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++) {
+ pfs->pcounters[i][j][k] +=
+ p->pfik_packets[i][j][k];
+ pfs->bcounters[i][j] +=
+ p->pfik_bytes[i][j][k];
+ }
+ } else {
+ /* just clear statistics */
bzero(p->pfik_packets, sizeof (p->pfik_packets));
bzero(p->pfik_bytes, sizeof (p->pfik_bytes));
p->pfik_tzero = pf_calendar_time_second();
}
- for (i = 0; i < 2; i++)
- for (j = 0; j < 2; j++)
- for (k = 0; k < 2; k++) {
- pfs->pcounters[i][j][k] +=
- p->pfik_packets[i][j][k];
- pfs->bcounters[i][j] +=
- p->pfik_bytes[i][j][k];
- }
}
int
struct ifnet *gifp = NULL;
struct ip *ip;
int af, proto;
- u_int8_t otos;
+ u_int8_t otos, old_tos;
int egress_success = 0;
+ int sum;
ip = mtod(m, struct ip *);
proto = ip->ip_p;
return;
}
ip = mtod(m, struct ip *);
- if (gifp->if_flags & IFF_LINK1)
+ if (gifp->if_flags & IFF_LINK1) {
+ old_tos = ip->ip_tos;
egress_success = ip_ecn_egress(ECN_NORMAL, &otos, &ip->ip_tos);
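+ /*
+ * If the egress mapping changed the inner TOS byte, patch the
+ * IP header checksum incrementally (RFC 1624: HC' = ~(~HC + ~m + m')).
+ */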
- else
+ if (old_tos != ip->ip_tos) {
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += (~old_tos & 0xffff) + ip->ip_tos;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+ ip->ip_sum = htons(~sum & 0xffff);
+ }
+ } else
egress_success = ip_ecn_egress(ECN_NOCARE, &otos, &ip->ip_tos);
break;
}
return (EINVAL);
if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
wild = 1;
- socket_unlock(so, 0); /* keep reference on socket */
- lck_rw_lock_exclusive(pcbinfo->ipi_lock);
bzero(&laddr, sizeof(laddr));
+ socket_unlock(so, 0); /* keep reference on socket */
+ lck_rw_lock_exclusive(pcbinfo->ipi_lock);
+
if (nam != NULL) {
if (nam->sa_len != sizeof (struct sockaddr_in)) {
}
}
socket_lock(so, 0);
+
+ /*
+ * We gave up the socket's protocol lock for a potentially long time,
+ * so the socket might have been dropped or defuncted in the meantime.
+ * Check whether the world has changed before proceeding.
+ */
+ if (inp->inp_state == INPCB_STATE_DEAD) {
+ lck_rw_done(pcbinfo->ipi_lock);
+ return (ECONNABORTED);
+ }
+
if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
lck_rw_done(pcbinfo->ipi_lock);
return (EINVAL);
}
/*
- * Insert PCB onto various hash lists.
+ * @brief Insert PCB onto various hash lists.
+ *
+ * @param inp Pointer to internet protocol control block
+ * @param locked Indicates whether ipi_lock (protecting the pcb
+ * list) is already held by the caller.
+ *
+ * @return 0 on success, an errno value on failure
*/
int
in_pcbinshash(struct inpcb *inp, int locked)
socket_unlock(inp->inp_socket, 0);
lck_rw_lock_exclusive(pcbinfo->ipi_lock);
socket_lock(inp->inp_socket, 0);
- if (inp->inp_state == INPCB_STATE_DEAD) {
- /*
- * The socket got dropped when
- * it was unlocked
- */
- lck_rw_done(pcbinfo->ipi_lock);
- return (ECONNABORTED);
- }
}
}
+ /*
+ * This routine or its caller may have briefly given up the
+ * socket's protocol lock, and the socket may have been dropped
+ * during that time. Guard against that here.
+ */
+ if (inp->inp_state == INPCB_STATE_DEAD) {
+ if (!locked) {
+ lck_rw_done(pcbinfo->ipi_lock);
+ }
+ return (ECONNABORTED);
+ }
+
#if INET6
if (inp->inp_vflag & INP_IPV6)
hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
break;
}
- VERIFY(inp->inp_state != INPCB_STATE_DEAD);
-
/*
* If none exists, malloc one and tack it on.
*/
#if IPSEC
#define in6p_sp inp_sp
#endif /* IPSEC */
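+/*
+ * Bump the named counter in the per-address-family TCP ECN statistics of the
+ * inp's last outbound interface, if one is recorded.
+ */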
+#define INP_INC_IFNET_STAT(_inp_, _stat_) { \
+ if ((_inp_)->inp_last_outifp != NULL) { \
+ if ((_inp_)->inp_vflag & INP_IPV6) { \
+ (_inp_)->inp_last_outifp->if_ipv6_stat->_stat_++;\
+ } else { \
+ (_inp_)->inp_last_outifp->if_ipv4_stat->_stat_++;\
+ }\
+ }\
+}
struct inpcbport {
LIST_ENTRY(inpcbport) phd_hash;
if (SOCK_PROTO(inp->inp_socket) == IPPROTO_TCP) {
struct tcpcb *tp = sototcpcb(inp->inp_socket);
+ /*
+ * Workaround race where inp_ppcb is NULL during
+ * socket initialization
+ */
+ if (tp == NULL)
+ continue;
+
switch (tp->t_state) {
case TCPS_CLOSED:
continue;
/* Process ECN for both normal and compatibility modes */
case ECN_NORMAL:
case ECN_COMPATIBILITY:
- if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
- if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) {
- /* Drop */
- return (0);
- } else {
- *inner |= IPTOS_ECN_CE;
- }
+ if (((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) &&
+ ((*inner & IPTOS_ECN_MASK) != IPTOS_ECN_NOTECT)) {
+ *inner |= IPTOS_ECN_CE;
} else if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 &&
(*inner & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0) {
*inner = *outer;
* Enable message delivery on a socket, this feature is currently unsupported and
* is subjected to change in future.
*/
-#define TCP_ENABLE_MSGS 0x206
+#define TCP_ENABLE_MSGS 0x206
#define TCP_ADAPTIVE_WRITE_TIMEOUT 0x207 /* Write timeout used as a multiple of RTT */
-#define TCP_NOTIMEWAIT 0x208 /* Avoid going into time-wait */
+#define TCP_NOTIMEWAIT 0x208 /* Avoid going into time-wait */
#define TCP_DISABLE_BLACKHOLE_DETECTION 0x209 /* disable PMTU blackhole detection */
+#define TCP_ECN_MODE 0x210 /* fine-grained per-connection control for A/B testing */
+
+#define ECN_MODE_DEFAULT 0x0 /* per interface or system wide default */
+#define ECN_MODE_ENABLE 0x1 /* force enable ECN on connection */
+#define ECN_MODE_DISABLE 0x2 /* force disable ECN on connection */
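+
+/*
+ * Illustrative usage sketch (not part of this change): a process can pin the
+ * ECN behaviour of a single connection before connect(2), e.g.
+ *
+ *	int mode = ECN_MODE_ENABLE;
+ *	setsockopt(s, IPPROTO_TCP, TCP_ECN_MODE, &mode, sizeof (mode));
+ *
+ * where 's' is an as-yet-unconnected TCP socket; ECN_MODE_DEFAULT restores
+ * the per-interface/system-wide behaviour.
+ */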
/*
* The TCP_INFO socket option is a private API and is subject to change
tcpi_tfo_cookie_req_rcv:1, /* Server received cookie-request */
tcpi_tfo_cookie_sent:1, /* Server announced cookie */
tcpi_tfo_cookie_invalid:1; /* Server received an invalid cookie */
+
+ u_int16_t tcpi_ecn_client_setup:1, /* Attempted ECN setup from client side */
+ tcpi_ecn_server_setup:1, /* Attempted ECN setup from server side */
+ tcpi_ecn_success:1, /* peer negotiated ECN */
+ tcpi_ecn_lost_syn:1, /* Lost SYN with ECN setup */
+ tcpi_ecn_lost_synack:1, /* Lost SYN-ACK with ECN setup */
+ tcpi_local_peer:1, /* Local to the host or the subnet */
+ tcpi_if_cell:1, /* Interface is cellular */
+ tcpi_if_wifi:1; /* Interface is WiFi */
+
+ u_int32_t tcpi_ecn_recv_ce; /* Packets received with CE */
+ u_int32_t tcpi_ecn_recv_cwr; /* Packets received with CWR */
+
+ u_int32_t tcpi_rcvoopack; /* out-of-order packets received */
+ u_int32_t tcpi_pawsdrop; /* segments dropped due to PAWS */
+ u_int32_t tcpi_sack_recovery_episode; /* SACK recovery episodes */
+ u_int32_t tcpi_reordered_pkts; /* packets reordered */
+ u_int32_t tcpi_dsack_sent; /* Sent DSACK notification */
+ u_int32_t tcpi_dsack_recvd; /* Received a valid DSACK option */
+ u_int32_t tcpi_flowhash; /* Unique id for the connection */
};
struct tcp_measure_bw_burst {
struct tcp_heuristic_key th_key;
- /*
- * If tfo_cookie_loss is changed to a smaller type, it might be worth
- * checking for integer-overflow in tcp_cache_tfo_inc_loss
- */
- u_int32_t th_tfo_cookie_loss; /* The number of times a SYN+cookie has been lost */
+ char th_val_start[0]; /* Marker for memsetting to 0 */
+
+ u_int8_t th_tfo_cookie_loss; /* The number of times a SYN+cookie has been lost */
+ u_int8_t th_ecn_loss; /* The number of times a SYN+ecn has been lost */
+ u_int8_t th_ecn_aggressive; /* The number of times we did an aggressive fallback */
u_int32_t th_tfo_fallback_trials; /* Number of times we did not try out TFO due to SYN-loss */
u_int32_t th_tfo_cookie_backoff; /* Time until when we should not try out TFO */
+ u_int32_t th_ecn_backoff; /* Time until when we should not try out ECN */
- u_int8_t th_tfo_in_backoff:1, /* Are we doing TFO due to the backoff timer? */
- th_tfo_aggressive_fallback:1, /* Agressive fallback due to nasty middlebox */
+ u_int8_t th_tfo_in_backoff:1, /* Are we avoiding TFO due to the backoff timer? */
+ th_tfo_aggressive_fallback:1, /* Aggressive fallback due to nasty middlebox */
th_tfo_snd_middlebox_supp:1, /* We are sure that the network supports TFO in upstream direction */
th_tfo_rcv_middlebox_supp:1; /* We are sure that the network supports TFO in downstream direction*/
+
+ char th_val_end[0]; /* Marker for memsetting to 0 */
};
struct tcp_heuristics_head {
static lck_grp_t *tcp_heuristic_mtx_grp;
static lck_grp_attr_t *tcp_heuristic_mtx_grp_attr;
-/* Number of SYN-losses we accept */
-#define TFO_MAX_COOKIE_LOSS 2
+int tcp_ecn_timeout = 60;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &tcp_ecn_timeout, 0, "Initial minutes to wait before re-trying ECN");
/*
* Round up to next higher power-of 2. See "Bit Twiddling Hacks".
tpheur = oldest_heur;
/* We recycle - set everything to 0 */
- tpheur->th_tfo_cookie_loss = 0;
- tpheur->th_tfo_fallback_trials = 0;
- tpheur->th_tfo_cookie_backoff = 0;
- tpheur->th_tfo_in_backoff = 0;
- tpheur->th_tfo_aggressive_fallback = 0;
- tpheur->th_tfo_snd_middlebox_supp = 0;
- tpheur->th_tfo_rcv_middlebox_supp = 0;
+ bzero(tpheur->th_val_start,
+ tpheur->th_val_end - tpheur->th_val_start);
} else {
/* Create a new heuristic and add it to the list */
tpheur = _MALLOC(sizeof(struct tcp_heuristic), M_TEMP,
SLIST_INSERT_HEAD(&head->tcp_heuristics, tpheur, list);
}
+ /*
+ * Initialize to tcp_now so that the backoff timestamps are never
+ * greater than tcp_now in the near future.
+ */
+ tpheur->th_ecn_backoff = tcp_now;
+ tpheur->th_tfo_cookie_backoff = tcp_now;
+
memcpy(&tpheur->th_key, &key, sizeof(key));
}
tp->t_tfo_flags |= TFO_F_NO_SNDPROBING;
}
-void tcp_heuristic_tfo_inc_loss(struct tcpcb *tp)
+void tcp_heuristic_inc_loss(struct tcpcb *tp, int tfo, int ecn)
{
struct tcp_heuristics_head *head;
struct tcp_heuristic *tpheur;
if (tpheur == NULL)
return;
- /* Potential integer overflow, but tfo_cookie_loss is 32-bits */
- tpheur->th_tfo_cookie_loss++;
+ /* Limit to 9 to prevent integer-overflow during exponential backoff */
+ if (tfo && tpheur->th_tfo_cookie_loss < 9)
+ tpheur->th_tfo_cookie_loss++;
+
+ if (ecn && tpheur->th_ecn_loss < 9) {
+ tpheur->th_ecn_loss++;
+ if (tpheur->th_ecn_loss >= ECN_MAX_SYN_LOSS) {
+ tcpstat.tcps_ecn_fallback_synloss++;
+ INP_INC_IFNET_STAT(tp->t_inpcb, ecn_fallback_synloss);
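+ /*
+ * Exponential backoff before retrying ECN: with the default
+ * tcp_ecn_timeout of 60 minutes this works out to roughly
+ * 1h, 2h, 4h, ... after the 2nd, 3rd, 4th lost ECN-setup SYN
+ * (illustrative; the actual base depends on the sysctl).
+ */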
+ tpheur->th_ecn_backoff = tcp_now +
+ ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ)
+ << (tpheur->th_ecn_loss - ECN_MAX_SYN_LOSS));
+ }
+ }
tcp_heuristic_unlock(head);
}
tcp_heuristic_unlock(head);
}
-void tcp_heuristic_tfo_reset_loss(struct tcpcb *tp)
+void tcp_heuristic_ecn_aggressive(struct tcpcb *tp)
+{
+ struct tcp_heuristics_head *head;
+ struct tcp_heuristic *tpheur;
+
+ tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
+ if (tpheur == NULL)
+ return;
+
+ /* Must be done before incrementing th_ecn_aggressive; otherwise we would start with exponential backoff right away */
+ tpheur->th_ecn_backoff = tcp_now +
+ ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) << (tpheur->th_ecn_aggressive));
+
+ /*
+ * Limit to 9 so that the shift above cannot overflow during
+ * the exponential backoff.
+ */
+ if (tpheur->th_ecn_aggressive < 9)
+ tpheur->th_ecn_aggressive++;
+
+ tcp_heuristic_unlock(head);
+}
+
+void tcp_heuristic_reset_loss(struct tcpcb *tp, int tfo, int ecn)
{
struct tcp_heuristics_head *head;
struct tcp_heuristic *tpheur;
if (tpheur == NULL)
return;
- tpheur->th_tfo_cookie_loss = 0;
- tpheur->th_tfo_aggressive_fallback = 0;
+ if (tfo)
+ tpheur->th_tfo_cookie_loss = 0;
+
+ if (ecn)
+ tpheur->th_ecn_loss = 0;
tcp_heuristic_unlock(head);
}
return (true);
}
+boolean_t tcp_heuristic_do_ecn(struct tcpcb *tp)
+{
+ struct tcp_heuristics_head *head;
+ struct tcp_heuristic *tpheur;
+ boolean_t ret = true;
+
+ /* Get the tcp-heuristic. */
+ tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
+ if (tpheur == NULL)
+ return ret;
+
+ if (TSTMP_GT(tpheur->th_ecn_backoff, tcp_now))
+ ret = false;
+
+ tcp_heuristic_unlock(head);
+
+ return (ret);
+}
+
static void sysctl_cleartfocache(void)
{
int i;
#include <netinet/tcp_var.h>
#include <netinet/in.h>
+/* Number of SYN-losses we accept */
+#define TFO_MAX_COOKIE_LOSS 2
+#define ECN_MAX_SYN_LOSS 2
+
+#define ECN_MIN_CE_PROBES 10 /* Probes are basically the number of incoming packets */
+#define ECN_MAX_CE_RATIO 7 /* Maximum number of CE-marked packets we tolerate within the first ECN_MIN_CE_PROBES incoming packets */
+
extern void tcp_cache_set_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t len);
extern int tcp_cache_get_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t *len);
extern unsigned int tcp_cache_get_cookie_len(struct tcpcb *tp);
-extern void tcp_heuristic_tfo_inc_loss(struct tcpcb *tp);
+extern void tcp_heuristic_inc_loss(struct tcpcb *tp, int tfo, int ecn);
extern void tcp_heuristic_tfo_snd_good(struct tcpcb *tp);
extern void tcp_heuristic_tfo_rcv_good(struct tcpcb *tp);
extern void tcp_heuristic_tfo_middlebox(struct tcpcb *tp);
-extern void tcp_heuristic_tfo_reset_loss(struct tcpcb *tp);
+extern void tcp_heuristic_ecn_aggressive(struct tcpcb *tp);
+extern void tcp_heuristic_reset_loss(struct tcpcb *tp, int tfo, int ecn);
extern void tcp_heuristic_tfo_success(struct tcpcb *tp);
extern boolean_t tcp_heuristic_do_tfo(struct tcpcb *tp);
+extern boolean_t tcp_heuristic_do_ecn(struct tcpcb *tp);
extern void tcp_cache_init(void);
th->th_seq += i;
}
}
+ tp->t_rcvoopack++;
tcpstat.tcps_rcvoopack++;
tcpstat.tcps_rcvoobyte += *tlenp;
if (nstat_collect) {
/*
* If this happens, things have gone terribly wrong. len should
- * have been check in tcp_dooptions.
+ * have been checked in tcp_dooptions.
*/
VERIFY(len <= TFO_COOKIE_LEN_MAX);
* backing of TFO-cookie requests.
*/
if (tp->t_tfo_flags & TFO_F_SYN_LOSS)
- tcp_heuristic_tfo_inc_loss(tp);
+ tcp_heuristic_inc_loss(tp, 1, 0);
else
- tcp_heuristic_tfo_reset_loss(tp);
+ tcp_heuristic_reset_loss(tp, 1, 0);
}
}
TCP_ECN_ENABLED(tp) && tlen > 0 &&
SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
+ tp->t_ecn_recv_ce++;
tcpstat.tcps_ecn_recv_ce++;
+ INP_INC_IFNET_STAT(inp, ecn_recv_ce);
/* Mark this connection as it received CE from network */
tp->ecn_flags |= TE_RECV_ECN_CE;
tp->ecn_flags |= TE_SENDECE;
}
-
+
/*
* Clear TE_SENDECE if TH_CWR is set. This is harmless, so we don't
* bother doing extensive checks for state and whatnot.
*/
if (thflags & TH_CWR) {
tp->ecn_flags &= ~TE_SENDECE;
+ tp->t_ecn_recv_cwr++;
}
/*
CLEAR_IAJ_STATE(tp);
}
+ if (ip_ecn == IPTOS_ECN_CE && tp->t_state == TCPS_ESTABLISHED &&
+ !TCP_ECN_ENABLED(tp) && !(tp->ecn_flags & TE_CEHEURI_SET)) {
+ tcpstat.tcps_ecn_fallback_ce++;
+ tcp_heuristic_ecn_aggressive(tp);
+ tp->ecn_flags |= TE_CEHEURI_SET;
+ }
+
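+ /*
+ * CE-ratio probing: during the first ECN_MIN_CE_PROBES received
+ * packets, count the CE-marked ones; if more than ECN_MAX_CE_RATIO
+ * of them were CE-marked, treat the path as hostile to ECN and
+ * fall back aggressively.
+ */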
+ if (tp->t_state == TCPS_ESTABLISHED && TCP_ECN_ENABLED(tp) &&
+ ip_ecn == IPTOS_ECN_CE && !(tp->ecn_flags & TE_CEHEURI_SET)) {
+ if (inp->inp_stat->rxpackets < ECN_MIN_CE_PROBES) {
+ tp->t_ecn_recv_ce_pkt++;
+ } else if (tp->t_ecn_recv_ce_pkt > ECN_MAX_CE_RATIO) {
+ tcpstat.tcps_ecn_fallback_ce++;
+ tcp_heuristic_ecn_aggressive(tp);
+ tp->ecn_flags |= TE_CEHEURI_SET;
+ INP_INC_IFNET_STAT(inp, ecn_fallback_ce);
+ } else {
+ /*
+ * We tracked the first ECN_MIN_CE_PROBES segments;
+ * the path looks good for ECN.
+ */
+ tp->ecn_flags |= TE_CEHEURI_SET;
+ }
+ }
+
/*
* Try to determine if we are receiving a packet after a long time.
* Use our own approximation of idletime to roughly measure remote
* be TH_NEEDSYN.
*/
if (tp->t_state == TCPS_ESTABLISHED &&
- (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK|TH_ECE)) == TH_ACK &&
+ (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK|TH_ECE|TH_CWR)) == TH_ACK &&
((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
((to.to_flags & TOF_TS) == 0 ||
TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
tp->ts_recent = to.to_tsval;
}
- /* Force acknowledgment if we received a FIN */
-
- if (thflags & TH_FIN)
- tp->t_flags |= TF_ACKNOW;
-
if (tlen == 0) {
if (SEQ_GT(th->th_ack, tp->snd_una) &&
SEQ_LEQ(th->th_ack, tp->snd_max) &&
if ((thflags & (TH_ECE | TH_CWR)) == (TH_ECE)) {
/* ECN-setup SYN-ACK */
tp->ecn_flags |= TE_SETUPRECEIVED;
- if (TCP_ECN_ENABLED(tp))
+ if (TCP_ECN_ENABLED(tp)) {
+ tcp_heuristic_reset_loss(tp, 0, 1);
tcpstat.tcps_ecn_client_success++;
+ }
} else {
if (tp->ecn_flags & TE_SETUPSENT &&
- tp->t_rxtshift == 0)
+ tp->t_rxtshift == 0) {
+ tcp_heuristic_reset_loss(tp, 0, 1);
tcpstat.tcps_ecn_not_supported++;
+ }
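+ /*
+ * A non-ECN-setup SYN-ACK that only shows up after we had to
+ * retransmit suggests the ECN-setup SYN may have been dropped;
+ * record it as an ECN SYN loss for the path heuristics.
+ */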
+ if (tp->ecn_flags & TE_SETUPSENT &&
+ tp->t_rxtshift > 0)
+ tcp_heuristic_inc_loss(tp, 0, 1);
+
/* non-ECN-setup SYN-ACK */
tp->ecn_flags &= ~TE_SENDIPECT;
}
} else {
tcpstat.tcps_rcvduppack++;
tcpstat.tcps_rcvdupbyte += tlen;
+ tp->t_pawsdrop++;
tcpstat.tcps_pawsdrop++;
+
+ /*
+ * A PAWS drop while ECN is in use suggests that ECT-marked
+ * packets are taking a different path with different
+ * congestion characteristics.
+ *
+ * Only fall back while we have received less than 2GB, as
+ * PAWS has no legitimate reason to kick in earlier than that.
+ */
+ if (TCP_ECN_ENABLED(tp) &&
+ inp->inp_stat->rxbytes < 2147483648) {
+ INP_INC_IFNET_STAT(inp, ecn_fallback_reorder);
+ tcpstat.tcps_ecn_fallback_reorder++;
+ tcp_heuristic_ecn_aggressive(tp);
+ }
+
if (nstat_collect) {
nstat_route_rx(tp->t_inpcb->inp_route.ro_rt,
1, tlen, NSTAT_RX_FLAG_DUPLICATE);
if (SACK_ENABLED(tp)) {
tcpstat.tcps_sack_recovery_episode++;
+ tp->t_sack_recovery_episode++;
tp->sack_newdata = tp->snd_nxt;
tp->snd_cwnd = tp->t_maxseg;
tp->t_flagsext &=
* ECE atleast once
*/
tp->ecn_flags |= TE_RECV_ECN_ECE;
+ INP_INC_IFNET_STAT(inp, ecn_recv_ece);
tcpstat.tcps_ecn_recv_ece++;
tcp_ccdbg_trace(tp, th, TCP_CC_ECN_RCVD);
}
tp->t_flags |= TF_DELACK;
tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack);
}
- }
- else {
+ } else {
tp->t_flags |= TF_ACKNOW;
}
tp->rcv_nxt++;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW | CTLFLAG_LOCKED,
&tcp_do_tso, 0, "Enable TCP Segmentation Offload");
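+
+/*
+ * Shared handler for the ecn_initiate_out/ecn_negotiate_in sysctls.
+ * Values 0 and 1 simply toggle the system-wide default. Setting 2 forces
+ * ECN on by marking every non-cellular interface IFEF_ECN_ENABLE; moving
+ * back to 0 or 1 clears the per-interface flags so that the system-wide
+ * default applies again. Both sysctls are kept in sync whenever the
+ * per-interface mode is involved.
+ */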
+static int
+sysctl_change_ecn_setting SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ int i, err = 0, changed = 0;
+ struct ifnet *ifp;
+
+ err = sysctl_io_number(req, tcp_ecn_outbound, sizeof(int32_t),
+ &i, &changed);
+ if (err != 0 || req->newptr == USER_ADDR_NULL)
+ return(err);
+
+ if (changed) {
+ if ((tcp_ecn_outbound == 0 || tcp_ecn_outbound == 1) &&
+ (i == 0 || i == 1)) {
+ tcp_ecn_outbound = i;
+ return(err);
+ }
+ if (tcp_ecn_outbound == 2 && (i == 0 || i == 1)) {
+ /*
+ * Reset ECN enable flags on non-cellular
+ * interfaces so that the system default will take
+ * over
+ */
+ ifnet_head_lock_shared();
+ TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+ if (!IFNET_IS_CELLULAR(ifp)) {
+ ifnet_lock_exclusive(ifp);
+ ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+ ifp->if_eflags &= ~IFEF_ECN_ENABLE;
+ ifnet_lock_done(ifp);
+ }
+ }
+ ifnet_head_done();
+ } else {
+ /*
+ * Set ECN enable flags on non-cellular
+ * interfaces
+ */
+ ifnet_head_lock_shared();
+ TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+ if (!IFNET_IS_CELLULAR(ifp)) {
+ ifnet_lock_exclusive(ifp);
+ ifp->if_eflags |= IFEF_ECN_ENABLE;
+ ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+ ifnet_lock_done(ifp);
+ }
+ }
+ ifnet_head_done();
+ }
+ tcp_ecn_outbound = i;
+ }
+ /* The interface flags were updated above; keep the inbound setting in sync when either value uses the per-interface mode */
+ if (i == 2 || tcp_ecn_inbound == 2)
+ tcp_ecn_inbound = i;
+ return (err);
+}
+
int tcp_ecn_outbound = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_initiate_out,
- CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_outbound, 0,
- "Initiate ECN for outbound connections");
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, ecn_initiate_out,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_outbound, 0,
+ sysctl_change_ecn_setting, "IU",
+ "Initiate ECN for outbound connections");
int tcp_ecn_inbound = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_negotiate_in,
- CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_inbound, 0,
- "Allow ECN negotiation for inbound connections");
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, ecn_negotiate_in,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_inbound, 0,
+ sysctl_change_ecn_setting, "IU",
+ "Initiate ECN for inbound connections");
int tcp_packet_chaining = 50;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, packetchain,
(tp->t_flagsext & TF_FASTOPEN)));
}
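+/*
+ * Decide whether this connection should negotiate ECN. Precedence:
+ * per-socket TCP_ECN_MODE setting, then the per-interface
+ * IFEF_ECN_ENABLE/IFEF_ECN_DISABLE flags, then the system-wide
+ * sysctls; finally the per-path heuristics may veto ECN.
+ */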
+void
+tcp_set_ecn(struct tcpcb *tp, struct ifnet *ifp)
+{
+ boolean_t inbound;
+
+ /*
+ * Socket option has precedence
+ */
+ if (tp->ecn_flags & TE_ECN_MODE_ENABLE) {
+ tp->ecn_flags |= TE_ENABLE_ECN;
+ goto check_heuristic;
+ }
+
+ if (tp->ecn_flags & TE_ECN_MODE_DISABLE) {
+ tp->ecn_flags &= ~TE_ENABLE_ECN;
+ return;
+ }
+ /*
+ * Per-interface setting comes next
+ */
+ if (ifp != NULL) {
+ if (ifp->if_eflags & IFEF_ECN_ENABLE) {
+ tp->ecn_flags |= TE_ENABLE_ECN;
+ goto check_heuristic;
+ }
+
+ if (ifp->if_eflags & IFEF_ECN_DISABLE) {
+ tp->ecn_flags &= ~TE_ENABLE_ECN;
+ return;
+ }
+ }
+ /*
+ * System-wide settings come last
+ */
+ inbound = (tp->t_inpcb->inp_socket->so_head != NULL);
+ if ((inbound && tcp_ecn_inbound == 1) ||
+ (!inbound && tcp_ecn_outbound == 1)) {
+ tp->ecn_flags |= TE_ENABLE_ECN;
+ goto check_heuristic;
+ } else {
+ tp->ecn_flags &= ~TE_ENABLE_ECN;
+ }
+
+ return;
+
+check_heuristic:
+ if (!tcp_heuristic_do_ecn(tp))
+ tp->ecn_flags &= ~TE_ENABLE_ECN;
+}
+
/*
* Tcp output routine: figure out what should be sent and send it.
*
if ((ifp = rt->rt_ifp) != NULL) {
somultipages(so, (ifp->if_hwassist & IFNET_MULTIPAGES));
tcp_set_tso(tp, ifp);
- soif2kcl(so,
- (ifp->if_eflags & IFEF_2KCL));
+ soif2kcl(so, (ifp->if_eflags & IFEF_2KCL));
+ tcp_set_ecn(tp, ifp);
}
if (rt->rt_flags & RTF_UP)
RT_GENID_SYNC(rt);
*lp++ = htonl(tp->t_dsack_lseq);
*lp++ = htonl(tp->t_dsack_rseq);
tcpstat.tcps_dsack_sent++;
+ tp->t_dsack_sent++;
nsack--;
}
VERIFY(nsack == 0 || tp->rcv_numsacks >= nsack);
*
* For a SYN-ACK, send an ECN setup SYN-ACK
*/
- if ((tcp_ecn_inbound || (tp->t_flags & TF_ENABLE_ECN))
- && (flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
+ if ((flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK) &&
+ (tp->ecn_flags & TE_ENABLE_ECN)) {
if (tp->ecn_flags & TE_SETUPRECEIVED) {
if (tcp_send_ecn_flags_on_syn(tp, so)) {
/*
if (tp->ecn_flags & TE_SETUPSENT) {
tcpstat.tcps_ecn_lost_synack++;
tcpstat.tcps_ecn_server_success--;
+ tp->ecn_flags |= TE_LOST_SYNACK;
}
tp->ecn_flags &=
TE_SENDCWR);
}
}
- } else if ((tcp_ecn_outbound || (tp->t_flags & TF_ENABLE_ECN))
- && (flags & (TH_SYN | TH_ACK)) == TH_SYN) {
+ } else if ((flags & (TH_SYN | TH_ACK)) == TH_SYN &&
+ (tp->ecn_flags & TE_ENABLE_ECN)) {
if (tcp_send_ecn_flags_on_syn(tp, so)) {
/*
* Setting TH_ECE and TH_CWR makes this an
*/
flags |= (TH_ECE | TH_CWR);
tcpstat.tcps_ecn_client_setup++;
+ tp->ecn_flags |= TE_CLIENT_SETUP;
/*
* Record that we sent the ECN-setup and default to
* Fall back to non-ECN and clear flag indicating
* we should send data with IP ECT set.
*/
- if (tp->ecn_flags & TE_SETUPSENT)
+ if (tp->ecn_flags & TE_SETUPSENT) {
tcpstat.tcps_ecn_lost_syn++;
+ tp->ecn_flags |= TE_LOST_SYN;
+ }
tp->ecn_flags &= ~TE_SENDIPECT;
}
}
tcp_rxtseg_insert(tp, tp->snd_nxt,
(tp->snd_nxt + len - 1));
}
- m->m_pkthdr.pkt_flags |= PKTF_TCP_REXMT;
+ if (len > 0)
+ m->m_pkthdr.pkt_flags |=
+ PKTF_TCP_REXMT;
}
} else {
th->th_seq = htonl(tp->snd_max);
tcp_rxtseg_insert(tp, p->rxmit, (p->rxmit + len - 1));
p->rxmit += len;
tp->sackhint.sack_bytes_rexmit += len;
- m->m_pkthdr.pkt_flags |= PKTF_TCP_REXMT;
+ if (len > 0)
+ m->m_pkthdr.pkt_flags |= PKTF_TCP_REXMT;
}
th->th_ack = htonl(tp->rcv_nxt);
tp->last_ack_sent = tp->rcv_nxt;
}
tcpstat.tcps_reordered_pkts++;
+ tp->t_reordered_pkts++;
VERIFY(SEQ_GEQ(snd_fack, s->rxmit));
to->to_nsacks--;
to->to_sacks += TCPOLEN_SACK;
tcpstat.tcps_dsack_recvd++;
+ tp->t_dsack_recvd++;
/* ignore DSACK option, if DSACK is disabled */
if (tp->t_flagsext & TF_DISABLE_DSACK)
}
}
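+
+/*
+ * Fold this connection's RTT, reordering and retransmit numbers into the
+ * interface-wide ECN performance stats using a 15/16 EWMA
+ * (new = (15 * old + sample) / 16); the percentage values are kept
+ * left-shifted by 10 bits for added precision.
+ */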
+static inline void
+tcp_update_ecn_perf_stats(struct tcpcb *tp,
+ struct if_tcp_ecn_perf_stat *stat)
+{
+ u_int64_t curval, oldval;
+ struct inpcb *inp = tp->t_inpcb;
+
+ /* Average RTT */
+ curval = (tp->t_srtt >> TCP_RTT_SHIFT);
+ if (curval > 0 && tp->t_rttupdated >= 16) {
+ if (stat->rtt_avg == 0) {
+ stat->rtt_avg = curval;
+ } else {
+ oldval = stat->rtt_avg;
+ stat->rtt_avg =
+ ((oldval << 4) - oldval + curval) >> 4;
+ }
+ }
+
+ /* RTT variance */
+ curval = tp->t_rttvar >> TCP_RTTVAR_SHIFT;
+ if (curval > 0 && tp->t_rttupdated >= 16) {
+ if (stat->rtt_var == 0) {
+ stat->rtt_var = curval;
+ } else {
+ oldval = stat->rtt_var;
+ stat->rtt_var =
+ ((oldval << 4) - oldval + curval) >> 4;
+ }
+ }
+
+ /* Percentage of Out-of-order packets, shift by 10 for precision */
+ curval = (tp->t_rcvoopack << 10);
+ if (inp->inp_stat != NULL && inp->inp_stat->rxpackets > 0 &&
+ curval > 0) {
+ /* Compute percentage */
+ curval = (curval * 100)/inp->inp_stat->rxpackets;
+ if (stat->oo_percent == 0) {
+ stat->oo_percent = curval;
+ } else {
+ oldval = stat->oo_percent;
+ stat->oo_percent =
+ ((oldval << 4) - oldval + curval) >> 4;
+ }
+ }
+
+ /* Total number of SACK recovery episodes */
+ stat->sack_episodes += tp->t_sack_recovery_episode;
+
+ /* Percentage of reordered packets, shift by 10 for precision */
+ curval = tp->t_reordered_pkts + tp->t_pawsdrop + tp->t_dsack_sent +
+ tp->t_dsack_recvd;
+ curval = curval << 10;
+ if (inp->inp_stat != NULL && (inp->inp_stat->rxpackets > 0 ||
+ inp->inp_stat->txpackets > 0) && curval > 0) {
+ /* Compute percentage */
+ curval = (curval * 100) /
+ (inp->inp_stat->rxpackets + inp->inp_stat->txpackets);
+ if (stat->reorder_percent == 0) {
+ stat->reorder_percent = curval;
+ } else {
+ oldval = stat->reorder_percent;
+ stat->reorder_percent =
+ ((oldval << 4) - oldval + curval) >> 4;
+ }
+ }
+
+ /* Percentage of retransmit bytes, shift by 10 for precision */
+ curval = tp->t_stat.txretransmitbytes << 10;
+ if (inp->inp_stat != NULL && inp->inp_stat->txbytes > 0
+ && curval > 0) {
+ curval = (curval * 100) / inp->inp_stat->txbytes;
+ if (stat->rxmit_percent == 0) {
+ stat->rxmit_percent = curval;
+ } else {
+ oldval = stat->rxmit_percent;
+ stat->rxmit_percent =
+ ((oldval << 4) - oldval + curval) >> 4;
+ }
+ }
+ return;
+}
+
/*
* Close a TCP control block:
* discard all space held by the tcp
/* free the reassembly queue, if any */
(void) tcp_freeq(tp);
+
+ /* Collect ECN related statistics */
+ if (tp->ecn_flags & TE_SETUPSENT) {
+ if (tp->ecn_flags & TE_CLIENT_SETUP) {
+ INP_INC_IFNET_STAT(inp, ecn_client_setup);
+ if (TCP_ECN_ENABLED(tp)) {
+ INP_INC_IFNET_STAT(inp,
+ ecn_client_success);
+ } else if (tp->ecn_flags & TE_LOST_SYN) {
+ INP_INC_IFNET_STAT(inp, ecn_syn_lost);
+ } else {
+ INP_INC_IFNET_STAT(inp,
+ ecn_peer_nosupport);
+ }
+ } else {
+ INP_INC_IFNET_STAT(inp, ecn_server_setup);
+ if (TCP_ECN_ENABLED(tp)) {
+ INP_INC_IFNET_STAT(inp,
+ ecn_server_success);
+ } else if (tp->ecn_flags & TE_LOST_SYNACK) {
+ INP_INC_IFNET_STAT(inp,
+ ecn_synack_lost);
+ } else {
+ INP_INC_IFNET_STAT(inp,
+ ecn_peer_nosupport);
+ }
+ }
+ }
if (TCP_ECN_ENABLED(tp)) {
- if (tp->ecn_flags & TE_RECV_ECN_CE)
+ if (tp->ecn_flags & TE_RECV_ECN_CE) {
tcpstat.tcps_ecn_conn_recv_ce++;
- if (tp->ecn_flags & TE_RECV_ECN_ECE)
+ INP_INC_IFNET_STAT(inp, ecn_conn_recv_ce);
+ }
+ if (tp->ecn_flags & TE_RECV_ECN_ECE) {
tcpstat.tcps_ecn_conn_recv_ece++;
+ INP_INC_IFNET_STAT(inp, ecn_conn_recv_ece);
+ }
if (tp->ecn_flags & (TE_RECV_ECN_CE | TE_RECV_ECN_ECE)) {
if (tp->t_stat.txretransmitbytes > 0 ||
- tp->t_stat.rxoutoforderbytes > 0)
+ tp->t_stat.rxoutoforderbytes > 0) {
tcpstat.tcps_ecn_conn_pl_ce++;
- else
+ INP_INC_IFNET_STAT(inp, ecn_conn_plce);
+ } else {
tcpstat.tcps_ecn_conn_nopl_ce++;
+ INP_INC_IFNET_STAT(inp, ecn_conn_noplce);
+ }
} else {
if (tp->t_stat.txretransmitbytes > 0 ||
- tp->t_stat.rxoutoforderbytes > 0)
+ tp->t_stat.rxoutoforderbytes > 0) {
tcpstat.tcps_ecn_conn_plnoce++;
+ INP_INC_IFNET_STAT(inp, ecn_conn_plnoce);
+ }
+ }
+
+ }
+
+ /* Aggregate performance stats */
+ if (inp->inp_last_outifp != NULL) {
+ struct ifnet *ifp = inp->inp_last_outifp;
+ ifnet_lock_shared(ifp);
+ if ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
+ IFRF_ATTACHED) {
+ if (inp->inp_vflag & INP_IPV6) {
+ if (TCP_ECN_ENABLED(tp)) {
+ ifp->if_ipv6_stat->timestamp
+ = net_uptime();
+ tcp_update_ecn_perf_stats(tp,
+ &ifp->if_ipv6_stat->ecn_on);
+ } else {
+ ifp->if_ipv6_stat->timestamp
+ = net_uptime();
+ tcp_update_ecn_perf_stats(tp,
+ &ifp->if_ipv6_stat->ecn_off);
+ }
+ } else {
+ if (TCP_ECN_ENABLED(tp)) {
+ ifp->if_ipv4_stat->timestamp
+ = net_uptime();
+ tcp_update_ecn_perf_stats(tp,
+ &ifp->if_ipv4_stat->ecn_on);
+ } else {
+ ifp->if_ipv4_stat->timestamp
+ = net_uptime();
+ tcp_update_ecn_perf_stats(tp,
+ &ifp->if_ipv4_stat->ecn_off);
+ }
+ }
}
+ ifnet_lock_done(ifp);
}
tcp_free_sackholes(tp);
tcp_set_tso(tp, rt->rt_ifp);
soif2kcl(inp->inp_socket,
(rt->rt_ifp->if_eflags & IFEF_2KCL));
+ tcp_set_ecn(tp, rt->rt_ifp);
}
/* Note if the peer is local */
tcp_set_tso(tp, rt->rt_ifp);
soif2kcl(inp->inp_socket,
(rt->rt_ifp->if_eflags & IFEF_2KCL));
+ tcp_set_ecn(tp, rt->rt_ifp);
}
/* Note if the peer is local */
u_int32_t tcps_ecn_conn_plnoce;
u_int32_t tcps_ecn_conn_pl_ce;
u_int32_t tcps_ecn_conn_nopl_ce;
+ u_int32_t tcps_ecn_fallback_synloss;
+ u_int32_t tcps_ecn_fallback_reorder;
+ u_int32_t tcps_ecn_fallback_ce;
/* TFO-related statistics */
u_int32_t tcps_tfo_syn_data_rcv;
} else {
tcpstat.tcps_timeoutdrop++;
}
+ if (tp->t_rxtshift >= TCP_MAXRXTSHIFT) {
+ if (TCP_ECN_ENABLED(tp)) {
+ INP_INC_IFNET_STAT(tp->t_inpcb,
+ ecn_on.rxmit_drop);
+ } else {
+ INP_INC_IFNET_STAT(tp->t_inpcb,
+ ecn_off.rxmit_drop);
+ }
+ }
tp->t_rxtshift = TCP_MAXRXTSHIFT;
postevent(so, 0, EV_TIMEOUT);
soevent(so,
tp->t_timer[TCPT_REXMT] = 0;
tcpstat.tcps_sack_recovery_episode++;
+ tp->t_sack_recovery_episode++;
tp->sack_newdata = tp->snd_nxt;
tp->snd_cwnd = tp->t_maxseg;
tcp_ccdbg_trace(tp, NULL, TCP_CC_ENTER_FASTRECOVERY);
&prev.tcps_ecn_conn_pl_ce, &stat.ecn_conn_pl_ce);
tcp_cumulative_stat(tcpstat.tcps_ecn_conn_nopl_ce,
&prev.tcps_ecn_conn_nopl_ce, &stat.ecn_conn_nopl_ce);
+ tcp_cumulative_stat(tcpstat.tcps_ecn_fallback_synloss,
+ &prev.tcps_ecn_fallback_synloss, &stat.ecn_fallback_synloss);
+ tcp_cumulative_stat(tcpstat.tcps_ecn_fallback_reorder,
+ &prev.tcps_ecn_fallback_reorder, &stat.ecn_fallback_reorder);
+ tcp_cumulative_stat(tcpstat.tcps_ecn_fallback_ce,
+ &prev.tcps_ecn_fallback_ce, &stat.ecn_fallback_ce);
tcp_cumulative_stat(tcpstat.tcps_tfo_syn_data_rcv,
&prev.tcps_tfo_syn_data_rcv, &stat.tfo_syn_data_rcv);
tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_req_rcv,
} else {
error = ENETDOWN;
}
+
+ /* Disable PRECONNECT_DATA, as we don't need to send a SYN anymore. */
+ so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
return error;
}
#endif /* FLOW_DIVERT */
bzero(ti, sizeof(*ti));
ti->tcpi_state = tp->t_state;
-
+ ti->tcpi_flowhash = inp->inp_flowhash;
+
if (tp->t_state > TCPS_LISTEN) {
if (TSTMP_SUPPORTED(tp))
ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
ti->tcpi_snd_wscale = tp->snd_scale;
ti->tcpi_rcv_wscale = tp->rcv_scale;
}
+ if (TCP_ECN_ENABLED(tp))
+ ti->tcpi_options |= TCPI_OPT_ECN;
/* Are we in retranmission episode */
if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0)
ti->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
ti->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
ti->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
+
+ ti->tcpi_ecn_client_setup = !!(tp->ecn_flags & TE_SETUPSENT);
+ ti->tcpi_ecn_server_setup = !!(tp->ecn_flags & TE_SETUPRECEIVED);
+ ti->tcpi_ecn_success = (tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON ? 1 : 0;
+ ti->tcpi_ecn_lost_syn = !!(tp->ecn_flags & TE_LOST_SYN);
+ ti->tcpi_ecn_lost_synack = !!(tp->ecn_flags & TE_LOST_SYNACK);
+
+ ti->tcpi_local_peer = !!(tp->t_flags & TF_LOCAL);
+
+ if (tp->t_inpcb->inp_last_outifp != NULL) {
+ if (IFNET_IS_CELLULAR(tp->t_inpcb->inp_last_outifp))
+ ti->tcpi_if_cell = 1;
+ else if (IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp))
+ ti->tcpi_if_wifi = 1;
+ }
+
+ ti->tcpi_ecn_recv_ce = tp->t_ecn_recv_ce;
+ ti->tcpi_ecn_recv_cwr = tp->t_ecn_recv_cwr;
+
+ ti->tcpi_rcvoopack = tp->t_rcvoopack;
+ ti->tcpi_pawsdrop = tp->t_pawsdrop;
+ ti->tcpi_sack_recovery_episode = tp->t_sack_recovery_episode;
+ ti->tcpi_reordered_pkts = tp->t_reordered_pkts;
+ ti->tcpi_dsack_sent = tp->t_dsack_sent;
+ ti->tcpi_dsack_recvd = tp->t_dsack_recvd;
}
}
case TCP_NODELAY:
case TCP_NOOPT:
case TCP_NOPUSH:
- case TCP_ENABLE_ECN:
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
case TCP_NOPUSH:
opt = TF_NOPUSH;
break;
- case TCP_ENABLE_ECN:
- opt = TF_ENABLE_ECN;
- break;
default:
opt = 0; /* dead code to fool gcc */
break;
else
tcp_disable_tfo(tp);
break;
+ case TCP_ENABLE_ECN:
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ break;
+ if (optval) {
+ tp->ecn_flags |= TE_ECN_MODE_ENABLE;
+ tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
+ } else {
+ tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
+ }
+ break;
+ case TCP_ECN_MODE:
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ break;
+ if (optval == ECN_MODE_DEFAULT) {
+ tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
+ tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
+ } else if (optval == ECN_MODE_ENABLE) {
+ tp->ecn_flags |= TE_ECN_MODE_ENABLE;
+ tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
+ } else if (optval == ECN_MODE_DISABLE) {
+ tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
+ tp->ecn_flags |= TE_ECN_MODE_DISABLE;
+ } else {
+ error = EINVAL;
+ }
+ break;
case SO_FLUSH:
if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
sizeof (optval))) != 0)
optval = tp->t_flags & TF_NOPUSH;
break;
case TCP_ENABLE_ECN:
- optval = (tp->t_flags & TF_ENABLE_ECN) ? 1 : 0;
+ optval = (tp->ecn_flags & TE_ECN_MODE_ENABLE) ? 1 : 0;
+ break;
+ case TCP_ECN_MODE:
+ if (tp->ecn_flags & TE_ECN_MODE_ENABLE)
+ optval = ECN_MODE_ENABLE;
+ else if (tp->ecn_flags & TE_ECN_MODE_DISABLE)
+ optval = ECN_MODE_DISABLE;
+ else
+ optval = ECN_MODE_DEFAULT;
break;
case TCP_CONNECTIONTIMEOUT:
optval = tp->t_keepinit / TCP_RETRANSHZ;
#define TF_WASFRECOVERY 0x400000 /* was in NewReno Fast Recovery */
#define TF_SIGNATURE 0x800000 /* require MD5 digests (RFC2385) */
#define TF_MAXSEGSNT 0x1000000 /* last segment sent was a full segment */
-#define TF_ENABLE_ECN 0x2000000 /* Enable ECN */
#define TF_PMTUD 0x4000000 /* Perform Path MTU Discovery for this connection */
#define TF_CLOSING 0x8000000 /* pending tcp close */
#define TF_TSO 0x10000000 /* TCP Segment Offloading is enable on this connection */
u_int32_t rcv_by_unackwin; /* bytes seen during the last ack-stretching win */
u_int32_t rcv_nostrack_ts; /* timestamp when stretch ack was disabled automatically */
u_int16_t rcv_waitforss; /* wait for packets during slow-start */
- u_int16_t ecn_flags;
-#define TE_SETUPSENT 0x01 /* Indicate we have sent ECN-SETUP SYN or SYN-ACK */
-#define TE_SETUPRECEIVED 0x02 /* Indicate we have received ECN-SETUP SYN or SYN-ACK */
-#define TE_SENDIPECT 0x04 /* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */
-#define TE_SENDCWR 0x08 /* Indicate that the next non-retransmit should have the TCP CWR flag set */
-#define TE_SENDECE 0x10 /* Indicate that the next packet should have the TCP ECE flag set */
-#define TE_INRECOVERY 0x20 /* connection entered recovery after receiving ECE */
-#define TE_RECV_ECN_CE 0x40 /* Received IPTOS_ECN_CE marking atleast once */
-#define TE_RECV_ECN_ECE 0x80 /* Received ECE marking atleast once */
-#define TE_ECN_ON (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection) */
+
+/* ECN state and statistics */
+ u_int16_t ecn_flags;
+#define TE_SETUPSENT 0x0001 /* Indicate we have sent ECN-SETUP SYN or SYN-ACK */
+#define TE_SETUPRECEIVED 0x0002 /* Indicate we have received ECN-SETUP SYN or SYN-ACK */
+#define TE_SENDIPECT 0x0004 /* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */
+#define TE_SENDCWR 0x0008 /* Indicate that the next non-retransmit should have the TCP CWR flag set */
+#define TE_SENDECE 0x0010 /* Indicate that the next packet should have the TCP ECE flag set */
+#define TE_INRECOVERY 0x0020 /* connection entered recovery after receiving ECE */
+#define TE_RECV_ECN_CE 0x0040 /* Received IPTOS_ECN_CE marking at least once */
+#define TE_RECV_ECN_ECE 0x0080 /* Received ECE marking at least once */
+#define TE_LOST_SYN 0x0100 /* Lost SYN with ECN setup */
+#define TE_LOST_SYNACK 0x0200 /* Lost SYN-ACK with ECN setup */
+#define TE_ECN_MODE_ENABLE 0x0400 /* Option ECN mode set to enable */
+#define TE_ECN_MODE_DISABLE 0x0800 /* Option ECN mode set to disable */
+#define TE_ENABLE_ECN 0x1000 /* Enable negotiation of ECN */
+#define TE_ECN_ON (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection */
+#define TE_CEHEURI_SET 0x2000 /* We did our CE-probing at the beginning */
+#define TE_CLIENT_SETUP 0x4000 /* setup from client side */
+
+ u_int32_t t_ecn_recv_ce; /* Received CE from the network */
+ u_int32_t t_ecn_recv_cwr; /* Packets received with CWR */
+ u_int8_t t_ecn_recv_ce_pkt; /* Received packet with CE-bit set (independent of last_ack_sent) */
/* state for bad retransmit recovery */
u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */
uint32_t rtt_count; /* Number of RTT samples in recent base history */
uint32_t bg_ssthresh; /* Slow start threshold until delay increases */
uint32_t t_flagsext; /* Another field to accommodate more flags */
-#define TF_RXTFINDROP 0x1 /* Drop conn after retransmitting FIN 3 times */
+#define TF_RXTFINDROP 0x1 /* Drop conn after retransmitting FIN 3 times */
#define TF_RCVUNACK_WAITSS 0x2 /* set when the receiver should not stretch acks */
#define TF_BWMEAS_INPROGRESS 0x4 /* Indicate BW meas is happening */
#define TF_MEASURESNDBW 0x8 /* Measure send bw on this connection */
#define TFO_PROBE_PROBING 1 /* Sending out TCP-keepalives waiting for reply */
#define TFO_PROBE_WAIT_DATA 2 /* Received reply, waiting for data */
u_int8_t t_tfo_probe_state;
+
+ u_int32_t t_rcvoopack; /* out-of-order packets received */
+ u_int32_t t_pawsdrop; /* segments dropped due to PAWS */
+ u_int32_t t_sack_recovery_episode; /* SACK recovery episodes */
+ u_int32_t t_reordered_pkts; /* packets reordered */
+ u_int32_t t_dsack_sent; /* Sent DSACK notification */
+ u_int32_t t_dsack_recvd; /* Received a valid DSACK option */
};
#define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY)
u_int32_t tcps_ecn_conn_plnoce; /* Number of connections that received no CE and sufferred packet loss */
u_int32_t tcps_ecn_conn_pl_ce; /* Number of connections that received CE and sufferred packet loss */
u_int32_t tcps_ecn_conn_nopl_ce; /* Number of connections that received CE and sufferred no packet loss */
+ u_int32_t tcps_ecn_fallback_synloss; /* Number of times we fell back to non-ECN due to SYN loss */
+ u_int32_t tcps_ecn_fallback_reorder; /* Number of times we fell back after detecting a PAWS/reordering issue */
+ u_int32_t tcps_ecn_fallback_ce; /* Number of times we fell back because we received too many CE marks */
/* TFO-related statistics */
u_int32_t tcps_tfo_syn_data_rcv; /* Received a SYN+data with valid cookie */
u_int32_t tcps_tfo_blackhole; /* TFO got blackholed by a middlebox. */
};
+
struct tcpstat_local {
u_int64_t badformat;
u_int64_t unspecv6;
void tcp_free_sackholes(struct tcpcb *tp);
int32_t tcp_sbspace(struct tcpcb *tp);
void tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp);
+void tcp_set_ecn(struct tcpcb *tp, struct ifnet *ifp);
void tcp_reset_stretch_ack(struct tcpcb *tp);
extern void tcp_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *);
uint32_t tcp_count_opportunistic(unsigned int ifindex, u_int32_t flags);
* XXX more sanity checks
* XXX relationship with gif?
*/
- u_int8_t tos;
-
+ u_int8_t tos, otos;
+ int sum;
+
if (ifamily == AF_INET6) {
ipseclog((LOG_NOTICE, "ipsec tunnel protocol mismatch "
"in IPv4 AH input: %s\n", ipsec_logsastr(sav)));
}
}
ip = mtod(m, struct ip *);
+ otos = ip->ip_tos;
/* ECN consideration. */
if (ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos) == 0) {
IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
goto fail;
}
+
+ if (otos != ip->ip_tos) {
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += (~otos & 0xffff) + ip->ip_tos;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+ ip->ip_sum = htons(~sum & 0xffff);
+ }
+
if (!key_checktunnelsanity(sav, AF_INET,
(caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) {
ipseclog((LOG_NOTICE, "ipsec tunnel address mismatch "
* XXX more sanity checks
* XXX relationship with gif?
*/
- u_int8_t tos;
+ u_int8_t tos, otos;
+ int sum;
tos = ip->ip_tos;
m_adj(m, off + esplen + ivlen);
}
ip = mtod(m, struct ip *);
/* ECN consideration. */
+
+ otos = ip->ip_tos;
if (ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos) == 0) {
IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
goto bad;
}
+
+ if (otos != ip->ip_tos) {
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += (~otos & 0xffff) + ip->ip_tos;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+ ip->ip_sum = htons(~sum & 0xffff);
+ }
+
if (!key_checktunnelsanity(sav, AF_INET,
(caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) {
ipseclog((LOG_ERR, "ipsec tunnel address mismatch "
goto bad;
}
}
+
+ u_int8_t otos;
+ int sum;
+
ip = mtod(m, struct ip *);
+ otos = ip->ip_tos;
/* ECN consideration. */
if (ip46_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip->ip_tos) == 0) {
IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
goto bad;
}
+
+ if (otos != ip->ip_tos) {
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += (~otos & 0xffff) + ip->ip_tos;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+ ip->ip_sum = htons(~sum & 0xffff);
+ }
+
if (!key_checktunnelsanity(sav, AF_INET,
(caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) {
ipseclog((LOG_ERR, "ipsec tunnel address mismatch "
case IPPROTO_IPV4:
{
struct ip *ip;
- u_int8_t otos8;
+ u_int8_t otos8, old_tos;
+ int sum;
+
af = AF_INET;
otos8 = (ntohl(otos) >> 20) & 0xff;
if (mbuf_len(m) < sizeof (*ip)) {
return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
- if (gifp->if_flags & IFF_LINK1)
+ if (gifp->if_flags & IFF_LINK1) {
+ old_tos = ip->ip_tos;
egress_success = ip_ecn_egress(ECN_NORMAL, &otos8, &ip->ip_tos);
- else
+ if (old_tos != ip->ip_tos) {
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += (~old_tos & 0xffff) + ip->ip_tos;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+ ip->ip_sum = htons(~sum & 0xffff);
+ }
+ } else
egress_success = ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
break;
}
return (EINVAL);
if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
wild = 1;
+
socket_unlock(so, 0); /* keep reference */
lck_rw_lock_exclusive(pcbinfo->ipi_lock);
}
socket_lock(so, 0);
+ /*
+ * We gave up the socket's protocol lock for a potentially long time,
+ * so the socket might have been dropped or defuncted in the meantime.
+ * Check whether the world has changed before proceeding.
+ */
+ if (inp->inp_state == INPCB_STATE_DEAD) {
+ lck_rw_done(pcbinfo->ipi_lock);
+ return (ECONNABORTED);
+ }
+
/* check if the socket got bound when the lock was released */
if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
lck_rw_done(pcbinfo->ipi_lock);
(mbuf_t *)&m, ippo);
if (result == EJUSTRETURN) {
ipf_unref();
- if (m != NULL)
- m_freem(m);
m = NULL;
goto evaluateloop;
}
case SIOCSPFXFLUSH_IN6: { /* struct in6_ifreq */
/* flush all the prefix advertised by routers */
- struct nd_prefix *next;
+ struct nd_prefix *next = NULL;
lck_mtx_lock(nd6_mutex);
for (pr = nd_prefix.lh_first; pr; pr = next) {
- struct in6_ifaddr *ia;
+ struct in6_ifaddr *ia = NULL;
+ bool iterate_pfxlist_again = false;
next = pr->ndpr_next;
* The same applies for the prefix list.
*/
ia = in6_ifaddrs;
- next = nd_prefix.lh_first;
+ iterate_pfxlist_again = true;
continue;
-
}
IFA_UNLOCK(&ia->ia_ifa);
ia = ia->ia_next;
prelist_remove(pr);
NDPR_UNLOCK(pr);
pfxlist_onlink_check();
- /*
- * If we were trying to restart this loop
- * above by changing the value of 'next', we might
- * end up freeing the only element on the list
- * when we call NDPR_REMREF().
- * When this happens, we also have get out of this
- * loop because we have nothing else to do.
- */
- if (pr == next)
- next = NULL;
NDPR_REMREF(pr);
+ if (iterate_pfxlist_again) {
+ next = nd_prefix.lh_first;
+ }
}
lck_mtx_unlock(nd6_mutex);
break;
lck_mtx_unlock(cp->gss_clnt_mtx);
}
- MALLOC(ki, gss_key_info *, sizeof (gss_key_info), M_TEMP, M_WAITOK|M_ZERO);
- if (ki == NULL) {
- lck_mtx_unlock(&nmp->nm_lock);
- return (ENOMEM);
- }
-
- if (cp) {
- cp->gss_clnt_kinfo = ki;
- } else if (nfs_root_steals_ctx && principal == NULL && kauth_cred_getuid(req->r_cred) == 0) {
+ if (!cp && nfs_root_steals_ctx && principal == NULL && kauth_cred_getuid(req->r_cred) == 0) {
/*
* If superuser is trying to get access, then co-opt
* the first valid context in the list.
}
}
+ MALLOC(ki, gss_key_info *, sizeof (gss_key_info), M_TEMP, M_WAITOK|M_ZERO);
+ if (ki == NULL) {
+ lck_mtx_unlock(&nmp->nm_lock);
+ return (ENOMEM);
+ }
+
NFS_GSS_DBG("Context %s%sfound in Neg Cache @ %ld\n",
NFS_GSS_CTX(req, cp),
cp == NULL ? " not " : "",
nfs_gss_clnt_mnt_ref(nmp);
}
} else {
+ cp->gss_clnt_kinfo = ki;
nfs_gss_clnt_ctx_clean(cp);
if (principal) {
/*
goto bad;
}
+#if CONFIG_MACF
+ if ((error = mac_vnode_check_open(ctx, vp, fmode)))
+ goto bad;
+#endif
+
/* compute action to be authorized */
action = 0;
if (fmode & FREAD)
my_new_cred = kauth_cred_setauditinfo(my_cred, &tmp_as);
if (my_cred != my_new_cred) {
- proc_lock(p);
+ proc_ucred_lock(p);
/* Need to protect for a race where another thread also
* changed the credential after we took our reference.
* If p_ucred has changed then we should restart this
* again with the new cred.
*/
if (p->p_ucred != my_cred) {
- proc_unlock(p);
+ proc_ucred_unlock(p);
audit_session_unref(my_new_cred);
kauth_cred_unref(&my_new_cred);
/* try again */
p->p_ucred = my_new_cred;
/* update cred on proc */
PROC_UPDATE_CREDS_ONPROC(p);
- proc_unlock(p);
+ proc_ucred_unlock(p);
}
/*
* Drop old proc reference or our extra reference.
goto done;
}
- proc_lock(p);
+ proc_ucred_lock(p);
kauth_cred_ref(p->p_ucred);
my_cred = p->p_ucred;
if (!IS_VALID_CRED(my_cred)) {
kauth_cred_unref(&my_cred);
- proc_unlock(p);
+ proc_ucred_unlock(p);
err = ESRCH;
goto done;
}
/* Increment the proc count of new session */
audit_inc_procount(AU_SENTRY_PTR(new_aia_p));
- proc_unlock(p);
+ proc_ucred_unlock(p);
/* Propagate the change from the process to the Mach task. */
set_security_token(p);
/* Decrement the process count of the former session. */
audit_dec_procount(AU_SENTRY_PTR(old_aia_p));
} else {
- proc_unlock(p);
+ proc_ucred_unlock(p);
}
kauth_cred_unref(&my_cred);
*/
extern boolean_t coalition_is_leader(task_t task, int coal_type, coalition_t *coal);
+/*
+ * coalition_get_leader:
+ * Get a task reference on the leader of a given coalition
+ *
+ * Parameters:
+ * coal : The coalition to investigate
+ *
+ * Returns: A referenced task pointer of the leader of the given coalition.
+ * This could be TASK_NULL if the coalition doesn't have a leader.
+ * If the return value is non-null, the caller is responsible for
+ * calling task_deallocate on the returned value.
+ */
+extern task_t coalition_get_leader(coalition_t coal);
+
+
/*
* coalition_get_task_count:
* Sum up the number of tasks in the given coalition
#define DBG_MACH_SFI 0xA2 /* Selective Forced Idle (SFI) */
#define DBG_MACH_ENERGY_PERF 0xA3 /* Energy/performance resource stats */
#define DBG_MACH_SYSDIAGNOSE 0xA4 /* sysdiagnose keychord */
+#define DBG_MACH_ZALLOC 0xA5 /* Zone allocator */
/* Codes for Scheduler (DBG_MACH_SCHED) */
#define MACH_SCHED 0x0 /* Scheduler */
#define SFI_PID_SET_MANAGED 0x8
#define SFI_PID_CLEAR_MANAGED 0x9
#define SFI_GLOBAL_DEFER 0xa
+
+/* Codes for Zone Allocator (DBG_MACH_ZALLOC) */
+#define ZALLOC_ZCRAM 0x0
+
/* **** The Kernel Debug Sub Classes for Network (DBG_NETWORK) **** */
#define DBG_NETIP 1 /* Internet Protocol */
#define DBG_NETARP 2 /* Address Resolution Protocol */
#define LISTCOALITIONS_SINGLE_TYPE 2
#define LISTCOALITIONS_SINGLE_TYPE_SIZE (sizeof(struct procinfo_coalinfo))
+/* reasons for proc_can_use_foreground_hw */
+#define PROC_FGHW_OK 0 /* pid may use foreground HW */
+#define PROC_FGHW_DAEMON_OK 1
+#define PROC_FGHW_DAEMON_LEADER 10 /* pid is in a daemon coalition */
+#define PROC_FGHW_LEADER_NONUI 11 /* coalition leader is in a non-focal state */
+#define PROC_FGHW_LEADER_BACKGROUND 12 /* coalition leader is in a background state */
+#define PROC_FGHW_DAEMON_NO_VOUCHER 13 /* pid is a daemon with no adopted voucher */
+#define PROC_FGHW_NO_VOUCHER_ATTR 14 /* pid has adopted a voucher with no bank/originator attribute */
+#define PROC_FGHW_NO_ORIGINATOR 15 /* pid has adopted a voucher for a process that's gone away */
+#define PROC_FGHW_ORIGINATOR_BACKGROUND 16 /* pid has adopted a voucher for an app that's in the background */
+#define PROC_FGHW_VOUCHER_ERROR 98 /* error in voucher / originator callout */
+#define PROC_FGHW_ERROR 99 /* syscall parameter/permissions error */
+
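
Since these reason codes are plain integers handed back to user space, a small illustrative mapping (not from this diff) can make them readable in logs:

static const char *
proc_fghw_reason_string(int reason)
{
	switch (reason) {
	case PROC_FGHW_OK:                    return "ok";
	case PROC_FGHW_DAEMON_OK:             return "daemon ok";
	case PROC_FGHW_DAEMON_LEADER:         return "pid is in a daemon coalition";
	case PROC_FGHW_LEADER_NONUI:          return "coalition leader is non-focal";
	case PROC_FGHW_LEADER_BACKGROUND:     return "coalition leader is in the background";
	case PROC_FGHW_DAEMON_NO_VOUCHER:     return "daemon with no adopted voucher";
	case PROC_FGHW_NO_VOUCHER_ATTR:       return "voucher lacks a bank/originator attribute";
	case PROC_FGHW_NO_ORIGINATOR:         return "voucher originator has gone away";
	case PROC_FGHW_ORIGINATOR_BACKGROUND: return "voucher originator app is in the background";
	case PROC_FGHW_VOUCHER_ERROR:         return "voucher/originator callout error";
	case PROC_FGHW_ERROR:                 return "parameter/permission error";
	default:                              return "unknown";
	}
}
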
/* __proc_info() call numbers */
#define PROC_INFO_CALL_LISTPIDS 0x1
#define PROC_INFO_CALL_PIDINFO 0x2
#define PROC_INFO_CALL_PIDRUSAGE 0x9
#define PROC_INFO_CALL_PIDORIGINATORINFO 0xa
#define PROC_INFO_CALL_LISTCOALITIONS 0xb
+#define PROC_INFO_CALL_CANUSEFGHW 0xc
#endif /* PRIVATE */
* PL = Process Lock
* PGL = Process Group Lock
* PFDL = Process File Desc Lock
+ * PUCL = Process User Credentials Lock
* PSL = Process Spin Lock
 * PPL = Parent Process Lock (planned for later use)
* LL = List Lock
TAILQ_HEAD( ,eventqelt) p_evlist; /* (PL) */
lck_mtx_t p_fdmlock; /* proc lock to protect fdesc */
+ lck_mtx_t p_ucred_mlock; /* mutex lock to protect p_ucred */
/* substructures: */
- kauth_cred_t p_ucred; /* Process owner's identity. (PL) */
+ kauth_cred_t p_ucred; /* Process owner's identity. (PUCL) */
struct filedesc *p_fd; /* Ptr to open files structure. (PFDL) */
struct pstats *p_stats; /* Accounting/statistics (PL). */
struct plimit *p_limit; /* Process limits.(PL) */
#if CONFIG_FINE_LOCK_GROUPS
extern lck_grp_t * proc_mlock_grp;
extern lck_grp_t * proc_fdmlock_grp;
+extern lck_grp_t * proc_ucred_mlock_grp;
extern lck_grp_t * proc_slock_grp;
#endif
extern lck_grp_attr_t * proc_lck_grp_attr;
extern void proc_fdlock_spin(struct proc *);
extern void proc_fdunlock(struct proc *);
extern void proc_fdlock_assert(proc_t p, int assertflags);
+extern void proc_ucred_lock(struct proc *);
+extern void proc_ucred_unlock(struct proc *);
__private_extern__ int proc_core_name(const char *name, uid_t uid, pid_t pid,
char *cr_name, size_t cr_name_len);
extern int isinferior(struct proc *, struct proc *);
extern int proc_parentdropref(proc_t, int);
int itimerfix(struct timeval *tv);
int itimerdecr(struct proc * p, struct itimerval *itp, int usec);
+int timespec_is_valid(const struct timespec *);
void proc_signalstart(struct proc *, int locked);
void proc_signalend(struct proc *, int locked);
int proc_transstart(struct proc *, int locked, int non_blocking);
#define SIOCGIFFUNCTIONALTYPE _IOWR('i', 173, struct ifreq) /* get interface functional type */
#define SIOCSIFNETSIGNATURE _IOWR('i', 174, struct if_nsreq)
#define SIOCGIFNETSIGNATURE _IOWR('i', 175, struct if_nsreq)
+
+#define SIOCGECNMODE _IOWR('i', 176, struct ifreq)
+#define SIOCSECNMODE _IOW('i', 177, struct ifreq)
#endif /* PRIVATE */
#endif /* !_SYS_SOCKIO_H_ */
void bsd_untimeout(void (*)(void *), void *arg);
void set_fsblocksize(struct vnode *);
uint64_t tvtoabstime(struct timeval *);
+uint64_t tstoabstime(struct timespec *);
void *throttle_info_create(void);
void throttle_info_mount_ref(mount_t mp, void * throttle_info);
void throttle_info_mount_rel(mount_t mp);
goto wait_for_dwrites;
}
+ task_update_logical_writes(current_task(), (io_req_size & ~PAGE_MASK), TASK_WRITE_IMMEDIATE);
while (io_req_size >= PAGE_SIZE && uio->uio_offset < newEOF && retval == 0) {
int throttle_type;
int retval = 0;
int xsize;
upl_page_info_t *pl;
+ int dirty_count;
xsize = *io_resid;
pg_offset = upl_offset & PAGE_MASK;
csize = min(PAGE_SIZE - pg_offset, xsize);
+ dirty_count = 0;
while (xsize && retval == 0) {
addr64_t paddr;
paddr = ((addr64_t)upl_phys_page(pl, pg_index) << PAGE_SHIFT) + pg_offset;
+ if ((uio->uio_rw == UIO_WRITE) && (upl_dirty_page(pl, pg_index) == FALSE))
+ dirty_count++;
retval = uiomove64(paddr, csize, uio);
uio->uio_segflg = segflg;
+ task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED);
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
(int)uio->uio_offset, xsize, retval, segflg, 0);
blhdr->binfo[i].bnum = (off_t)(buf_blkno(bp));
blhdr->binfo[i].u.bp = bp;
+ task_update_logical_writes(current_task(), (2 * bsize), TASK_WRITE_METADATA);
KERNEL_DEBUG_CONSTANT(0x3018004, VM_KERNEL_ADDRPERM(vp), blhdr->binfo[i].bnum, bsize, 0, 0);
if (func) {
goto out;
}
+ /* Assume that there were DENYs so we don't wrongly cache KAUTH_VNODE_SEARCHBYANYONE */
+ found_deny = TRUE;
+
KAUTH_DEBUG("%p ALLOWED - caller is superuser", vp);
}
out:
* If the size is being set, make sure it's not a directory.
*/
if (VATTR_IS_ACTIVE(vap, va_data_size)) {
- /* size is meaningless on a directory, don't permit this */
- if (vnode_isdir(vp)) {
- KAUTH_DEBUG("ATTR - ERROR: size change requested on a directory");
- error = EISDIR;
+ /* size is only meaningful on regular files, don't permit otherwise */
+ if (!vnode_isreg(vp)) {
+ KAUTH_DEBUG("ATTR - ERROR: size change requested on non-file");
+ error = vnode_isdir(vp) ? EISDIR : EINVAL;
goto out;
}
}
AUDIT_ARG(fflags, flags);
+#if SECURE_KERNEL
+ if (flags & MNT_UNION) {
+ /* No union mounts on release kernels */
+ error = EPERM;
+ goto out;
+ }
+#endif
+
if ((vp->v_flag & VROOT) &&
(vp->v_mount->mnt_flag & MNT_ROOTFS)) {
if (!(flags & MNT_UNION)) {
flags = (flags & ~(MNT_UPDATE));
}
-#ifdef SECURE_KERNEL
+#if SECURE_KERNEL
if ((flags & MNT_RDONLY) == 0) {
/* Release kernels are not allowed to mount "/" as rw */
error = EPERM;
return(EINVAL);
}
- NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
+ NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
UIO_USERSPACE, uap->from, ctx);
if ((error = namei(&fromnd)))
return (error);
out1:
vnode_put(fvp);
- if (fromnd.ni_startdir)
- vnode_put(fromnd.ni_startdir);
nameidone(&fromnd);
if (error == -1)
__ZN18IOTimerEventSourceC2Ev
__ZN18IOTimerEventSourceD0Ev
__ZN18IOTimerEventSourceD2Ev
-__ZN18IOUserNotification10gMetaClassE
-__ZN18IOUserNotification10superClassE
-__ZN18IOUserNotification15setNotificationEP10IONotifier
-__ZN18IOUserNotification4freeEv
-__ZN18IOUserNotification4initEv
-__ZN18IOUserNotification5resetEv
-__ZN18IOUserNotification7isValidEv
-__ZN18IOUserNotification9MetaClassC1Ev
-__ZN18IOUserNotification9MetaClassC2Ev
-__ZN18IOUserNotification9metaClassE
-__ZN18IOUserNotificationC2EPK11OSMetaClass
-__ZN18IOUserNotificationD2Ev
__ZN18_IOServiceNotifier10gMetaClassE
__ZN18_IOServiceNotifier10superClassE
__ZN18_IOServiceNotifier4freeEv
-15.0.0
+15.2.0
# The first line of this file contains the master version number for the kernel.
# All other instances of the kernel version in xnu are derived from this file.
kIOClassNameOverrideNone = 0x00000001,
};
-
#endif /* ! _IOKIT_IOKITKEYSPRIVATE_H */
bool activitySinceSleep(void);
bool abortHibernation(void);
+ void updateConsoleUsers(void);
IOReturn joinAggressiveness( IOService * service );
void handleAggressivesRequests( void );
unsigned int toldPowerdCapWillChange :1;
unsigned int displayPowerOnRequested:1;
+ uint8_t tasksSuspended;
uint32_t hibernateMode;
AbsoluteTime userActivityTime;
AbsoluteTime userActivityTime_prev;
&& (current_task() == kernel_task || mac_iokit_check_nvram_get(kauth_cred_get(), key->getCStringNoCopy()) == 0)
#endif
) { }
- else dict->removeObject(key);
+ else {
+ dict->removeObject(key);
+ iter->reset();
+ }
}
}
//******************************************************************************
static void updateConsoleUsersCallout(thread_call_param_t p0, thread_call_param_t p1)
+{
+ IOPMrootDomain * rootDomain = (IOPMrootDomain *) p0;
+ rootDomain->updateConsoleUsers();
+}
+
+void IOPMrootDomain::updateConsoleUsers(void)
{
IOService::updateConsoleUsers(NULL, kIOMessageSystemHasPoweredOn);
+ if (tasksSuspended)
+ {
+ tasksSuspended = FALSE;
+ tasks_system_suspend(tasksSuspended);
+ }
}
//******************************************************************************
if (SLEEP_STATE == newPowerState)
{
+ if (!tasksSuspended)
+ {
+ AbsoluteTime deadline;
+ tasksSuspended = TRUE;
+ tasks_system_suspend(tasksSuspended);
+
+ clock_interval_to_deadline(10, kSecondScale, &deadline);
+ vm_pageout_wait(AbsoluteTime_to_scalar(&deadline));
+ }
+
#if HIBERNATION
IOHibernateSystemSleep();
IOHibernateIOKitSleep();
gIOStopProviderList = OSArray::withCapacity( 16 );
gIOFinalizeList = OSArray::withCapacity( 16 );
assert( gIOTerminatePhase2List && gIOStopList && gIOStopProviderList && gIOFinalizeList );
+
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
const OSSymbol * sym = OSSymbol::withString(str);
OSMetaClass::applyToInstancesOfClassName(sym, instanceMatch, &ctx);
sym->release();
+
}
else
{
{
count = table->getCount();
done = 0;
+
str = OSDynamicCast(OSString, table->getObject(gIOProviderClassKey));
+
if (str) {
done++;
match = ((kIOServiceClassDone & options) || (0 != metaCast(str)));
do
{
count = table->getCount();
+
if (!(kIOServiceInternalDone & options))
{
match = where->matchInternal(table, options, &done);
// do family specific matching
match = where->matchPropertyTable( table, &score );
-
+
if( !match) {
#if IOMATCHDEBUG
if( kIOLogMatch & getDebugFlags( table ))
nextTable = OSDynamicCast(OSDictionary,
table->getObject( gIOParentMatchKey ));
- if( nextTable) {
+ if(nextTable) {
+
// look for a matching entry anywhere up to root
match = false;
matchParent = true;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-class IOUserNotification : public OSIterator
+class IOUserIterator : public OSIterator
+{
+ OSDeclareDefaultStructors(IOUserIterator)
+public:
+ OSObject * userIteratorObject;
+ IOLock * lock;
+
+ static IOUserIterator * withIterator(OSIterator * iter);
+ virtual bool init( void ) APPLE_KEXT_OVERRIDE;
+ virtual void free() APPLE_KEXT_OVERRIDE;
+
+ virtual void reset() APPLE_KEXT_OVERRIDE;
+ virtual bool isValid() APPLE_KEXT_OVERRIDE;
+ virtual OSObject * getNextObject() APPLE_KEXT_OVERRIDE;
+};
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+class IOUserNotification : public IOUserIterator
{
OSDeclareDefaultStructors(IOUserNotification)
- IONotifier * holdNotify;
- IOLock * lock;
+#define holdNotify userIteratorObject
public:
- virtual bool init( void ) APPLE_KEXT_OVERRIDE;
virtual void free() APPLE_KEXT_OVERRIDE;
virtual void setNotification( IONotifier * obj );
virtual bool isValid() APPLE_KEXT_OVERRIDE;
};
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+OSDefineMetaClassAndStructors( IOUserIterator, OSIterator )
+
+IOUserIterator *
+IOUserIterator::withIterator(OSIterator * iter)
+{
+ IOUserIterator * me;
+
+ if (!iter) return (0);
+
+ me = new IOUserIterator;
+ if (me && !me->init())
+ {
+ me->release();
+ me = 0;
+ }
+ if (!me) return me;
+ me->userIteratorObject = iter;
+
+ return (me);
+}
+
+bool
+IOUserIterator::init( void )
+{
+ if (!OSObject::init()) return (false);
+
+ lock = IOLockAlloc();
+ if( !lock)
+ return( false );
+
+ return (true);
+}
+
+void
+IOUserIterator::free()
+{
+ if (userIteratorObject) userIteratorObject->release();
+ if (lock) IOLockFree(lock);
+ OSObject::free();
+}
+
+void
+IOUserIterator::reset()
+{
+ IOLockLock(lock);
+ assert(OSDynamicCast(OSIterator, userIteratorObject));
+ ((OSIterator *)userIteratorObject)->reset();
+ IOLockUnlock(lock);
+}
+
+bool
+IOUserIterator::isValid()
+{
+ bool ret;
+
+ IOLockLock(lock);
+ assert(OSDynamicCast(OSIterator, userIteratorObject));
+ ret = ((OSIterator *)userIteratorObject)->isValid();
+ IOLockUnlock(lock);
+
+ return (ret);
+}
+
+OSObject *
+IOUserIterator::getNextObject()
+{
+ OSObject * ret;
+
+ IOLockLock(lock);
+ assert(OSDynamicCast(OSIterator, userIteratorObject));
+ ret = ((OSIterator *)userIteratorObject)->getNextObject();
+ IOLockUnlock(lock);
+
+ return (ret);
+}
+
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
extern "C" {
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#undef super
-#define super OSIterator
-OSDefineMetaClass( IOUserNotification, OSIterator )
-OSDefineAbstractStructors( IOUserNotification, OSIterator )
+#define super IOUserIterator
+OSDefineMetaClass( IOUserNotification, IOUserIterator )
+OSDefineAbstractStructors( IOUserNotification, IOUserIterator )
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-bool IOUserNotification::init( void )
-{
- if( !super::init())
- return( false );
-
- lock = IOLockAlloc();
- if( !lock)
- return( false );
-
- return( true );
-}
-
void IOUserNotification::free( void )
{
- if( holdNotify)
- holdNotify->remove();
+ if (holdNotify)
+ {
+ assert(OSDynamicCast(IONotifier, holdNotify));
+ ((IONotifier *)holdNotify)->remove();
+ holdNotify = 0;
+ }
// can't be in handler now
- if( lock)
- IOLockFree( lock );
-
super::free();
}
void IOUserNotification::setNotification( IONotifier * notify )
{
- IONotifier * previousNotify;
+ OSObject * previousNotify;
IOLockLock( gIOObjectPortLock);
IOLockUnlock( gIOObjectPortLock);
if( previousNotify)
- previousNotify->remove();
+ {
+ assert(OSDynamicCast(IONotifier, previousNotify));
+ ((IONotifier *)previousNotify)->remove();
+ }
}
void IOUserNotification::reset()
if( !(out = OSDynamicCast( cls, obj))) \
return( kIOReturnBadArgument )
+#define CHECKLOCKED(cls,obj,out) \
+ IOUserIterator * oIter; \
+ cls * out; \
+ if( !(oIter = OSDynamicCast(IOUserIterator, obj))) \
+ return (kIOReturnBadArgument); \
+ if( !(out = OSDynamicCast(cls, oIter->userIteratorObject))) \
+ return (kIOReturnBadArgument)
+
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Create a vm_map_copy_t or kalloc'ed data for memory
io_object_t iterator,
io_object_t *object )
{
+ IOReturn ret;
OSObject * obj;
CHECK( OSIterator, iterator, iter );
if( obj) {
obj->retain();
*object = obj;
- return( kIOReturnSuccess );
+ ret = kIOReturnSuccess;
} else
- return( kIOReturnNoDevice );
+ ret = kIOReturnNoDevice;
+
+ return (ret);
}
/* Routine io_iterator_reset */
obj = matching_size ? OSUnserializeXML(matching, matching_size)
: OSUnserializeXML(matching);
if( (dict = OSDynamicCast( OSDictionary, obj))) {
+
*matches = service->passiveMatch( dict );
kr = kIOReturnSuccess;
} else
obj = matching_size ? OSUnserializeXML(matching, matching_size)
: OSUnserializeXML(matching);
if( (dict = OSDynamicCast( OSDictionary, obj))) {
- *existing = IOService::getMatchingServices( dict );
+ *existing = IOUserIterator::withIterator(IOService::getMatchingServices( dict ));
kr = kIOReturnSuccess;
} else
kr = kIOReturnBadArgument;
if( master_port != master_device_port)
return( kIOReturnNotPrivileged);
- *iterator = IORegistryIterator::iterateOver(
- IORegistryEntry::getPlane( plane ), options );
+ *iterator = IOUserIterator::withIterator(
+ IORegistryIterator::iterateOver(
+ IORegistryEntry::getPlane( plane ), options ));
return( *iterator ? kIOReturnSuccess : kIOReturnBadArgument );
}
{
CHECK( IORegistryEntry, registry_entry, entry );
- *iterator = IORegistryIterator::iterateOver( entry,
- IORegistryEntry::getPlane( plane ), options );
+ *iterator = IOUserIterator::withIterator(
+ IORegistryIterator::iterateOver( entry,
+ IORegistryEntry::getPlane( plane ), options ));
return( *iterator ? kIOReturnSuccess : kIOReturnBadArgument );
}
kern_return_t is_io_registry_iterator_enter_entry(
io_object_t iterator )
{
- CHECK( IORegistryIterator, iterator, iter );
+ CHECKLOCKED( IORegistryIterator, iterator, iter );
+ IOLockLock(oIter->lock);
iter->enterEntry();
+ IOLockUnlock(oIter->lock);
return( kIOReturnSuccess );
}
{
bool didIt;
- CHECK( IORegistryIterator, iterator, iter );
+ CHECKLOCKED( IORegistryIterator, iterator, iter );
+ IOLockLock(oIter->lock);
didIt = iter->exitEntry();
+ IOLockUnlock(oIter->lock);
return( didIt ? kIOReturnSuccess : kIOReturnNoDevice );
}
kxld_types.h \
stack_protector.h
+INSTALL_KF_MI_LCL_LIST += \
+ section_keywords.h
+
+EXPORT_MI_LIST += \
+ section_keywords.h
+
EXPORT_MI_GEN_LIST = version.h
EXPORT_MI_DIR = libkern
--- /dev/null
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _SECTION_KEYWORDS_H
+#define _SECTION_KEYWORDS_H
+
+
+/* Default behaviour */
+#ifndef SECURITY_READ_ONLY_EARLY
+#define __PLACE_IN_SECTION(__segment__section) \
+ __attribute__((used, section(__segment__section)))
+
+#define SECURITY_READ_ONLY_SPECIAL_SECTION(_t,__segment__section) \
+ const _t __PLACE_IN_SECTION(__segment__section)
+
+#define SECURITY_READ_ONLY_EARLY(_t) const _t
+
+#define SECURITY_READ_ONLY_LATE(_t) _t
+
+#define SECURITY_READ_WRITE(_t) _t __attribute__((used))
+#endif /* SECURITY_READ_ONLY_EARLY */
+
+
+#endif /* _SECTION_KEYWORDS_H */
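
A brief usage sketch of the default definitions above (not part of this diff; the include path and variable names are assumptions, and ports that pre-define SECURITY_READ_ONLY_EARLY will expand these differently):

#include <libkern/section_keywords.h>

/* With the defaults above this expands to: const int boot_mode = 0; */
SECURITY_READ_ONLY_EARLY(int) boot_mode = 0;

/* With the defaults above this expands to a plain: int policy_flags; */
SECURITY_READ_ONLY_LATE(int) policy_flags;

/* Explicit "segment,section" placement via __PLACE_IN_SECTION(). */
SECURITY_READ_ONLY_SPECIAL_SECTION(int, "__DATA,__const") table_size = 16;
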
unsigned size;
{
if (opaque) items += size - size; /* make compiler happy */
- return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) :
- (voidpf)calloc(items, size);
+ if (sizeof(uInt) > 2) {
+ /* malloc and bzero to prevent use of uninitialized memory */
+ voidpf p = malloc(items * size);
+ if (p) bzero(p, items * size);
+ return p;
+ } else
+ return (voidpf)calloc(items, size);
}
void zcfree (opaque, ptr)
74119F46188F3B6A00C6F48F /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; };
7466C924170CBA53004557CC /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; };
74F3290B18EB269400B2B70E /* vm_page_size.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; };
+ 978228281B8678DC008385AC /* pselect-darwinext.c in Sources */ = {isa = PBXBuildFile; fileRef = 978228271B8678CB008385AC /* pselect-darwinext.c */; };
+ 978228291B8678DF008385AC /* pselect-darwinext-cancel.c in Sources */ = {isa = PBXBuildFile; fileRef = 978228261B8678C2008385AC /* pselect-darwinext-cancel.c */; };
7AE28FDF18AC41B1006A5626 /* csr.c in Sources */ = {isa = PBXBuildFile; fileRef = 7AE28FDE18AC41B1006A5626 /* csr.c */; };
9002401118FC9A7F00D73BFA /* rename_ext.c in Sources */ = {isa = PBXBuildFile; fileRef = 906AA2D018F74CD1001C681A /* rename_ext.c */; };
+ 928336A11B83ED9100873B90 /* thread_register_state.c in Sources */ = {isa = PBXBuildFile; fileRef = 928336A01B83ED7800873B90 /* thread_register_state.c */; };
+ 9299E14A1B841E74005B7350 /* thread_state.h in Headers */ = {isa = PBXBuildFile; fileRef = 928336A21B8412C100873B90 /* thread_state.h */; };
+ 9299E14B1B841F59005B7350 /* thread_state.h in Headers */ = {isa = PBXBuildFile; fileRef = 928336A21B8412C100873B90 /* thread_state.h */; };
A59CB95616669EFB00B064B3 /* stack_logging_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = A59CB95516669DB700B064B3 /* stack_logging_internal.h */; };
A59CB9581666A1A200B064B3 /* munmap.c in Sources */ = {isa = PBXBuildFile; fileRef = A59CB9571666A1A200B064B3 /* munmap.c */; };
BA0D9FB1199031AD007E8A73 /* kdebug_trace.c in Sources */ = {isa = PBXBuildFile; fileRef = BA0D9FB0199031AD007E8A73 /* kdebug_trace.c */; };
E4D45C3F16FB20D30002AF25 /* spawn.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C3D16FB20970002AF25 /* spawn.h */; settings = {ATTRIBUTES = (Public, ); }; };
E4D45C4016FB20DC0002AF25 /* spawn_private.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C3E16FB20970002AF25 /* spawn_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
E4D7E55C16F8776300F92D8D /* index.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55316F8776300F92D8D /* index.c */; };
- E4D7E55E16F8776300F92D8D /* memset.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55516F8776300F92D8D /* memset.c */; };
+ E4D7E55E16F8776300F92D8D /* memset.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55516F8776300F92D8D /* memset.c */; settings = {COMPILER_FLAGS = "-fno-builtin"; }; };
E4D7E55F16F8776300F92D8D /* strcmp.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55616F8776300F92D8D /* strcmp.c */; };
E4D7E56016F8776300F92D8D /* strcpy.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55716F8776300F92D8D /* strcpy.c */; };
E4D7E56116F8776300F92D8D /* strlcpy.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55916F8776300F92D8D /* strlcpy.c */; };
4BDD5F1C1891AB2F004BF300 /* mach_approximate_time.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = mach_approximate_time.s; sourceTree = "<group>"; };
72B1E6EC190723DB00FB3FA2 /* guarded_open_dprotected_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = guarded_open_dprotected_np.c; sourceTree = "<group>"; };
7466C923170CB99B004557CC /* vm_page_size.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = vm_page_size.h; sourceTree = "<group>"; };
+ 978228261B8678C2008385AC /* pselect-darwinext-cancel.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pselect-darwinext-cancel.c"; sourceTree = "<group>"; };
+ 978228271B8678CB008385AC /* pselect-darwinext.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pselect-darwinext.c"; sourceTree = "<group>"; };
7AE28FDE18AC41B1006A5626 /* csr.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = csr.c; sourceTree = "<group>"; };
906AA2D018F74CD1001C681A /* rename_ext.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = rename_ext.c; sourceTree = "<group>"; };
+ 928336A01B83ED7800873B90 /* thread_register_state.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = thread_register_state.c; sourceTree = "<group>"; };
+ 928336A21B8412C100873B90 /* thread_state.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = thread_state.h; sourceTree = "<group>"; };
A59CB95516669DB700B064B3 /* stack_logging_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stack_logging_internal.h; sourceTree = "<group>"; };
A59CB9571666A1A200B064B3 /* munmap.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = munmap.c; sourceTree = "<group>"; };
BA0D9FB0199031AD007E8A73 /* kdebug_trace.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kdebug_trace.c; sourceTree = "<group>"; };
C6460B7B182025DF00F73CCA /* sfi.c */,
24B223B3121DFF12007DAEDE /* sigsuspend-base.c */,
13B598931A142F5900DB2D5A /* stackshot.c */,
+ 928336A01B83ED7800873B90 /* thread_register_state.c */,
248AA962122C7B2A0085F5B1 /* unlink.c */,
29A59AE5183B110C00E8B896 /* unlinkat.c */,
374A36E214748EE400AAF39D /* varargs_wrappers.s */,
248BA04A121C8EE4008C073F /* cancelable */ = {
isa = PBXGroup;
children = (
+ 978228271B8678CB008385AC /* pselect-darwinext.c */,
+ 978228261B8678C2008385AC /* pselect-darwinext-cancel.c */,
248BA04B121C8EE4008C073F /* fcntl-base.c */,
248BA04E121C8F06008C073F /* fcntl.c */,
248BA051121C8FE2008C073F /* fcntl-cancel.c */,
C9D9BCDD114B00600000D8B9 /* mach_interface.h */,
C9D9BCDF114B00600000D8B9 /* port_obj.h */,
C9D9BCE0114B00600000D8B9 /* sync.h */,
+ 928336A21B8412C100873B90 /* thread_state.h */,
C9D9BCE3114B00600000D8B9 /* vm_task.h */,
7466C923170CB99B004557CC /* vm_page_size.h */,
);
C6C401241741566D000AE69F /* gethostuuid_private.h in Headers */,
C6D3EFB916542C510052CF30 /* mach.h in Headers */,
C6D3EFBA16542C510052CF30 /* mach_error.h in Headers */,
+ 9299E14B1B841F59005B7350 /* thread_state.h in Headers */,
C6D3EFBB16542C510052CF30 /* mach_init.h in Headers */,
C6D3EFBC16542C510052CF30 /* mach_interface.h in Headers */,
C6D3EFBD16542C510052CF30 /* port_obj.h in Headers */,
C9D9BD26114B00600000D8B9 /* mach.h in Headers */,
C9D9BD27114B00600000D8B9 /* mach_error.h in Headers */,
C9D9BD28114B00600000D8B9 /* mach_init.h in Headers */,
+ 9299E14A1B841E74005B7350 /* thread_state.h in Headers */,
C6C40122174155E3000AE69F /* gethostuuid_private.h in Headers */,
C9D9BD29114B00600000D8B9 /* mach_interface.h in Headers */,
C9D9BD2B114B00600000D8B9 /* port_obj.h in Headers */,
C9D9BD56114B00600000D8B9 /* slot_name.c in Sources */,
24484A7511F6178E00E10CD2 /* string.c in Sources */,
E453AF351700FD3C00F2C94C /* getiopolicy_np.c in Sources */,
+ 978228281B8678DC008385AC /* pselect-darwinext.c in Sources */,
2485235511582D8F0051B413 /* mach_legacy.c in Sources */,
242AB66611EBDC1200107336 /* errno.c in Sources */,
E4D45C2E16F868ED0002AF25 /* libproc.c in Sources */,
24A7C5C711FF8DA6007669EB /* sendto.c in Sources */,
24A7C5C811FF8DA6007669EB /* setattrlist.c in Sources */,
24A7C5C911FF8DA6007669EB /* socketpair.c in Sources */,
+ 928336A11B83ED9100873B90 /* thread_register_state.c in Sources */,
9002401118FC9A7F00D73BFA /* rename_ext.c in Sources */,
2419382B12135FF6003CDE41 /* chmod.c in Sources */,
248BA01D121C56BF008C073F /* connect.c in Sources */,
248AA967122C7CDA0085F5B1 /* rename.c in Sources */,
24B8C2621237F53900D36CC3 /* remove-counter.c in Sources */,
C99A4F501305B2BD0054B7B7 /* __get_cpu_capabilities.s in Sources */,
+ 978228291B8678DF008385AC /* pselect-darwinext-cancel.c in Sources */,
C99A4F531305B43F0054B7B7 /* init_cpu_capabilities.c in Sources */,
030B179B135377B400DAD1F0 /* open_dprotected_np.c in Sources */,
E4D45C3116F868ED0002AF25 /* proc_listpidspath.c in Sources */,
"(os/kern) let orphan continue", /* 45 */
"(os/kern) service not supported",
"(os/kern) remote node down",
+ "(os/kern) thread not waiting",
+ "(os/kern) operation timed out",
+ "(os/kern) code signing error", /* 50 */
+ "(os/kern) policy is static",
+ "(os/kern) insufficient input buffer size",
};
static const char * const err_codes_unix[] = {
--- /dev/null
+/*
+ * Copyright (c) 2015 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _MACH_THREAD_STATE_H_
+#define _MACH_THREAD_STATE_H_
+
+#include <Availability.h>
+#include <mach/mach.h>
+
+#ifndef KERNEL
+/*
+ * Gets all register values in the target thread with pointer-like contents.
+ * There's no guarantee that the returned values are valid pointers, but all
+ * valid pointers will be returned. The order and count of the provided
+ * register values is unspecified and may change; registers with values that
+ * are not valid pointers may be omitted, so the number of pointers returned
+ * may vary from call to call.
+ *
+ * sp is an out parameter that will contain the stack pointer
+ * length is an in/out parameter for the length of the values array
+ * values is an array of pointers
+ *
+ * This may only be called on threads in the current task. If the current
+ * platform defines a stack red zone, the stack pointer returned will be
+ * adjusted to account for the red zone.
+ *
+ * If length is insufficient, KERN_INSUFFICIENT_BUFFER_SIZE will be returned and
+ * length set to the amount of memory required. Callers MUST NOT assume that
+ * any particular size of buffer will be sufficient and should retry with an
+ * appropriately sized buffer upon this error.
+ */
+__OSX_UNAVAILABLE
+__IOS_UNAVAILABLE
+__TVOS_AVAILABLE(9.0)
+__WATCHOS_UNAVAILABLE
+kern_return_t thread_get_register_pointer_values(thread_t thread, uintptr_t *sp, size_t *length, uintptr_t *values);
+#endif
+
+#endif /* _MACH_THREAD_STATE_H_ */
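
A hedged user-space sketch of the retry contract documented above (not part of this diff; the starting buffer size is arbitrary, and the target thread must belong to the calling task):

#include <mach/mach.h>
#include <mach/thread_state.h>
#include <stdio.h>
#include <stdlib.h>

static void
dump_pointer_registers(thread_t thread)
{
	uintptr_t sp;
	size_t count = 16;	/* arbitrary initial guess */
	uintptr_t *values = malloc(count * sizeof(*values));
	kern_return_t kr;

	if (values == NULL) return;

	kr = thread_get_register_pointer_values(thread, &sp, &count, values);
	if (kr == KERN_INSUFFICIENT_BUFFER_SIZE) {
		/* count now holds the required length; grow and retry once. */
		uintptr_t *bigger = realloc(values, count * sizeof(*values));
		if (bigger == NULL) { free(values); return; }
		values = bigger;
		kr = thread_get_register_pointer_values(thread, &sp, &count, values);
	}
	if (kr == KERN_SUCCESS) {
		printf("sp = 0x%lx\n", (unsigned long)sp);
		for (size_t i = 0; i < count; i++)
			printf("  pointer-like value: 0x%lx\n", (unsigned long)values[i]);
	}
	free(values);
}
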
--- /dev/null
+__TVOS_PROHIBITED
+thread_terminate
+act_get_state
+act_set_state
+thread_depress_abort
+thread_get_special_port
+thread_set_special_port
+thread_set_exception_ports
+thread_get_exception_ports
+thread_swap_exception_ports
+thread_get_mach_voucher
+thread_set_mach_voucher
+thread_swap_mach_voucher
+mach_ports_register
+mach_ports_lookup
+task_suspend
+task_resume
+task_set_info
+task_get_special_port
+task_set_special_port
+thread_create
+thread_create_running
+task_set_exception_ports
+task_get_exception_ports
+task_swap_exception_ports
+task_policy_set
+task_policy_get
+task_zone_info
+task_get_state
+task_set_state
+task_set_phys_footprint_limit
+task_suspend2
+task_resume2
+task_get_mach_voucher
+task_set_mach_voucher
+task_swap_mach_voucher
+task_set_port_space
+host_request_notification
+host_info
+task_wire
+mach_port_allocate_name
+host_create_mach_voucher
+host_register_mach_voucher_attr_manager
+host_register_well_known_mach_voucher_attr_manager
+host_set_atm_diagnostic_flag
+host_get_atm_diagnostic_flag
+
-__WATCHOS_PROHIBITED __TVOS_PROHIBITED
+__WATCHOS_PROHIBITED
thread_terminate
act_get_state
act_set_state
--- /dev/null
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#define VARIANT_CANCELABLE
+#define VARIANT_DARWIN_EXTSN
+
+#include "../select-base.c"
--- /dev/null
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#undef __DARWIN_NON_CANCELABLE
+#define __DARWIN_NON_CANCELABLE 1
+#define VARIANT_DARWIN_EXTSN
+
+#include "../select-base.c"
#define __DARWIN_NON_CANCELABLE 0
#endif /* __LP64__ && (VARIANT_CANCELABLE || VARIANT_PRE1050) */
+#if defined(VARIANT_DARWIN_EXTSN)
+#define _DARWIN_C_SOURCE
+#define _DARWIN_UNLIMITED_SELECT
+#endif
+
#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/signal.h>
#include "_errno.h"
#if defined(VARIANT_CANCELABLE) || defined(VARIANT_PRE1050)
+#if !defined(VARIANT_DARWIN_EXTSN)
extern int __select(int, fd_set * __restrict, fd_set * __restrict,
fd_set * __restrict, struct timeval * __restrict);
+#endif
+int __pselect(int, fd_set * __restrict, fd_set * __restrict,
+ fd_set * __restrict, const struct timespec * __restrict, const sigset_t * __restrict);
#else /* !VARIANT_CANCELABLE && !VARIANT_PRE1050 */
+#if !defined(VARIANT_DARWIN_EXTSN)
int __select_nocancel(int, fd_set * __restrict, fd_set * __restrict,
fd_set * __restrict, struct timeval * __restrict);
+#endif
+int __pselect_nocancel(int, fd_set * __restrict, fd_set * __restrict,
+ fd_set * __restrict, const struct timespec * __restrict, const sigset_t * __restrict);
#endif /* VARIANT_CANCELABLE || VARIANT_PRE1050 */
+#if !defined(VARIANT_DARWIN_EXTSN)
/*
- * select stub, return error if nfds > FD_SETSIZE
- * add pthread cancelability
- * mandated for conformance.
- *
- * This is only for (non DARWINEXTSN) UNIX03 (both cancelable and
- * non-cancelable) and for legacy
+ * select() implementation for 1050 and legacy (cancelable and non-cancelable)
+ * variants. The darwin extension variants (both cancelable & non-cancelable) are
+ * mapped directly to the syscall stub.
*/
int
select(int nfds, fd_set * __restrict readfds, fd_set * __restrict writefds,
#endif /* VARIANT_LEGACY || VARIANT_PRE1050 */
)
{
-
#if defined(VARIANT_LEGACY) || defined(VARIANT_PRE1050)
struct timeval tb, *timeout;
tb.tv_sec = 0;
tb.tv_usec = 10000;
timeout = &tb;
- } else
+ } else {
timeout = intimeout;
+ }
#else /* !VARIANT_LEGACY && !VARIANT_PRE1050 */
if (nfds > FD_SETSIZE) {
errno = EINVAL;
return -1;
}
-#endif /* VARIANT_LEGACY || VARIANT_PRE1050 */
+#endif
+
#if defined(VARIANT_CANCELABLE) || defined(VARIANT_PRE1050)
return __select(nfds, readfds, writefds, exceptfds, timeout);
#else /* !VARIANT_CANCELABLE && !VARIANT_PRE1050 */
return __select_nocancel(nfds, readfds, writefds, exceptfds, timeout);
#endif /* VARIANT_CANCELABLE || VARIANT_PRE1050 */
}
+#endif /* !defined(VARIANT_DARWIN_EXTSN) */
+
+
+/*
+ * User-space emulation of the pselect() syscall for B&I
+ * TODO: remove once B&I moves to an xnu with native pselect()
+ */
+extern int __pthread_sigmask(int, const sigset_t *, sigset_t *);
+static int
+_pselect_emulated(int count, fd_set * __restrict rfds, fd_set * __restrict wfds,
+ fd_set * __restrict efds, const struct timespec * __restrict timo,
+ const sigset_t * __restrict mask)
+{
+ sigset_t omask;
+ struct timeval tvtimo, *tvp;
+ int rv, sverrno;
+
+ if (timo) {
+ tvtimo.tv_sec = timo->tv_sec;
+ tvtimo.tv_usec = (__darwin_suseconds_t)(timo->tv_nsec / 1000);
+ tvp = &tvtimo;
+ } else {
+ tvp = 0;
+ }
+
+ if (mask != 0) {
+ rv = __pthread_sigmask(SIG_SETMASK, mask, &omask);
+ if (rv != 0)
+ return rv;
+ }
+
+ rv = select(count, rfds, wfds, efds, tvp);
+ if (mask != 0) {
+ sverrno = errno;
+ __pthread_sigmask(SIG_SETMASK, &omask, (sigset_t *)0);
+ errno = sverrno;
+ }
+
+ return rv;
+}
+
+/*
+ * pselect() implementation for all variants. Unlike select(), we implement the
+ * darwin extension variants here to catch cases where xnu doesn't implement
+ * pselect and we need to emulate.
+ */
+int
+pselect(int nfds, fd_set * __restrict readfds, fd_set * __restrict writefds,
+ fd_set * __restrict exceptfds, const struct timespec * __restrict
+#if defined(VARIANT_LEGACY) || defined(VARIANT_PRE1050)
+ intimeout,
+#else /* !VARIANT_LEGACY && !VARIANT_PRE1050 */
+ timeout,
+#endif /* VARIANT_LEGACY || VARIANT_PRE1050 */
+ const sigset_t * __restrict sigmask)
+{
+ int ret;
+#if defined(VARIANT_LEGACY) || defined(VARIANT_PRE1050)
+ struct timespec tb;
+ const struct timespec *timeout;
+
+ /*
+ * Legacy select behavior is a minimum 10 msec timeout when tv_nsec is non-zero
+ */
+ if (intimeout && intimeout->tv_sec == 0 && intimeout->tv_nsec > 0 && intimeout->tv_nsec < 10000000L) {
+ tb.tv_sec = 0;
+ tb.tv_nsec = 10000000L;
+ timeout = &tb;
+ } else {
+ timeout = intimeout;
+ }
+#elif defined(VARIANT_DARWIN_EXTSN)
+#else
+ /* 1050 variant */
+ if (nfds > FD_SETSIZE) {
+ errno = EINVAL;
+ return -1;
+ }
+#endif
+
+#if defined(VARIANT_CANCELABLE) || defined(VARIANT_PRE1050)
+ ret = __pselect(nfds, readfds, writefds, exceptfds, timeout, sigmask);
+#else /* !VARIANT_CANCELABLE && !VARIANT_PRE1050 */
+ ret = __pselect_nocancel(nfds, readfds, writefds, exceptfds, timeout, sigmask);
+#endif /* VARIANT_CANCELABLE || VARIANT_PRE1050 */
+
+ if (ret == -1 && errno == ENOSYS) {
+ ret = _pselect_emulated(nfds, readfds, writefds, exceptfds, timeout, sigmask);
+ }
+
+ return ret;
+}
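
For completeness, a minimal caller-side example of the pselect() entry point implemented above (standard POSIX usage, not from this diff): keep SIGINT blocked except while atomically waiting for input.

#include <signal.h>
#include <sys/select.h>
#include <unistd.h>

int
wait_for_stdin(void)
{
	sigset_t block, waitmask;
	fd_set rfds;

	/* Block SIGINT and remember the previous mask for the wait. */
	sigemptyset(&block);
	sigaddset(&block, SIGINT);
	sigprocmask(SIG_BLOCK, &block, &waitmask);

	FD_ZERO(&rfds);
	FD_SET(STDIN_FILENO, &rfds);

	/* SIGINT is deliverable only while pselect() is actually waiting. */
	return pselect(STDIN_FILENO + 1, &rfds, NULL, NULL, NULL, &waitmask);
}
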
#define wsize sizeof(u_int)
#define wmask (wsize - 1)
+// n.b. this must be compiled with -fno-builtin or it might get optimized into
+// a recursive call to bzero.
__attribute__((visibility("hidden")))
void
bzero(void *dst0, size_t length)
--- /dev/null
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <mach/mach.h>
+
+kern_return_t
+thread_get_register_pointer_values(thread_t thread, uintptr_t *sp, size_t *length, uintptr_t *values)
+{
+ if (!length) return KERN_INVALID_ARGUMENT;
+ if (*length > 0 && values == NULL) return KERN_INVALID_ARGUMENT;
+
+ size_t in_length = *length;
+ size_t out_length = 0;
+
+#if defined(__i386__)
+ i386_thread_state_t state = {};
+ thread_state_flavor_t flavor = x86_THREAD_STATE32;
+ mach_msg_type_number_t count = i386_THREAD_STATE_COUNT;
+#elif defined(__x86_64__)
+ x86_thread_state64_t state = {};
+ thread_state_flavor_t flavor = x86_THREAD_STATE64;
+ mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
+#else
+#error thread_get_register_pointer_values not defined for this architecture
+#endif
+
+ kern_return_t ret = thread_get_state(thread, flavor, (thread_state_t)&state, &count);
+ if (ret != KERN_SUCCESS){
+ return ret;
+ }
+
+ // If the provided pointer value is > PAGE_SIZE, add it to the output array
+ // if there's available space. (Values between 0 and PAGE_SIZE are the NULL page
+ // and not valid pointers.)
+#define push_register_value(p) do { \
+ if ((uintptr_t)p > PAGE_SIZE) { \
+ if (out_length < in_length && values) \
+ values[out_length] = p; \
+ out_length++; \
+ } } while (0)
+
+#if defined(__i386__)
+ if (sp) *sp = state.__esp;
+
+ push_register_value(state.__eip);
+
+ push_register_value(state.__eax);
+ push_register_value(state.__ebx);
+ push_register_value(state.__ecx);
+ push_register_value(state.__edx);
+ push_register_value(state.__edi);
+ push_register_value(state.__esi);
+ push_register_value(state.__ebp);
+#elif defined(__x86_64__)
+ if (sp) *sp = state.__rsp - 128 /* redzone */;
+
+ push_register_value(state.__rip);
+
+ push_register_value(state.__rax);
+ push_register_value(state.__rbx);
+ push_register_value(state.__rcx);
+ push_register_value(state.__rdx);
+ push_register_value(state.__rdi);
+ push_register_value(state.__rbp);
+ push_register_value(state.__r8);
+ push_register_value(state.__r9);
+ push_register_value(state.__r10);
+ push_register_value(state.__r11);
+ push_register_value(state.__r12);
+ push_register_value(state.__r13);
+ push_register_value(state.__r14);
+ push_register_value(state.__r15);
+#else
+#error thread_get_register_pointer_values not defined for this architecture
+#endif
+
+ *length = out_length;
+
+ if (in_length == 0 || out_length > in_length){
+ return KERN_INSUFFICIENT_BUFFER_SIZE;
+ }
+
+ return KERN_SUCCESS;
+}
MIGS_DUAL_PUBLIC_PRIVATE=""
-if [[ "$PLATFORM_NAME" = "iphoneos" || "$PLATFORM_NAME" = "iphonesimulator" || "$PLATFORM_NAME" = "iphoneosnano" || "$PLATFORM_NAME" = "iphonenanosimulator" || "$PLATFORM_NAME" = "tvos" || "$PLATFOM_NAME" = "tvsimulator" || "$PLATFOM_NAME" = "appletvos" || "$PLATFOM_NAME" = "appletvsimulator" || "$PLATFOM_NAME" = "watchos" || "$PLATFOM_NAME" = "watchsimulator" ]]
+if ( echo {iphone,tv,appletv,watch}{os,simulator} iphone{osnano,nanosimulator} | grep -wFq "$PLATFORM_NAME" )
then
MIGS_PRIVATE="mach_vm.defs"
else
port_obj.h
sync.h
vm_task.h
- vm_page_size.h"
+ vm_page_size.h
+ thread_state.h"
-MIG_FILTERS="watchos_prohibited_mig.txt"
+MIG_FILTERS="watchos_prohibited_mig.txt tvos_prohibited_mig.txt"
# install /usr/include/server headers
mkdir -p $SERVER_HEADER_DST
if (PE_parse_boot_argn("debug", &debug_boot_arg, sizeof (debug_boot_arg))) {
panicDebugging = TRUE;
+#if DEVELOPMENT || DEBUG
if (debug_boot_arg & DB_HALT) halt_in_debugger=1;
+#endif
if (debug_boot_arg & DB_PRT) disable_debug_output=FALSE;
if (debug_boot_arg & DB_SLOG) systemLogDiags=TRUE;
if (debug_boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE;
unsigned int cpus = 0;
boolean_t fidn;
boolean_t IA32e = TRUE;
- char namep[16];
postcode(I386_INIT_ENTRY);
kernel_debug_string_simple("PE_init_kprintf");
PE_init_kprintf(FALSE);
- if(PE_parse_boot_argn("-show_pointers", &namep, sizeof (namep)))
- doprnt_hide_pointers = FALSE;
-
kernel_debug_string_simple("kernel_early_bootstrap");
kernel_early_bootstrap();
mp_call_head_unlock(cqp, intrs_enabled);
KERNEL_DEBUG_CONSTANT(
TRACE_MP_CPUS_CALL_ACTION,
- call.func, call.arg0, call.arg1, call.maskp, 0);
+ VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
+ VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
call.func(call.arg0, call.arg1);
(void) mp_call_head_lock(cqp);
}
KERNEL_DEBUG_CONSTANT(
TRACE_MP_CPUS_CALL | DBG_FUNC_START,
- cpus, mode, VM_KERNEL_UNSLIDE(action_func), arg0, arg1);
+ cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));
if (!smp_initialized) {
if ((cpus & CPUMASK_SELF) == 0)
KERNEL_DEBUG_CONSTANT(
TRACE_MP_CPUS_CALL_LOCAL,
VM_KERNEL_UNSLIDE(action_func),
- arg0, arg1, 0, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
action_func(arg0, arg1);
}
} else {
if (mode != SYNC && call_self ) {
KERNEL_DEBUG_CONSTANT(
TRACE_MP_CPUS_CALL_LOCAL,
- VM_KERNEL_UNSLIDE(action_func), arg0, arg1, 0, 0);
+ VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
if (action_func != NULL) {
ml_set_interrupts_enabled(FALSE);
action_func(arg0, arg1);
PMAP_LOCK(map);
if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
- panic("pmap_change_wiring: pte missing");
+ panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
+ map, vaddr, wired);
if (wired && !iswired(*pte)) {
/*
return(virt);
}
-unsigned int
+mach_vm_size_t
pmap_query_resident(
pmap_t pmap,
addr64_t s64,
addr64_t e64,
- unsigned int *compressed_count_p)
+ mach_vm_size_t *compressed_bytes_p)
{
pt_entry_t *pde;
pt_entry_t *spte, *epte;
addr64_t l64;
uint64_t deadline;
- unsigned int result;
+ mach_vm_size_t resident_bytes;
+ mach_vm_size_t compressed_bytes;
boolean_t is_ept;
- unsigned int compressed_count;
pmap_intr_assert();
if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
- if (compressed_count_p) {
- *compressed_count_p = 0;
+ if (compressed_bytes_p) {
+ *compressed_bytes_p = 0;
}
return 0;
}
(uint32_t) (s64 >> 32), s64,
(uint32_t) (e64 >> 32), e64);
- result = 0;
- compressed_count = 0;
+ resident_bytes = 0;
+ compressed_bytes = 0;
PMAP_LOCK(pmap);
for (; spte < epte; spte++) {
if (pte_to_pa(*spte) != 0) {
- result++;
+ resident_bytes += PAGE_SIZE;
} else if (*spte & PTE_COMPRESSED) {
- compressed_count++;
+ compressed_bytes += PAGE_SIZE;
}
}
PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
pmap, 0, 0, 0, 0);
- if (compressed_count_p) {
- *compressed_count_p = compressed_count;
+ if (compressed_bytes_p) {
+ *compressed_bytes_p = compressed_bytes;
}
- return result;
+ return resident_bytes;
}
#if MACH_ASSERT
#define MSR_IA32_PP0_ENERGY_STATUS 0x639
#define MSR_IA32_PP1_ENERGY_STATUS 0x641
-#if !defined(XNU_HIDE_SKYLAKE)
#define MSR_IA32_IA_PERF_LIMIT_REASONS_SKL 0x64F
-#endif
#define MSR_IA32_IA_PERF_LIMIT_REASONS 0x690
#define MSR_IA32_GT_PERF_LIMIT_REASONS 0x6B0
assert(IP_VALID(port));
ip_lock(port);
temp_task_imp = IIT_NULL;
- if (!ipc_port_importance_delta_internal(port, &delta, &temp_task_imp)) {
+ if (!ipc_port_importance_delta_internal(port, IPID_OPTION_NORMAL, &delta, &temp_task_imp)) {
ip_unlock(port);
}
task_deallocate(task);
}
+/*
+ * Routine: ipc_importance_check_circularity
+ * Purpose:
+ * Check if queueing "port" in a message for "dest"
+ * would create a circular group of ports and messages.
+ *
+ * If no circularity (FALSE returned), then "port"
+ * is changed from "in limbo" to "in transit".
+ *
+ * That is, we want to set port->ip_destination == dest,
+ * but guaranteeing that this doesn't create a circle
+ * port->ip_destination->ip_destination->... == port
+ *
+ * Additionally, if port was successfully changed to "in transit",
+ * propagate boost assertions from the "in limbo" port to all
+ * the ports in the chain, and, if the destination task accepts
+ * boosts, to the destination task.
+ *
+ * Conditions:
+ * No ports locked. References held for "port" and "dest".
+ */
+
+boolean_t
+ipc_importance_check_circularity(
+ ipc_port_t port,
+ ipc_port_t dest)
+{
+ ipc_importance_task_t imp_task = IIT_NULL;
+ ipc_importance_task_t release_imp_task = IIT_NULL;
+ boolean_t imp_lock_held = FALSE;
+ int assertcnt = 0;
+ ipc_port_t base;
+
+ assert(port != IP_NULL);
+ assert(dest != IP_NULL);
+
+ if (port == dest)
+ return TRUE;
+ base = dest;
+
+ /* port is in limbo, so donation status is safe to latch */
+ if (port->ip_impdonation != 0) {
+ imp_lock_held = TRUE;
+ ipc_importance_lock();
+ }
+
+ /*
+ * First try a quick check that can run in parallel.
+ * No circularity if dest is not in transit.
+ */
+ ip_lock(port);
+
+ /*
+ * Even if port is just carrying assertions for others,
+ * we need the importance lock.
+ */
+ if (port->ip_impcount > 0 && !imp_lock_held) {
+ if (!ipc_importance_lock_try()) {
+ ip_unlock(port);
+ ipc_importance_lock();
+ ip_lock(port);
+ }
+ imp_lock_held = TRUE;
+ }
+
+ if (ip_lock_try(dest)) {
+ if (!ip_active(dest) ||
+ (dest->ip_receiver_name != MACH_PORT_NULL) ||
+ (dest->ip_destination == IP_NULL))
+ goto not_circular;
+
+ /* dest is in transit; further checking necessary */
+
+ ip_unlock(dest);
+ }
+ ip_unlock(port);
+
+ /*
+ * We're about to pay the cost to serialize,
+ * just go ahead and grab importance lock.
+ */
+ if (!imp_lock_held) {
+ ipc_importance_lock();
+ imp_lock_held = TRUE;
+ }
+
+ ipc_port_multiple_lock(); /* massive serialization */
+
+ /*
+ * Search for the end of the chain (a port not in transit),
+ * acquiring locks along the way.
+ */
+
+ for (;;) {
+ ip_lock(base);
+
+ if (!ip_active(base) ||
+ (base->ip_receiver_name != MACH_PORT_NULL) ||
+ (base->ip_destination == IP_NULL))
+ break;
+
+ base = base->ip_destination;
+ }
+
+ /* all ports in chain from dest to base, inclusive, are locked */
+
+ if (port == base) {
+ /* circularity detected! */
+
+ ipc_port_multiple_unlock();
+
+ /* port (== base) is in limbo */
+
+ assert(ip_active(port));
+ assert(port->ip_receiver_name == MACH_PORT_NULL);
+ assert(port->ip_destination == IP_NULL);
+
+ while (dest != IP_NULL) {
+ ipc_port_t next;
+
+ /* dest is in transit or in limbo */
+
+ assert(ip_active(dest));
+ assert(dest->ip_receiver_name == MACH_PORT_NULL);
+
+ next = dest->ip_destination;
+ ip_unlock(dest);
+ dest = next;
+ }
+
+ if (imp_lock_held)
+ ipc_importance_unlock();
+
+ return TRUE;
+ }
+
+ /*
+ * The guarantee: lock port while the entire chain is locked.
+ * Once port is locked, we can take a reference to dest,
+ * add port to the chain, and unlock everything.
+ */
+
+ ip_lock(port);
+ ipc_port_multiple_unlock();
+
+ not_circular:
+
+ /* port is in limbo */
+
+ assert(ip_active(port));
+ assert(port->ip_receiver_name == MACH_PORT_NULL);
+ assert(port->ip_destination == IP_NULL);
+
+ ip_reference(dest);
+ port->ip_destination = dest;
+
+ /* must have been in limbo or still bound to a task */
+ assert(port->ip_tempowner != 0);
+
+ /*
+ * We delayed dropping assertions from a specific task.
+ * Cache that info now (we'll drop assertions and the
+ * task reference below).
+ */
+ release_imp_task = port->ip_imp_task;
+ if (IIT_NULL != release_imp_task) {
+ port->ip_imp_task = IIT_NULL;
+ }
+ assertcnt = port->ip_impcount;
+
+ /* take the port out of limbo w.r.t. assertions */
+ port->ip_tempowner = 0;
+
+ /* now unlock chain */
+
+ ip_unlock(port);
+
+ for (;;) {
+
+ /* every port along the chain tracks assertions behind it */
+ ipc_port_impcount_delta(dest, assertcnt, base);
+
+ if (dest == base)
+ break;
+
+ /* port is in transit */
+
+ assert(ip_active(dest));
+ assert(dest->ip_receiver_name == MACH_PORT_NULL);
+ assert(dest->ip_destination != IP_NULL);
+ assert(dest->ip_tempowner == 0);
+
+ port = dest->ip_destination;
+ ip_unlock(dest);
+ dest = port;
+ }
+
+ /* base is not in transit */
+ assert(!ip_active(base) ||
+ (base->ip_receiver_name != MACH_PORT_NULL) ||
+ (base->ip_destination == IP_NULL));
+
+ /*
+ * Find the task to boost (if any).
+ * We will boost "through" ports that don't know
+ * about inheritance to deliver receive rights that
+ * do.
+ */
+ if (ip_active(base) && (assertcnt > 0)) {
+ assert(imp_lock_held);
+ if (base->ip_tempowner != 0) {
+ if (IIT_NULL != base->ip_imp_task) {
+ /* specified tempowner task */
+ imp_task = base->ip_imp_task;
+ assert(ipc_importance_task_is_any_receiver_type(imp_task));
+ }
+ /* otherwise don't boost current task */
+
+ } else if (base->ip_receiver_name != MACH_PORT_NULL) {
+ ipc_space_t space = base->ip_receiver;
+
+ /* only spaces with boost-accepting tasks */
+ if (space->is_task != TASK_NULL &&
+ ipc_importance_task_is_any_receiver_type(space->is_task->task_imp_base))
+ imp_task = space->is_task->task_imp_base;
+ }
+
+ /* take reference before unlocking base */
+ if (imp_task != IIT_NULL) {
+ ipc_importance_task_reference(imp_task);
+ }
+ }
+
+ ip_unlock(base);
+
+ /*
+ * Transfer assertions now that the ports are unlocked.
+ * Avoid extra overhead if transferring to/from the same task.
+ *
+ * NOTE: If a transfer is occurring, the new assertions will
+ * be added to imp_task BEFORE the importance lock is unlocked.
+ * This is critical - to avoid decrements coming from the kmsgs
+ * beating the increment to the task.
+ */
+ boolean_t transfer_assertions = (imp_task != release_imp_task);
+
+ if (imp_task != IIT_NULL) {
+ assert(imp_lock_held);
+ if (transfer_assertions)
+ ipc_importance_task_hold_internal_assertion_locked(imp_task, assertcnt);
+ }
+
+ if (release_imp_task != IIT_NULL) {
+ assert(imp_lock_held);
+ if (transfer_assertions)
+ ipc_importance_task_drop_internal_assertion_locked(release_imp_task, assertcnt);
+ }
+
+ if (imp_lock_held)
+ ipc_importance_unlock();
+
+ if (imp_task != IIT_NULL)
+ ipc_importance_task_release(imp_task);
+
+ if (release_imp_task != IIT_NULL)
+ ipc_importance_task_release(release_imp_task);
+
+ return FALSE;
+}
+
/*
* Routine: ipc_importance_send
* Purpose:
ipc_importance_task_t task_imp;
kern_return_t kr;
-
assert(IP_VALID(port));
/* If no donation to be made, return quickly */
/*
* If we need to relock the port, do it with the importance still locked.
* This assures we get to add the importance boost through the port to
- * the task BEFORE anyone else can attempt to undo that operation because
+ * the task BEFORE anyone else can attempt to undo that operation if
* the sender lost donor status.
*/
if (TRUE == port_lock_dropped) {
ip_lock(port);
}
- ipc_importance_unlock();
portupdate:
}
#endif /* IMPORTANCE_DEBUG */
- /* adjust port boost count (with port locked) */
- if (TRUE == ipc_port_importance_delta(port, 1)) {
+ mach_port_delta_t delta = 1;
+ boolean_t need_port_lock;
+ task_imp = IIT_NULL;
+
+ /* adjust port boost count (with importance and port locked) */
+ need_port_lock = ipc_port_importance_delta_internal(port, IPID_OPTION_NORMAL, &delta, &task_imp);
+
+ /* if we need to adjust a task importance as a result, apply that here */
+ if (IIT_NULL != task_imp && delta != 0) {
+ assert(delta == 1);
+
+ /* if this results in a change of state, propagate the transition */
+ if (ipc_importance_task_check_transition(task_imp, IIT_UPDATE_HOLD, delta)) {
+
+ /* can't hold the port lock during task transition(s) */
+ if (!need_port_lock) {
+ need_port_lock = TRUE;
+ ip_unlock(port);
+ }
+ ipc_importance_task_propagate_assertion_locked(task_imp, IIT_UPDATE_HOLD, TRUE);
+ }
+ }
+
+ ipc_importance_unlock();
+
+ if (need_port_lock) {
port_lock_dropped = TRUE;
ip_lock(port);
}
+
return port_lock_dropped;
}
ipc_importance_unlock();
}
- /* decrement port boost count */
+ /*
+ * decrement port boost count
+ * This is OK to do without the importance lock as we atomically
+ * unlinked the kmsg and snapshotted the donating state while holding
+ * the importance lock
+ */
if (donating) {
ip_lock(port);
if (III_NULL != inherit) {
ip_unlock(port);
} else {
/* drop importance from port and destination task */
- if (ipc_port_importance_delta(port, -1) == FALSE) {
+ if (ipc_port_importance_delta(port, IPID_OPTION_NORMAL, -1) == FALSE) {
ip_unlock(port);
}
}
} else if (cleared_self_donation) {
ip_lock(port);
/* drop cleared donation from port and destination task */
- if (ipc_port_importance_delta(port, -1) == FALSE) {
+ if (ipc_port_importance_delta(port, IPID_OPTION_NORMAL, -1) == FALSE) {
ip_unlock(port);
}
}
ipc_importance_task_t task_imp = task_self->task_imp_base;
ipc_port_t port = kmsg->ikm_header->msgh_remote_port;
- /* defensive deduction for release builds lacking the assert */
ip_lock(port);
ipc_port_impcount_delta(port, -1, IP_NULL);
ip_unlock(port);
ip_lock(port);
/* inactive ports already had their importance boosts dropped */
if (!ip_active(port) ||
- ipc_port_importance_delta(port, -1) == FALSE) {
+ ipc_port_importance_delta(port, IPID_OPTION_NORMAL, -1) == FALSE) {
ip_unlock(port);
}
}
extern kern_return_t ipc_importance_task_hold_legacy_external_assertion(ipc_importance_task_t task_imp, uint32_t count);
extern kern_return_t ipc_importance_task_drop_legacy_external_assertion(ipc_importance_task_t task_imp, uint32_t count);
+extern boolean_t ipc_importance_check_circularity(ipc_port_t port, ipc_port_t dest);
+
/* prepare importance attributes for sending */
extern boolean_t ipc_importance_send(
ipc_kmsg_t kmsg,
ipc_port_t port;
thread_t th = current_thread();
mach_msg_return_t error = MACH_MSG_SUCCESS;
+ boolean_t kernel_reply = FALSE;
spl_t s;
/* Check if honor qlimit flag is set on thread. */
assert(IP_VALID(port));
ip_lock(port);
/* fall thru with reply - same options */
+ kernel_reply = TRUE;
}
#if IMPORTANCE_INHERITANCE
ipc_kmsg_destroy(kmsg);
return MACH_MSG_SUCCESS;
}
+
+ if (error != MACH_MSG_SUCCESS && kernel_reply) {
+ /*
+ * Kernel reply messages that fail can't be allowed to
+ * pseudo-receive on error conditions. We need to just treat
+ * the message as a successful delivery.
+ */
+ ip_release(port); /* JMM - Future: release right, not just ref */
+ kmsg->ikm_header->msgh_remote_port = MACH_PORT_NULL;
+ ipc_kmsg_destroy(kmsg);
+ return MACH_MSG_SUCCESS;
+ }
return error;
}
ipc_port_t dport = (ipc_port_t)dest_port;
/* dport still locked from above */
- if (ipc_port_importance_delta(dport, 1) == FALSE) {
+ if (ipc_port_importance_delta(dport, IPID_OPTION_SENDPOSSIBLE, 1) == FALSE) {
ip_unlock(dport);
}
}
if (port->ip_impdonation != 0 &&
port->ip_spimportant == 0 &&
(task_is_importance_donor(current_task()))) {
- port->ip_spimportant = 1;
*importantp = TRUE;
}
#endif /* IMPORTANCE_INHERTANCE */
(port->ip_spimportant == 0) &&
(((option & MACH_SEND_IMPORTANCE) != 0) ||
(task_is_importance_donor(current_task())))) {
- port->ip_spimportant = 1;
return TRUE;
}
#else
* but guaranteeing that this doesn't create a circle
* port->ip_destination->ip_destination->... == port
*
- * Additionally, if port was successfully changed to "in transit",
- * propagate boost assertions from the "in limbo" port to all
- * the ports in the chain, and, if the destination task accepts
- * boosts, to the destination task.
- *
* Conditions:
* No ports locked. References held for "port" and "dest".
*/
ipc_port_t port,
ipc_port_t dest)
{
- ipc_port_t base;
-
#if IMPORTANCE_INHERITANCE
- ipc_importance_task_t imp_task = IIT_NULL;
- ipc_importance_task_t release_imp_task = IIT_NULL;
- int assertcnt = 0;
-#endif /* IMPORTANCE_INHERITANCE */
+ /* adjust importance counts at the same time */
+ return ipc_importance_check_circularity(port, dest);
+#else
+ ipc_port_t base;
assert(port != IP_NULL);
assert(dest != IP_NULL);
* First try a quick check that can run in parallel.
* No circularity if dest is not in transit.
*/
-
ip_lock(port);
if (ip_lock_try(dest)) {
if (!ip_active(dest) ||
ip_reference(dest);
port->ip_destination = dest;
-#if IMPORTANCE_INHERITANCE
- /* must have been in limbo or still bound to a task */
- assert(port->ip_tempowner != 0);
-
- /*
- * We delayed dropping assertions from a specific task.
- * Cache that info now (we'll drop assertions and the
- * task reference below).
- */
- release_imp_task = port->ip_imp_task;
- if (IIT_NULL != release_imp_task) {
- port->ip_imp_task = IIT_NULL;
- }
- assertcnt = port->ip_impcount;
-
- /* take the port out of limbo w.r.t. assertions */
- port->ip_tempowner = 0;
-
-#endif /* IMPORTANCE_INHERITANCE */
-
/* now unlock chain */
ip_unlock(port);
for (;;) {
-
-#if IMPORTANCE_INHERITANCE
- /* every port along chain track assertions behind it */
- dest->ip_impcount += assertcnt;
-#endif /* IMPORTANCE_INHERITANCE */
-
if (dest == base)
break;
assert(dest->ip_receiver_name == MACH_PORT_NULL);
assert(dest->ip_destination != IP_NULL);
-#if IMPORTANCE_INHERITANCE
- assert(dest->ip_tempowner == 0);
-#endif /* IMPORTANCE_INHERITANCE */
-
port = dest->ip_destination;
ip_unlock(dest);
dest = port;
(base->ip_receiver_name != MACH_PORT_NULL) ||
(base->ip_destination == IP_NULL));
-#if IMPORTANCE_INHERITANCE
- /*
- * Find the task to boost (if any).
- * We will boost "through" ports that don't know
- * about inheritance to deliver receive rights that
- * do.
- */
- if (ip_active(base) && (assertcnt > 0)) {
- if (base->ip_tempowner != 0) {
- if (IIT_NULL != base->ip_imp_task) {
- /* specified tempowner task */
- imp_task = base->ip_imp_task;
- assert(ipc_importance_task_is_any_receiver_type(imp_task));
- }
- /* otherwise don't boost current task */
-
- } else if (base->ip_receiver_name != MACH_PORT_NULL) {
- ipc_space_t space = base->ip_receiver;
-
- /* only spaces with boost-accepting tasks */
- if (space->is_task != TASK_NULL &&
- ipc_importance_task_is_any_receiver_type(space->is_task->task_imp_base))
- imp_task = space->is_task->task_imp_base;
- }
-
- /* take reference before unlocking base */
- if (imp_task != IIT_NULL) {
- ipc_importance_task_reference(imp_task);
- }
- }
-#endif /* IMPORTANCE_INHERITANCE */
-
ip_unlock(base);
-#if IMPORTANCE_INHERITANCE
- /*
- * Transfer assertions now that the ports are unlocked.
- * Avoid extra overhead if transferring to/from the same task.
- */
- boolean_t transfer_assertions = (imp_task != release_imp_task) ? TRUE : FALSE;
-
- if (imp_task != IIT_NULL) {
- if (transfer_assertions)
- ipc_importance_task_hold_internal_assertion(imp_task, assertcnt);
- ipc_importance_task_release(imp_task);
- imp_task = IIT_NULL;
- }
-
- if (release_imp_task != IIT_NULL) {
- if (transfer_assertions)
- ipc_importance_task_drop_internal_assertion(release_imp_task, assertcnt);
- ipc_importance_task_release(release_imp_task);
- release_imp_task = IIT_NULL;
- }
-#endif /* IMPORTANCE_INHERITANCE */
-
return FALSE;
+#endif /* !IMPORTANCE_INHERITANCE */
}
/*
}
absdelta = 0 - delta;
- //assert(port->ip_impcount >= absdelta);
- /* if we have enough to deduct, we're done */
if (port->ip_impcount >= absdelta) {
port->ip_impcount -= absdelta;
return delta;
}
-#if DEVELOPMENT || DEBUG
+#if (DEVELOPMENT || DEBUG)
if (port->ip_receiver_name != MACH_PORT_NULL) {
task_t target_task = port->ip_receiver->is_task;
ipc_importance_task_t target_imp = target_task->task_imp_base;
printf("Over-release of importance assertions for port 0x%x receiver pid %d (%s), "
"dropping %d assertion(s) but port only has %d remaining.\n",
port->ip_receiver_name,
- target_imp->iit_bsd_pid, target_imp->iit_procname,
+ target_pid, target_procname,
absdelta, port->ip_impcount);
} else if (base != IP_NULL) {
target_procname = "unknown";
target_pid = -1;
}
- printf("Over-release of importance assertions for port %p "
+ printf("Over-release of importance assertions for port 0x%lx "
"enqueued on port 0x%x with receiver pid %d (%s), "
"dropping %d assertion(s) but port only has %d remaining.\n",
- port, base->ip_receiver_name,
- target_imp->iit_bsd_pid, target_imp->iit_procname,
+ (unsigned long)VM_KERNEL_UNSLIDE_OR_PERM((uintptr_t)port),
+ base->ip_receiver_name,
+ target_pid, target_procname,
absdelta, port->ip_impcount);
}
#endif
+
delta = 0 - port->ip_impcount;
port->ip_impcount = 0;
return delta;
* and if so, apply the delta.
* Conditions:
* The port is referenced and locked on entry.
+ * Importance may be locked.
* Nothing else is locked.
* The lock may be dropped on exit.
* Returns TRUE if lock was dropped.
boolean_t
ipc_port_importance_delta_internal(
ipc_port_t port,
+ natural_t options,
mach_port_delta_t *deltap,
ipc_importance_task_t *imp_task)
{
if (*deltap == 0)
return FALSE;
+ assert(options == IPID_OPTION_NORMAL || options == IPID_OPTION_SENDPOSSIBLE);
+
base = port;
/* if port is in transit, have to search for end of chain */
ipc_port_multiple_unlock();
}
- /* unlock down to the base, adding a boost at each level */
+ /*
+ * If the port lock is dropped b/c the port is in transit, there is a
+ * race window where another thread can drain messages and/or fire a
+ * send possible notification before we get here.
+ *
+ * We solve this race by checking to see if our caller armed the send
+ * possible notification, whether or not it's been fired yet, and
+ * whether or not we've already set the port's ip_spimportant bit. If
+ * we don't need a send-possible boost, then we'll just apply a
+ * harmless 0-boost to the port.
+ */
+ if (options & IPID_OPTION_SENDPOSSIBLE) {
+ assert(*deltap == 1);
+ if (port->ip_sprequests && port->ip_spimportant == 0)
+ port->ip_spimportant = 1;
+ else
+ *deltap = 0;
+ }
+
+ /* unlock down to the base, adjusting boost(s) at each level */
for (;;) {
- /*
- * JMM TODO - because of the port unlock to grab the multiple lock
- * above, a subsequent drop of importance could race and beat
- * the "previous" increase - causing the port impcount to go
- * negative briefly. The defensive deduction performed by
- * ipc_port_impcount_delta() defeats that, and therefore can
- * cause an importance leak once the increase finally arrives.
- *
- * Need to rework the importance delta logic to be more like
- * ipc_importance_inherit_from() where it locks all it needs in
- * one pass to avoid any lock drops - to keep that race from
- * ever occuring.
- */
*deltap = ipc_port_impcount_delta(port, *deltap, base);
if (port == base) {
boolean_t
ipc_port_importance_delta(
ipc_port_t port,
+ natural_t options,
mach_port_delta_t delta)
{
ipc_importance_task_t imp_task = IIT_NULL;
boolean_t dropped;
- dropped = ipc_port_importance_delta_internal(port, &delta, &imp_task);
+ dropped = ipc_port_importance_delta_internal(port, options, &delta, &imp_task);
- if (IIT_NULL == imp_task)
+ if (IIT_NULL == imp_task || delta == 0)
return dropped;
- if (!dropped) {
- dropped = TRUE;
+ if (!dropped)
ip_unlock(port);
- }
assert(ipc_importance_task_is_any_receiver_type(imp_task));
ipc_importance_task_drop_internal_assertion(imp_task, -delta);
ipc_importance_task_release(imp_task);
- return dropped;
+ return TRUE;
}
#endif /* IMPORTANCE_INHERITANCE */
ipc_port_t dest);
#if IMPORTANCE_INHERITANCE
+
+enum {
+ IPID_OPTION_NORMAL = 0, /* normal boost */
+ IPID_OPTION_SENDPOSSIBLE = 1, /* send-possible induced boost */
+};
+
/* apply importance delta to port only */
extern mach_port_delta_t
ipc_port_impcount_delta(
extern boolean_t
ipc_port_importance_delta_internal(
ipc_port_t port,
- mach_port_delta_t *delta,
+ natural_t options,
+ mach_port_delta_t *deltap,
ipc_importance_task_t *imp_task);
/* Apply an importance delta to a port and reflect change in receiver task */
extern boolean_t
ipc_port_importance_delta(
ipc_port_t port,
+ natural_t options,
mach_port_delta_t delta);
#endif /* IMPORTANCE_INHERITANCE */
#if IMPORTANCE_INHERITANCE
if (needboost == TRUE) {
- if (ipc_port_importance_delta(port, 1) == FALSE)
+ if (ipc_port_importance_delta(port, IPID_OPTION_SENDPOSSIBLE, 1) == FALSE)
ip_unlock(port);
} else
#endif /* IMPORTANCE_INHERITANCE */
* manager referenced during the callout.
*/
ivgt_lookup(key_index, FALSE, &manager, NULL);
- assert(IVAM_NULL != manager);
+ if (IVAM_NULL == manager) {
+ return KERN_INVALID_ARGUMENT;
+ }
/*
* Get the value(s) to pass to the manager
* manager referenced during the callout.
*/
ivgt_lookup(key_index, FALSE, &manager, NULL);
- assert(IVAM_NULL != manager);
+ if (IVAM_NULL == manager) {
+ return KERN_INVALID_ARGUMENT;
+ }
/*
* Get the value(s) to pass to the manager
if (recipe_size - recipe_used < sizeof(*recipe))
return KERN_NO_SPACE;
- recipe = (mach_voucher_attr_recipe_t)(void *)&recipes[recipe_used];
- content_size = recipe_size - recipe_used - sizeof(*recipe);
-
/*
* Get the manager for this key_index. The
* existence of a non-default value for this
*/
ivgt_lookup(key_index, FALSE, &manager, NULL);
assert(IVAM_NULL != manager);
+ if (IVAM_NULL == manager) {
+ continue;
+ }
+
+ recipe = (mach_voucher_attr_recipe_t)(void *)&recipes[recipe_used];
+ content_size = recipe_size - recipe_used - sizeof(*recipe);
/*
* Get the value(s) to pass to the manager
* execution.
*/
ivgt_lookup(key_index, TRUE, &manager, &control);
- assert(IVAM_NULL != manager);
+ if (IVAM_NULL == manager) {
+ return KERN_INVALID_ARGUMENT;
+ }
/*
* Get the values for this <voucher, key> pair
*/
int coalitions_get_list(int type, struct procinfo_coalinfo *coal_list, int list_sz);
boolean_t coalition_is_leader(task_t task, int coal_type, coalition_t *coal);
+task_t coalition_get_leader(coalition_t coal);
int coalition_get_task_count(coalition_t coal);
uint64_t coalition_get_page_count(coalition_t coal, int *ntasks);
int coalition_get_pid_list(coalition_t coal, uint32_t rolemask, int sort_order,
uint64_t bytesread;
uint64_t byteswritten;
uint64_t gpu_time;
+ uint64_t logical_immediate_writes;
+ uint64_t logical_deferred_writes;
+ uint64_t logical_invalidated_writes;
+ uint64_t logical_metadata_writes;
uint64_t task_count; /* tasks that have started in this coalition */
uint64_t dead_task_count; /* tasks that have exited in this coalition;
cr->bytesread += task->task_io_stats->disk_reads.size;
cr->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
cr->gpu_time += task_gpu_utilisation(task);
+ cr->logical_immediate_writes += task->task_immediate_writes;
+ cr->logical_deferred_writes += task->task_deferred_writes;
+ cr->logical_invalidated_writes += task->task_invalidated_writes;
+ cr->logical_metadata_writes += task->task_metadata_writes;
/* remove the task from the coalition's list */
remqueue(&task->task_coalition[COALITION_TYPE_RESOURCE]);
uint64_t bytesread = coal->r.bytesread;
uint64_t byteswritten = coal->r.byteswritten;
uint64_t gpu_time = coal->r.gpu_time;
+ uint64_t logical_immediate_writes = coal->r.logical_immediate_writes;
+ uint64_t logical_deferred_writes = coal->r.logical_deferred_writes;
+ uint64_t logical_invalidated_writes = coal->r.logical_invalidated_writes;
+ uint64_t logical_metadata_writes = coal->r.logical_metadata_writes;
int64_t cpu_time_billed_to_me = 0;
int64_t cpu_time_billed_to_others = 0;
bytesread += task->task_io_stats->disk_reads.size;
byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
gpu_time += task_gpu_utilisation(task);
+ logical_immediate_writes += task->task_immediate_writes;
+ logical_deferred_writes += task->task_deferred_writes;
+ logical_invalidated_writes += task->task_invalidated_writes;
+ logical_metadata_writes += task->task_metadata_writes;
cpu_time_billed_to_me += (int64_t)bank_billed_time(task->bank_context);
cpu_time_billed_to_others += (int64_t)bank_serviced_time(task->bank_context);
}
cru_out->bytesread = bytesread;
cru_out->byteswritten = byteswritten;
cru_out->gpu_time = gpu_time;
+ cru_out->logical_immediate_writes = logical_immediate_writes;
+ cru_out->logical_deferred_writes = logical_deferred_writes;
+ cru_out->logical_invalidated_writes = logical_invalidated_writes;
+ cru_out->logical_metadata_writes = logical_metadata_writes;
ledger_dereference(sum_ledger);
sum_ledger = LEDGER_NULL;
}
+task_t coalition_get_leader(coalition_t coal)
+{
+ task_t leader = TASK_NULL;
+
+ if (!coal)
+ return TASK_NULL;
+
+ coalition_lock(coal);
+ if (coal->type != COALITION_TYPE_JETSAM)
+ goto out_unlock;
+
+ leader = coal->j.leader;
+ if (leader != TASK_NULL)
+ task_reference(leader);
+
+out_unlock:
+ coalition_unlock(coal);
+ return leader;
+}
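
The accessor above returns the jetsam coalition's leader with an extra task reference, so callers must drop that reference when done. A minimal caller sketch, illustrative only (coal is assumed to be a coalition reference the caller already holds; task_pid() and task_deallocate() are the usual kernel helpers):

	/* Report the leader of a jetsam coalition, then release the ref we were given. */
	task_t leader = coalition_get_leader(coal);	/* TASK_NULL for non-jetsam coalitions */
	if (leader != TASK_NULL) {
		printf("coalition leader pid %d\n", task_pid(leader));
		task_deallocate(leader);		/* balance the reference taken above */
	}
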
+
+
int coalition_get_task_count(coalition_t coal)
{
int ntasks = 0;
nestedpanic +=1;
PANIC_UNLOCK();
Debugger("double panic");
- printf("double panic: We are hanging here...\n");
+ // a printf statement here was removed to avoid a panic-loop caused
+ // by a panic from printf
panic_stop();
/* NOTREACHED */
}
panicstr = (char *)0;
PANIC_UNLOCK();
+#if DEVELOPMENT || DEBUG
if (return_on_panic) {
panic_normal();
enable_preemption();
splx(s);
return;
}
+#else
+ (void)s;
+#endif
kdb_printf("panic: We are hanging here...\n");
panic_stop();
/* NOTREACHED */
ipc_kmsg_t reply;
kern_return_t kr;
ipc_port_t *destp;
+ ipc_port_t replyp = IPC_PORT_NULL;
mach_msg_format_0_trailer_t *trailer;
register mig_hash_t *ptr;
}
else {
if (!ipc_kobject_notify(request->ikm_header, reply->ikm_header)){
-#if MACH_IPC_TEST
+#if DEVELOPMENT || DEBUG
printf("ipc_kobject_server: bogus kernel message, id=%d\n",
request->ikm_header->msgh_id);
-#endif /* MACH_IPC_TEST */
+#endif /* DEVELOPMENT || DEBUG */
_MIG_MSGID_INVALID(request->ikm_header->msgh_id);
((mig_reply_error_t *) reply->ikm_header)->RetCode
ipc_kmsg_destroy(request);
}
+ replyp = (ipc_port_t)reply->ikm_header->msgh_remote_port;
+
if (kr == MIG_NO_REPLY) {
/*
* The server function will send a reply message
ipc_kmsg_free(reply);
return IKM_NULL;
- } else if (!IP_VALID((ipc_port_t)reply->ikm_header->msgh_remote_port)) {
+ } else if (!IP_VALID(replyp)) {
/*
* Can't queue the reply message if the destination
* (the reply port) isn't valid.
ipc_kmsg_destroy(reply);
+ return IKM_NULL;
+ } else if (replyp->ip_receiver == ipc_space_kernel) {
+ /*
+ * Don't send replies to kobject kernel ports
+ */
+#if DEVELOPMENT || DEBUG
+ printf("%s: refusing to send reply to kobject %d port (id:%d)\n",
+ __func__, ip_kotype(replyp),
+ request->ikm_header->msgh_id);
+#endif /* DEVELOPMENT || DEBUG */
+ ipc_kmsg_destroy(reply);
return IKM_NULL;
}
mach_msg_header_t *request_header,
mach_msg_header_t *reply_header)
{
+ mach_msg_max_trailer_t * trailer;
ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
((mig_reply_error_t *) reply_header)->RetCode = MIG_NO_REPLY;
+
+ trailer = (mach_msg_max_trailer_t *)
+ ((vm_offset_t)request_header + request_header->msgh_size);
+ if (0 != bcmp(&trailer->msgh_audit, &KERNEL_AUDIT_TOKEN,
+ sizeof(trailer->msgh_audit))) {
+ return FALSE;
+ }
+ if (0 != bcmp(&trailer->msgh_sender, &KERNEL_SECURITY_TOKEN,
+ sizeof(trailer->msgh_sender))) {
+ return FALSE;
+ }
+
switch (request_header->msgh_id) {
case MACH_NOTIFY_NO_SENDERS:
switch (ip_kotype(port)) {
int
kpc_set_config(uint32_t classes, kpc_config_t *configv)
{
+ int ret = 0;
struct kpc_config_remote mp_config = {
.classes = classes, .configv = configv,
.pmc_mask = kpc_get_configurable_pmc_mask(classes)
if (classes & KPC_CLASS_POWER_MASK)
mp_config.classes |= KPC_CLASS_CONFIGURABLE_MASK;
- kpc_set_config_arch( &mp_config );
+ ret = kpc_set_config_arch( &mp_config );
lck_mtx_unlock(&kpc_config_lock);
- return 0;
+ return ret;
}
/* allocate a buffer large enough for all possible counters */
assert(host_priv == &realhost);
+#if DEVELOPMENT || DEBUG
if (options & HOST_REBOOT_DEBUGGER) {
Debugger("Debugger");
return (KERN_SUCCESS);
}
+#endif
if (options & HOST_REBOOT_UPSDELAY) {
// UPS power cutoff path
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
- VM_KERNEL_UNSLIDE(event), 0, 0, 0, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(event), 0, 0, 0, 0);
struct waitq *waitq;
waitq = global_eventq(event);
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
- VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
interruptible,
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
- VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
interruptible,
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
- VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
interruptible,
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
- VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(event), interruptible, deadline, 0, 0);
wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
interruptible,
csr_init();
#endif
+ if (PE_i_can_has_debugger(NULL) &&
+ PE_parse_boot_argn("-show_pointers", &namep, sizeof (namep))) {
+ doprnt_hide_pointers = FALSE;
+ }
+
kernel_bootstrap_log("stackshot_lock_init");
stackshot_lock_init();
vm_commpage_init();
vm_commpage_text_init();
-
#if CONFIG_MACF
kernel_bootstrap_log("mac_policy_initmach");
mac_policy_initmach();
#endif
+
#if CONFIG_SCHED_SFI
kernel_bootstrap_log("sfi_init");
sfi_init();
/*
* Routine: semaphore_destroy_internal
*
- * This call will only succeed if the specified task is the SAME task
- * specified at the semaphore's creation.
+ * Disassociate a semaphore from its owning task, mark it inactive,
+ * and set any waiting threads running with THREAD_RESTART.
*
- * All threads currently blocked on the semaphore are awoken. These
- * threads will return with the KERN_TERMINATED error.
+ * Conditions:
+ * task is locked
+ * semaphore is locked
+ * semaphore is owned by the specified task
+ * Returns:
+ * with semaphore unlocked
*/
-kern_return_t
+static void
semaphore_destroy_internal(
task_t task,
semaphore_t semaphore)
{
int old_count;
- spl_t spl_level;
-
- /*
- * Disown semaphore
- */
- task_lock(task);
- if (semaphore->owner != task) {
- task_unlock(task);
- return KERN_INVALID_ARGUMENT;
- }
- spl_level = splsched();
- semaphore_lock(semaphore);
+ /* unlink semaphore from owning task */
+ assert(semaphore->owner == task);
remqueue((queue_entry_t) semaphore);
semaphore->owner = TASK_NULL;
task->semaphores_owned--;
- task_unlock(task);
-
/*
* Deactivate semaphore
*/
} else {
semaphore_unlock(semaphore);
}
- splx(spl_level);
-
- return KERN_SUCCESS;
}
/*
task_t task,
semaphore_t semaphore)
{
- kern_return_t kr;
+ spl_t spl_level;
if (semaphore == SEMAPHORE_NULL)
return KERN_INVALID_ARGUMENT;
if (task == TASK_NULL) {
- kr = KERN_INVALID_ARGUMENT;
- } else {
- kr = semaphore_destroy_internal(task, semaphore);
+ semaphore_dereference(semaphore);
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ task_lock(task);
+ spl_level = splsched();
+ semaphore_lock(semaphore);
+
+ if (semaphore->owner != task) {
+ semaphore_unlock(semaphore);
+ splx(spl_level);
+ task_unlock(task);
+ return KERN_INVALID_ARGUMENT;
}
+
+ semaphore_destroy_internal(task, semaphore);
+ /* semaphore unlocked */
+
+ splx(spl_level);
+ task_unlock(task);
+
semaphore_dereference(semaphore);
- return kr;
+ return KERN_SUCCESS;
+}
+
+/*
+ * Routine: semaphore_destroy_all
+ *
+ * Destroy all the semaphores associated with a given task.
+ */
+#define SEMASPERSPL 20 /* max number of semaphores to destroy per spl hold */
+
+void
+semaphore_destroy_all(
+ task_t task)
+{
+ uint32_t count;
+ spl_t spl_level;
+
+ count = 0;
+ task_lock(task);
+ while (!queue_empty(&task->semaphore_list)) {
+ semaphore_t semaphore;
+
+ semaphore = (semaphore_t) queue_first(&task->semaphore_list);
+
+ if (count == 0)
+ spl_level = splsched();
+ semaphore_lock(semaphore);
+
+ semaphore_destroy_internal(task, semaphore);
+ /* semaphore unlocked */
+
+ /* throttle number of semaphores per interrupt disablement */
+ if (++count == SEMASPERSPL) {
+ count = 0;
+ splx(spl_level);
+ }
+ }
+ if (count != 0)
+ splx(spl_level);
+
+ task_unlock(task);
}
/*
semaphore_dereference(
semaphore_t semaphore)
{
+ uint32_t collisions;
+ spl_t spl_level;
+
if (semaphore == NULL)
return;
assert(!port->ip_srights);
ipc_port_dealloc_kernel(port);
}
- if (semaphore->active) {
- assert(semaphore->owner != TASK_NULL);
- semaphore_destroy_internal(semaphore->owner, semaphore);
+
+ /*
+ * Lock the semaphore to lock in the owner task reference.
+ * Then continue to try to lock the task (inverse order).
+ */
+ spl_level = splsched();
+ semaphore_lock(semaphore);
+ for (collisions = 0; semaphore->active; collisions++) {
+ task_t task = semaphore->owner;
+
+ assert(task != TASK_NULL);
+
+ if (task_lock_try(task)) {
+ semaphore_destroy_internal(task, semaphore);
+ /* semaphore unlocked */
+ splx(spl_level);
+ task_unlock(task);
+ goto out;
+ }
+
+ /* failed to get out-of-order locks */
+ semaphore_unlock(semaphore);
+ splx(spl_level);
+ mutex_pause(collisions);
+ spl_level = splsched();
+ semaphore_lock(semaphore);
}
+ semaphore_unlock(semaphore);
+ splx(spl_level);
+
+ out:
zfree(semaphore_zone, semaphore);
}
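
The loop above has to take the locks out of their canonical order (task before semaphore), so it try-locks the task and backs off with mutex_pause() on contention. A self-contained user-space sketch of the same try-lock-with-backoff pattern, using pthreads; all names are illustrative:

	#include <pthread.h>
	#include <unistd.h>

	/* We hold lock_a, but also need lock_b whose canonical order is b-before-a. */
	static void lock_both_out_of_order(pthread_mutex_t *lock_a, pthread_mutex_t *lock_b)
	{
		unsigned collisions = 0;

		pthread_mutex_lock(lock_a);
		while (pthread_mutex_trylock(lock_b) != 0) {
			pthread_mutex_unlock(lock_a);	/* let the canonical order win */
			usleep(1u << (collisions < 10 ? collisions : 10));	/* bounded backoff */
			collisions++;
			pthread_mutex_lock(lock_a);	/* retry the inverse-order attempt */
		}
		/* both locks held here */
	}
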
extern void semaphore_reference(semaphore_t semaphore);
extern void semaphore_dereference(semaphore_t semaphore);
-extern kern_return_t semaphore_destroy_internal(task_t task, semaphore_t semaphore);
+extern void semaphore_destroy_all(task_t task);
#endif /* MACH_KERNEL_PRIVATE */
#endif
};
+/* System sleep state */
+boolean_t tasks_suspend_state;
+
+
void init_task_ledgers(void);
void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
assert(new_task->task_io_stats != NULL);
bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
+ new_task->task_immediate_writes = 0;
+ new_task->task_deferred_writes = 0;
+ new_task->task_invalidated_writes = 0;
+ new_task->task_metadata_writes = 0;
bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
lck_mtx_lock(&tasks_threads_lock);
queue_enter(&tasks, new_task, task_t, tasks);
tasks_count++;
+ if (tasks_suspend_state) {
+ task_suspend_internal(new_task);
+ }
lck_mtx_unlock(&tasks_threads_lock);
*child_task = new_task;
return (KERN_SUCCESS);
}
+void
+tasks_system_suspend(boolean_t suspend)
+{
+ task_t task;
+
+ lck_mtx_lock(&tasks_threads_lock);
+ assert(tasks_suspend_state != suspend);
+ tasks_suspend_state = suspend;
+ queue_iterate(&tasks, task, task_t, tasks) {
+ if (task == kernel_task) {
+ continue;
+ }
+ suspend ? task_suspend_internal(task) : task_resume_internal(task);
+ }
+ lck_mtx_unlock(&tasks_threads_lock);
+}
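
tasks_system_suspend() records the suspend state globally so that tasks created while the system is suspended are also held (see the task_create path above). A hedged sketch of a caller in a sleep/wake path; the surrounding hook is hypothetical:

	/* Hypothetical sleep/wake hook (illustrative only). Calls must alternate
	 * TRUE/FALSE, since the function asserts the state actually changes. */
	static void example_system_sleep(boolean_t entering_sleep)
	{
		tasks_system_suspend(entering_sleep ? TRUE : FALSE);
	}
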
+
/*
* task_start_halt:
*
void
task_synchronizer_destroy_all(task_t task)
{
- semaphore_t semaphore;
-
/*
* Destroy owned semaphores
*/
-
- while (!queue_empty(&task->semaphore_list)) {
- semaphore = (semaphore_t) queue_first(&task->semaphore_list);
- (void) semaphore_destroy_internal(task, semaphore);
- }
+ semaphore_destroy_all(task);
}
/*
/* We don't need the lock to read this flag */
return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
}
+
+void task_update_logical_writes(task_t task, uint32_t io_size, int flags)
+{
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE, task_pid(task), io_size, flags, 0, 0);
+ switch(flags) {
+ case TASK_WRITE_IMMEDIATE:
+ OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
+ break;
+ case TASK_WRITE_DEFERRED:
+ OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
+ break;
+ case TASK_WRITE_INVALIDATED:
+ OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
+ break;
+ case TASK_WRITE_METADATA:
+ OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
+ break;
+ }
+ return;
+}
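
Each TASK_WRITE_* flag selects one of the new per-task logical-write counters, and io_size is a byte count added atomically. A minimal caller sketch; the VM call sites added later in this diff follow the same shape, and the sizes here are arbitrary illustrations:

	/* One page dirtied for delayed write-back: a deferred logical write. */
	task_update_logical_writes(current_task(), PAGE_SIZE, TASK_WRITE_DEFERRED);

	/* 16KB of filesystem metadata written on behalf of the current task. */
	task_update_logical_writes(current_task(), 16 * 1024, TASK_WRITE_METADATA);
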
low_mem_privileged_listener :1, /* if set, task would like to know about pressure changes before other tasks on the system */
mem_notify_reserved :27; /* reserved for future use */
- io_stat_info_t task_io_stats;
-
+ io_stat_info_t task_io_stats;
+ uint64_t task_immediate_writes __attribute__((aligned(8)));
+ uint64_t task_deferred_writes __attribute__((aligned(8)));
+ uint64_t task_invalidated_writes __attribute__((aligned(8)));
+ uint64_t task_metadata_writes __attribute__((aligned(8)));
+
/*
* The cpu_time_qos_stats fields are protected by the task lock
*/
uint32_t pid,
uint64_t uniqueid);
+extern void tasks_system_suspend(boolean_t suspend);
+
#if CONFIG_FREEZE
/* Freeze a task's resident pages */
extern void task_set_gpu_denied(task_t task, boolean_t denied);
extern boolean_t task_is_gpu_denied(task_t task);
+#define TASK_WRITE_IMMEDIATE 0x1
+#define TASK_WRITE_DEFERRED 0x2
+#define TASK_WRITE_INVALIDATED 0x4
+#define TASK_WRITE_METADATA 0x8
+extern void task_update_logical_writes(task_t task, uint32_t io_size, int flags);
+
#endif /* XNU_KERNEL_PRIVATE */
#ifdef KERNEL_PRIVATE
}
}
-#define UPDATE_IO_STATS(info, size) \
-{ \
- info.count++; \
- info.size += size; \
-}
-
-#define UPDATE_IO_STATS_ATOMIC(info, size) \
-{ \
- OSIncrementAtomic64((SInt64 *)&(info.count)); \
- OSAddAtomic64(size, (SInt64 *)&(info.size)); \
-}
-
void thread_update_io_stats(thread_t thread, int size, int io_flags)
{
int io_tier;
#if DEVELOPMENT || DEBUG
KERNEL_DEBUG_CONSTANT(
MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE,
- VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);
+ VM_KERNEL_UNSLIDE(func), VM_KERNEL_UNSLIDE_OR_PERM(param0), VM_KERNEL_UNSLIDE_OR_PERM(param1), 0, 0);
#endif /* DEVELOPMENT || DEBUG */
#if CONFIG_DTRACE
#if TIMER_ASSERT
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
- call,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
call->async_dequeue,
- TCE(call)->queue,
+ VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
0x1c, 0);
timer_call_enqueue_deadline_unlocked_async1++;
#endif
#if TIMER_ASSERT
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
- call,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
call->async_dequeue,
- TCE(call)->queue,
+ VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
0, 0);
#endif
if (old_queue != NULL) {
#if TIMER_ASSERT
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
- call,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
call->async_dequeue,
- TCE(call)->queue,
+ VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
0x1c, 0);
timer_call_dequeue_unlocked_async1++;
#endif
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_ENTER | DBG_FUNC_START,
- call,
- param1, deadline, flags, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
+ VM_KERNEL_UNSLIDE_OR_PERM(param1), deadline, flags, 0);
urgency = (flags & TIMER_CALL_URGENCY_MASK);
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_ENTER | DBG_FUNC_END,
- call,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
(old_queue != NULL), deadline, queue->count, 0);
splx(s);
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_CANCEL | DBG_FUNC_START,
- call,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
TCE(call)->deadline, call->soft_deadline, call->flags, 0);
old_queue = timer_call_dequeue_unlocked(call);
}
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_CANCEL | DBG_FUNC_END,
- call,
- old_queue,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
+ VM_KERNEL_UNSLIDE_OR_PERM(old_queue),
TCE(call)->deadline - mach_absolute_time(),
TCE(call)->deadline - TCE(call)->entry_time, 0);
splx(s);
#if TIMER_ASSERT
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
- call,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
call->async_dequeue,
- TCE(call)->queue,
+ VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
0x2b, 0);
#endif
timer_queue_unlock(queue);
TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->soft_deadline, 0, 0, 0);
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
- call,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
call->soft_deadline,
TCE(call)->deadline,
TCE(call)->entry_time, 0);
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_CALLOUT | DBG_FUNC_START,
- call, VM_KERNEL_UNSLIDE(func), param0, param1, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
+ VM_KERNEL_UNSLIDE_OR_PERM(param0),
+ VM_KERNEL_UNSLIDE_OR_PERM(param1),
+ 0);
#if CONFIG_DTRACE
DTRACE_TMR7(callout__start, timer_call_func_t, func,
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_CALLOUT | DBG_FUNC_END,
- call, VM_KERNEL_UNSLIDE(func), param0, param1, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
+ VM_KERNEL_UNSLIDE_OR_PERM(param0),
+ VM_KERNEL_UNSLIDE_OR_PERM(param1),
+ 0);
call = NULL;
timer_queue_lock_spin(queue);
} else {
#ifdef TIMER_ASSERT
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
- call,
- TCE(call)->queue,
- call->lock.interlock.lock_data,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
+ VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
+ VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
0x2b, 0);
#endif
timer_queue_migrate_lock_skips++;
call->soft_deadline,
TCE(call)->deadline,
TCE(call)->entry_time,
- TCE(call)->func,
+ VM_KERNEL_UNSLIDE(TCE(call)->func),
0);
call = TIMER_CALL(queue_next(qe(call)));
} while (!queue_end(&queue->head, qe(call)));
#ifdef TIMER_ASSERT
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
- call,
- TCE(call)->queue,
- call->lock.interlock.lock_data,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
+ VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
+ VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
0x2c, 0);
#endif
timer_call_entry_dequeue_async(call);
if (deadline < now)
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_OVERDUE | DBG_FUNC_NONE,
- call,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
deadline,
now,
threshold,
#endif
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_ESCALATE | DBG_FUNC_NONE,
- call,
+ VM_KERNEL_UNSLIDE_OR_PERM(call),
TCE(call)->deadline,
TCE(call)->entry_time,
- TCE(call)->func,
+ VM_KERNEL_UNSLIDE(TCE(call)->func),
0);
tlp->escalates++;
timer_call_entry_dequeue(call);
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_UPDATE | DBG_FUNC_START,
- &tlp->queue,
+ VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
tlp->threshold.deadline,
tlp->threshold.preempted,
tlp->queue.count, 0);
TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
DECR_TIMER_UPDATE | DBG_FUNC_END,
- &tlp->queue,
+ VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
tlp->threshold.deadline,
tlp->threshold.scans,
tlp->queue.count, 0);
type, table);
assert(nelem > 0);
- elem = NULL;
try_again:
+ elem = NULL;
if (ntries++ > max_retries) {
struct wqt_elem *tmp;
if (table->used_elem + nelem >= table_size)
* WQS we're unlinking, or to an invalid object:
* no need to invalidate it
*/
- *wq_setid = right->sl_set_id.id;
+ *wq_setid = right ? right->sl_set_id.id : 0;
lt_invalidate(parent);
wqdbg_v("S1, L");
return left ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID;
* WQS we're unlinking, or to an invalid object:
* no need to invalidate it
*/
- *wq_setid = left->sl_set_id.id;
+ *wq_setid = left ? left->sl_set_id.id : 0;
lt_invalidate(parent);
wqdbg_v("S1, R");
return right ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID;
vm_offset_t zone_map_min_address = 0; /* initialized in zone_init */
vm_offset_t zone_map_max_address = 0;
+/* Globals for random boolean generator for elements in free list */
+#define MAX_ENTROPY_PER_ZCRAM 4
+#define RANDOM_BOOL_GEN_SEED_COUNT 4
+static unsigned int bool_gen_seed[RANDOM_BOOL_GEN_SEED_COUNT];
+static unsigned int bool_gen_global = 0;
+decl_simple_lock_data(, bool_gen_lock)
+
/* Helpful for walking through a zone's free element list. */
struct zone_free_element {
struct zone_free_element *next;
thread_deallocate(z->zone_replenish_thread);
}
+/*
+ * Boolean Random Number Generator for generating booleans to randomize
+ * the order of elements in newly zcram()'ed memory. The algorithm is a
+ * modified version of the KISS RNG proposed in the paper:
+ * http://stat.fsu.edu/techreports/M802.pdf
+ * The modifications are documented in the technical report
+ * from UCL:
+ * http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf
+ */
+
+static void random_bool_gen_entropy(
+ int *buffer,
+ int count)
+{
+
+ int i, t;
+ simple_lock(&bool_gen_lock);
+ for (i = 0; i < count; i++) {
+ bool_gen_seed[1] ^= (bool_gen_seed[1] << 5);
+ bool_gen_seed[1] ^= (bool_gen_seed[1] >> 7);
+ bool_gen_seed[1] ^= (bool_gen_seed[1] << 22);
+ t = bool_gen_seed[2] + bool_gen_seed[3] + bool_gen_global;
+ bool_gen_seed[2] = bool_gen_seed[3];
+ bool_gen_global = t < 0;
+ bool_gen_seed[3] = t & 2147483647;
+ bool_gen_seed[0] += 1411392427;
+ buffer[i] = (bool_gen_seed[0] + bool_gen_seed[1] + bool_gen_seed[3]);
+ }
+ simple_unlock(&bool_gen_lock);
+}
+
+static boolean_t random_bool_gen(
+ int *buffer,
+ int index,
+ int bufsize)
+{
+ int valindex, bitpos;
+ valindex = (index / (8 * sizeof(int))) % bufsize;
+ bitpos = index % (8 * sizeof(int));
+ return (boolean_t)(buffer[valindex] & (1 << bitpos));
+}
+
+static void
+random_free_to_zone(
+ zone_t zone,
+ vm_offset_t newmem,
+ vm_offset_t first_element_offset,
+ int element_count,
+ boolean_t from_zm,
+ int *entropy_buffer)
+{
+ vm_offset_t last_element_offset;
+ vm_offset_t element_addr;
+ vm_size_t elem_size;
+ int index;
+
+ elem_size = zone->elem_size;
+ last_element_offset = first_element_offset + ((element_count * elem_size) - elem_size);
+ for (index = 0; index < element_count; index++) {
+ assert(first_element_offset <= last_element_offset);
+ if (random_bool_gen(entropy_buffer, index, MAX_ENTROPY_PER_ZCRAM)) {
+ element_addr = newmem + first_element_offset;
+ first_element_offset += elem_size;
+ } else {
+ element_addr = newmem + last_element_offset;
+ last_element_offset -= elem_size;
+ }
+ if (element_addr != (vm_offset_t)zone) {
+ zone->count++; /* compensate for free_to_zone */
+ free_to_zone(zone, element_addr, FALSE);
+ }
+ if (!zone->use_page_list && from_zm) {
+ zone_page_alloc(element_addr, elem_size);
+ }
+ zone->cur_size += elem_size;
+ }
+}
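
random_free_to_zone() consumes one random bit per element to decide whether the next element freed comes from the front or the back of the fresh memory, which randomizes the resulting zone free-list order. A self-contained user-space sketch of that front/back shuffle, with a stand-in random_bit() in place of random_bool_gen(); names are illustrative:

	#include <stdio.h>
	#include <stdlib.h>

	static int random_bit(void) { return rand() & 1; }	/* any bit source works */

	int main(void)
	{
		int first = 0, last = 9;		/* element indices in a fresh chunk */
		while (first <= last) {
			int elem = random_bit() ? first++ : last--;
			printf("free element %d\n", elem);	/* order follows the random bit stream */
		}
		return 0;
	}
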
+
/*
* Cram the given memory into the specified zone. Update the zone page count accordingly.
*/
{
vm_size_t elem_size;
boolean_t from_zm = FALSE;
+ vm_offset_t first_element_offset;
+ int element_count;
+ int entropy_buffer[MAX_ENTROPY_PER_ZCRAM];
/* Basic sanity checks */
assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
elem_size = zone->elem_size;
+ KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_START, VM_KERNEL_ADDRPERM(zone), size, 0, 0, 0);
+
if (from_zone_map(newmem, size))
from_zm = TRUE;
ZONE_PAGE_COUNT_INCR(zone, (size / PAGE_SIZE));
+ random_bool_gen_entropy(entropy_buffer, MAX_ENTROPY_PER_ZCRAM);
+
lock_zone(zone);
if (zone->use_page_list) {
assert((size & PAGE_MASK) == 0);
for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) {
- vm_size_t pos_in_page;
page_metadata = (struct zone_page_metadata *)(newmem);
page_metadata->pages.next = NULL;
enqueue_tail(&zone->pages.all_used, (queue_entry_t)page_metadata);
- vm_offset_t first_element_offset;
if (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT == 0){
first_element_offset = zone_page_metadata_size;
} else {
first_element_offset = zone_page_metadata_size + (ZONE_ELEMENT_ALIGNMENT - (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT));
}
-
- for (pos_in_page = first_element_offset; (newmem + pos_in_page + elem_size) < (vm_offset_t)(newmem + PAGE_SIZE); pos_in_page += elem_size) {
- page_metadata->alloc_count++;
- zone->count++; /* compensate for free_to_zone */
- free_to_zone(zone, newmem + pos_in_page, FALSE);
- zone->cur_size += elem_size;
- }
- }
- } else {
- while (size >= elem_size) {
- zone->count++; /* compensate for free_to_zone */
- if (newmem == (vm_offset_t)zone) {
- /* Don't free zone_zone zone */
- } else {
- free_to_zone(zone, newmem, FALSE);
- }
- if (from_zm)
- zone_page_alloc(newmem, elem_size);
- size -= elem_size;
- newmem += elem_size;
- zone->cur_size += elem_size;
+ element_count = (int)((PAGE_SIZE - first_element_offset) / elem_size);
+ page_metadata->alloc_count += element_count;
+ random_free_to_zone(zone, newmem, first_element_offset, element_count, from_zm, entropy_buffer);
}
+ } else {
+ first_element_offset = 0;
+ element_count = (int)((size - first_element_offset) / elem_size);
+ random_free_to_zone(zone, newmem, first_element_offset, element_count, from_zm, entropy_buffer);
}
unlock_zone(zone);
+
+ KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_END, VM_KERNEL_ADDRPERM(zone), 0, 0, 0, 0);
+
}
zone_bootstrap(void)
{
char temp_buf[16];
+ unsigned int i;
if (PE_parse_boot_argn("-zinfop", temp_buf, sizeof(temp_buf))) {
zinfo_per_task = TRUE;
/* Set up zone element poisoning */
zp_init();
+ /* Seed the random boolean generator for elements in zone free list */
+ for (i = 0; i < RANDOM_BOOL_GEN_SEED_COUNT; i++) {
+ bool_gen_seed[i] = (unsigned int)early_random();
+ }
+ simple_lock_init(&bool_gen_lock, 0);
+
/* should zlog log to debug zone corruption instead of leaks? */
if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf))) {
corruption_debug_flag = TRUE;
/* initialize fake zones and zone info if tracking by task */
if (zinfo_per_task) {
vm_size_t zisize = sizeof(zinfo_usage_store_t) * ZINFO_SLOTS;
- unsigned int i;
for (i = 0; i < num_fake_zones; i++)
fake_zones[i].init(ZINFO_SLOTS - num_fake_zones + i);
uint64_t gpu_time;
uint64_t cpu_time_billed_to_me;
uint64_t cpu_time_billed_to_others;
+ uint64_t logical_immediate_writes;
+ uint64_t logical_deferred_writes;
+ uint64_t logical_invalidated_writes;
+ uint64_t logical_metadata_writes;
};
#ifdef PRIVATE
/* The requested property cannot be changed at this time.
*/
+#define KERN_INSUFFICIENT_BUFFER_SIZE 52
+ /* The provided buffer is of insufficient size for the requested data.
+ */
+
#define KERN_RETURN_MAX 0x100
/* Maximum return value allowable
*/
#define CPUFAMILY_ARM_SWIFT 0x1e2d6381
#define CPUFAMILY_ARM_CYCLONE 0x37a09642
#define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
-#define CPUFAMILY_ARM_TWISTER 0x92fb37c8
/* The following synonyms are deprecated: */
#define CPUFAMILY_INTEL_6_14 CPUFAMILY_INTEL_YONAH
#endif /* PRIVATE */
+#define IO_NUM_PRIORITIES 4
-/*
- * Obsolete interfaces.
- */
-
-#define THREAD_SCHED_TIMESHARE_INFO 10
-#define THREAD_SCHED_RR_INFO 11
-#define THREAD_SCHED_FIFO_INFO 12
+#define UPDATE_IO_STATS(info, size) \
+{ \
+ info.count++; \
+ info.size += size; \
+}
-#define IO_NUM_PRIORITIES 4
+#define UPDATE_IO_STATS_ATOMIC(info, io_size) \
+{ \
+ OSIncrementAtomic64((SInt64 *)&(info.count)); \
+ OSAddAtomic64(io_size, (SInt64 *)&(info.size)); \
+}
struct io_stat_entry {
uint64_t count;
typedef struct io_stat_info *io_stat_info_t;
+/*
+ * Obsolete interfaces.
+ */
+
+#define THREAD_SCHED_TIMESHARE_INFO 10
+#define THREAD_SCHED_RR_INFO 11
+#define THREAD_SCHED_FIFO_INFO 12
+
#endif /* _MACH_THREAD_INFO_H_ */
* VM_KERNEL_UNSLIDE_OR_ADDRPERM:
* Use this macro when you are exposing an address to userspace that could
* come from either kernel text/data *or* the heap. This is a rare case,
- * but one that does come up and must be handled correctly.
+ * but one that does come up and must be handled correctly. If the argument
+ * is known to be lower than any potential heap address, no transformation
+ * is applied, to avoid revealing the operation on a constant.
*
* Nesting of these macros should be considered invalid.
*/
VM_KERNEL_IS_PRELINKINFO(_v) || \
VM_KERNEL_IS_KEXT_LINKEDIT(_v)) ? \
(vm_offset_t)(_v) - vm_kernel_slide : \
- VM_KERNEL_ADDRPERM(_v))
+ ((vm_offset_t)(_v) >= VM_MIN_KERNEL_AND_KEXT_ADDRESS ? VM_KERNEL_ADDRPERM(_v) : (vm_offset_t)(_v)))
#endif /* XNU_KERNEL_PRIVATE */
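
With this change, VM_KERNEL_UNSLIDE_OR_ADDRPERM() only permutes values that could plausibly be heap addresses; smaller constants pass through untouched. A hypothetical tracepoint showing the intended use (the event code and variable are made up for illustration):

	/* 'obj' may point at kernel text, a kext, or a heap allocation. */
	KERNEL_DEBUG_CONSTANT(EXAMPLE_TRACE_CODE,
	    VM_KERNEL_UNSLIDE_OR_ADDRPERM(obj), 0, 0, 0, 0);
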
type symtab_name_t = c_string[*:32];
-type lockgroup_info_t = struct[63] of integer_t;
+type lockgroup_info_t = struct[33] of uint64_t;
type lockgroup_info_array_t = array[] of lockgroup_info_t;
type mach_memory_info_t = struct[8] of uint64_t;
int i;
int orig_offset;
vm_page_t page_run[MAX_RUN];
+ int dirty_count; /* keeps track of number of pages dirtied as part of this uiomove */
object = memory_object_control_to_vm_object(control);
if (object == VM_OBJECT_NULL) {
return 0;
}
orig_offset = start_offset;
-
+
+ dirty_count = 0;
while (io_requested && retval == 0) {
cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;
if (cur_needed > MAX_RUN)
cur_needed = MAX_RUN;
-
+
for (cur_run = 0; cur_run < cur_needed; ) {
if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
assert(!dst_page->encrypted);
if (mark_dirty) {
+ if (dst_page->dirty == FALSE)
+ dirty_count++;
SET_PAGE_DIRTY(dst_page, FALSE);
if (dst_page->cs_validated &&
!dst_page->cs_tainted) {
orig_offset = 0;
}
vm_object_unlock(object);
-
+ task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED);
return (retval);
}
struct vm_page_delayed_work *dwp;
int dw_count;
int dw_limit;
+ int dirty_count;
dwp = &dw_array[0];
dw_count = 0;
dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
+ dirty_count = 0;
for (;
offset < offset_end && object->resident_page_count;
break;
case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
+ if (m->dirty == TRUE)
+ dirty_count++;
dwp->dw_mask |= DW_vm_page_free;
break;
break;
}
}
+
+ if (dirty_count) {
+ task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_INVALIDATED);
+ }
/*
* We have completed the scan for applicable pages.
* Clean any pages that have been saved.
void pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr);
#endif
-unsigned int pmap_query_resident(pmap_t pmap,
- vm_map_offset_t s,
- vm_map_offset_t e,
- unsigned int *compressed_count_p);
+mach_vm_size_t pmap_query_resident(pmap_t pmap,
+ vm_map_offset_t s,
+ vm_map_offset_t e,
+ mach_vm_size_t *compressed_bytes_p);
#if CONFIG_PGTRACE
int pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end);
if (task == kernel_task)
return (0);
- if (vm_compressor_mode == COMPRESSED_PAGER_IS_ACTIVE || vm_compressor_mode == DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
+ if (COMPRESSED_PAGER_IS_SWAPLESS || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS)
+ return (0);
+
+ if (COMPRESSED_PAGER_IS_SWAPBACKED || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
if ((vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) &&
(unsigned int)pmap_compressed(task->map->pmap) > (c_segment_pages_compressed / 4))
return (1);
c_seg->c_generation_id > last_c_segment_to_warm_generation_id)
break;
+ if (vm_page_free_count < (AVAILABLE_MEMORY / 4))
+ break;
+
lck_mtx_lock_spin_always(&c_seg->c_lock);
lck_mtx_unlock_always(c_list_lock);
if (m->wpmapped == FALSE) {
vm_object_lock_assert_exclusive(m->object);
-
+ if (!m->object->internal)
+ task_update_logical_writes(current_task(), PAGE_SIZE, TASK_WRITE_DEFERRED);
m->wpmapped = TRUE;
}
if (must_disconnect) {
int superpage;
if (!object->pager_created &&
- object->phys_contiguous) {
+ object->phys_contiguous &&
+ VME_OFFSET(entry) == 0 &&
+ (entry->vme_end - entry->vme_start == object->vo_size) &&
+ VM_MAP_PAGE_ALIGNED(entry->vme_start, (object->vo_size-1))) {
superpage = VM_MEM_SUPERPAGE;
} else {
superpage = 0;
* limit the size of a single extent of wired memory
* to try and limit the damage to the system if
* too many pages get wired down
- * limit raised to 2GB with 128GB max physical limit
+ * limit raised to 2GB with 128GB max physical limit,
+ * but scaled by installed memory above this
*/
- if ( !(flags & KMA_VAONLY) && map_size > (1ULL << 31)) {
+ if ( !(flags & KMA_VAONLY) && map_size > MAX(1ULL<<31, sane_size/64)) {
return KERN_RESOURCE_SHORTAGE;
}
&real_map)) {
vm_map_unlock_read(lookup_map);
+ assert(map_pmap == NULL);
vm_map_unwire(map, start,
s, user_wire);
return(KERN_FAILURE);
if (rc != KERN_SUCCESS) {
/* undo what has been wired so far */
- vm_map_unwire(map, start, s, user_wire);
+ vm_map_unwire_nested(map, start, s, user_wire,
+ map_pmap, pmap_addr);
if (physpage_p) {
*physpage_p = 0;
}
__unused boolean_t src_volatile,
vm_map_copy_t *copy_result, /* OUT */
boolean_t use_maxprot)
+{
+ int flags;
+
+ flags = 0;
+ if (src_destroy) {
+ flags |= VM_MAP_COPYIN_SRC_DESTROY;
+ }
+ if (use_maxprot) {
+ flags |= VM_MAP_COPYIN_USE_MAXPROT;
+ }
+ return vm_map_copyin_internal(src_map,
+ src_addr,
+ len,
+ flags,
+ copy_result);
+}
+kern_return_t
+vm_map_copyin_internal(
+ vm_map_t src_map,
+ vm_map_address_t src_addr,
+ vm_map_size_t len,
+ int flags,
+ vm_map_copy_t *copy_result) /* OUT */
{
vm_map_entry_t tmp_entry; /* Result of last map lookup --
* in multi-level lookup, this
* entry contains the actual
* vm_object/offset.
*/
- register
vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
vm_map_offset_t src_start; /* Start of current entry --
boolean_t map_share=FALSE;
submap_map_t *parent_maps = NULL;
- register
vm_map_copy_t copy; /* Resulting copy */
vm_map_address_t copy_addr;
vm_map_size_t copy_size;
+ boolean_t src_destroy;
+ boolean_t use_maxprot;
+
+ if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
+ use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
/*
* Check for copies of zero bytes.
* setting up VM (and taking C-O-W faults) dominates the copy costs
* for small regions.
*/
- if ((len < msg_ool_size_small) && !use_maxprot)
+ if ((len < msg_ool_size_small) &&
+ !use_maxprot &&
+ !(flags & VM_MAP_COPYIN_ENTRY_LIST))
return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
src_destroy, copy_result);
mach_vm_size_t volatile_pmap_count;
mach_vm_size_t volatile_compressed_pmap_count;
mach_vm_size_t resident_count;
- unsigned int compressed_count;
vm_map_entry_t entry;
vm_object_t object;
for (entry = vm_map_first_entry(map);
entry != vm_map_to_entry(map);
entry = entry->vme_next) {
+ mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
+
if (entry->is_sub_map) {
continue;
}
volatile_compressed_count +=
vm_compressor_pager_get_count(object->pager);
}
- compressed_count = 0;
- volatile_pmap_count += pmap_query_resident(map->pmap,
- entry->vme_start,
- entry->vme_end,
- &compressed_count);
- volatile_compressed_pmap_count += compressed_count;
+ pmap_compressed_bytes = 0;
+ pmap_resident_bytes =
+ pmap_query_resident(map->pmap,
+ entry->vme_start,
+ entry->vme_end,
+ &pmap_compressed_bytes);
+ volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
+ volatile_compressed_pmap_count += (pmap_compressed_bytes
+ / PAGE_SIZE);
}
/* map is still locked on return */
vm_map_copy_t *copy_result, /* OUT */
boolean_t use_maxprot);
+#define VM_MAP_COPYIN_SRC_DESTROY 0x00000001
+#define VM_MAP_COPYIN_USE_MAXPROT 0x00000002
+#define VM_MAP_COPYIN_ENTRY_LIST 0x00000004
+#define VM_MAP_COPYIN_ALL_FLAGS 0x00000007
+extern kern_return_t vm_map_copyin_internal(
+ vm_map_t src_map,
+ vm_map_address_t src_addr,
+ vm_map_size_t len,
+ int flags,
+ vm_map_copy_t *copy_result); /* OUT */
+
extern kern_return_t vm_map_copy_extract(
vm_map_t src_map,
vm_map_address_t src_addr,
void
vm_object_purge(vm_object_t object, int flags)
{
+ unsigned int object_page_count = 0;
+ unsigned int pgcount = 0;
+ boolean_t skipped_object = FALSE;
+
vm_object_lock_assert_exclusive(object);
if (object->purgable == VM_PURGABLE_DENY)
}
assert(object->purgable == VM_PURGABLE_EMPTY);
+ object_page_count = object->resident_page_count;
+
vm_object_reap_pages(object, REAP_PURGEABLE);
if (object->pager != NULL &&
COMPRESSED_PAGER_IS_ACTIVE) {
- unsigned int pgcount;
if (object->activity_in_progress == 0 &&
object->paging_in_progress == 0) {
* pager if there's any kind of operation in
* progress on the VM object.
*/
+ skipped_object = TRUE;
}
}
vm_object_lock_assert_exclusive(object);
+
+ KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_VM, OBJECT_PURGE_ONE)),
+ VM_KERNEL_UNSLIDE_OR_PERM(object), /* purged object */
+ object_page_count,
+ pgcount,
+ skipped_object,
+ 0);
+
}
boolean_t vm_compressor_immediate_preferred = FALSE;
boolean_t vm_compressor_immediate_preferred_override = FALSE;
boolean_t vm_restricted_to_single_processor = FALSE;
+static boolean_t vm_pageout_waiter = FALSE;
+static boolean_t vm_pageout_running = FALSE;
+
static thread_t vm_pageout_external_iothread = THREAD_NULL;
static thread_t vm_pageout_internal_iothread = THREAD_NULL;
*/
unsigned int vm_pageout_active = 0; /* debugging */
-unsigned int vm_pageout_active_busy = 0; /* debugging */
unsigned int vm_pageout_inactive = 0; /* debugging */
unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
unsigned int vm_pageout_inactive_forced = 0; /* debugging */
DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
vm_pageout_scan_event_counter++;
+ lck_mtx_lock(&vm_page_queue_free_lock);
+ vm_pageout_running = TRUE;
+ lck_mtx_unlock(&vm_page_queue_free_lock);
+
vm_pageout_scan();
/*
* we hold both the vm_page_queue_free_lock
assert(vm_page_free_wanted_privileged == 0);
assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
+ vm_pageout_running = FALSE;
+ if (vm_pageout_waiter) {
+ vm_pageout_waiter = FALSE;
+ thread_wakeup((event_t)&vm_pageout_waiter);
+ }
+
lck_mtx_unlock(&vm_page_queue_free_lock);
vm_page_unlock_queues();
/*NOTREACHED*/
}
+kern_return_t
+vm_pageout_wait(uint64_t deadline)
+{
+ kern_return_t kr;
+
+ lck_mtx_lock(&vm_page_queue_free_lock);
+ for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
+ vm_pageout_waiter = TRUE;
+ if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
+ &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
+ (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
+ kr = KERN_OPERATION_TIMED_OUT;
+ }
+ }
+ lck_mtx_unlock(&vm_page_queue_free_lock);
+
+ return (kr);
+}
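
vm_pageout_wait() blocks the caller until any in-flight vm_pageout_scan() pass drains, or the absolute-time deadline passes. A hedged caller sketch, assuming the standard clock_interval_to_deadline() helper; the 100 ms bound is an arbitrary illustrative choice:

	uint64_t deadline;

	/* Wait up to ~100ms for the current pageout scan to finish. */
	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
	if (vm_pageout_wait(deadline) == KERN_OPERATION_TIMED_OUT) {
		/* pageout was still running at the deadline; proceed anyway */
	}
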
+
#ifdef FAKE_DEADLOCK
#define VM_EXECVE 0x131
#define VM_WAKEUP_COMPACTOR_SWAPPER 0x132
+#define VM_DATA_WRITE 0x140
+
#define VM_DEBUG_EVENT(name, event, control, arg1, arg2, arg3, arg4) \
MACRO_BEGIN \
if (vm_debug_events) { \
extern kern_return_t mach_vm_pressure_level_monitor(boolean_t wait_for_pressure, unsigned int *pressure_level);
+extern kern_return_t vm_pageout_wait(uint64_t deadline);
+
#ifdef MACH_KERNEL_PRIVATE
#include <vm/vm_page.h>
decl_lck_mtx_data(,vm_purgeable_queue_lock)
-#define TOKEN_ADD 0x40 /* 0x100 */
-#define TOKEN_DELETE 0x41 /* 0x104 */
-#define TOKEN_RIPEN 0x42 /* 0x108 */
-#define OBJECT_ADD 0x48 /* 0x120 */
-#define OBJECT_REMOVE 0x49 /* 0x124 */
-#define OBJECT_PURGE 0x4a /* 0x128 */
-#define OBJECT_PURGE_ALL 0x4b /* 0x12c */
-
static token_idx_t vm_purgeable_token_remove_first(purgeable_q_t queue);
static void vm_purgeable_stats_helper(vm_purgeable_stat_t *stat, purgeable_q_t queue, int group, task_t target_task);
int best_object_task_importance;
int best_object_skipped;
int num_objects_skipped;
+ int try_lock_failed = 0;
+ int try_lock_succeeded = 0;
task_t owner;
best_object = VM_OBJECT_NULL;
* remaining elements in order.
*/
- num_objects_skipped = -1;
+ KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_VM, OBJECT_PURGE_LOOP) | DBG_FUNC_START),
+ pick_ripe,
+ group,
+ VM_KERNEL_UNSLIDE_OR_PERM(queue),
+ 0,
+ 0);
+
+ num_objects_skipped = 0;
for (object = (vm_object_t) queue_first(&queue->objq[group]);
!queue_end(&queue->objq[group], (queue_entry_t) object);
object = (vm_object_t) queue_next(&object->objq),
num_objects_skipped++) {
+ /*
+ * To prevent us looping for an excessively long time, choose
+ * the best object we've seen after looking at PURGEABLE_LOOP_MAX elements.
+ * If we haven't seen an eligible object after PURGEABLE_LOOP_MAX elements,
+ * we keep going until we find the first eligible object.
+ */
+ if ((num_objects_skipped >= PURGEABLE_LOOP_MAX) && (best_object != NULL)) {
+ break;
+ }
+
if (pick_ripe &&
! object->purgeable_when_ripe) {
/* we want an object that has a ripe token */
if (object_task_importance < best_object_task_importance) {
if (vm_object_lock_try(object)) {
+ try_lock_succeeded++;
if (best_object != VM_OBJECT_NULL) {
/* forget about previous best object */
vm_object_unlock(best_object);
/* can't get any better: stop looking */
break;
}
+ } else {
+ try_lock_failed++;
}
}
}
+
+ KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_VM, OBJECT_PURGE_LOOP) | DBG_FUNC_END),
+ num_objects_skipped, /* considered objects */
+ try_lock_failed,
+ try_lock_succeeded,
+ VM_KERNEL_UNSLIDE_OR_PERM(best_object),
+ ((best_object == NULL) ? 0 : best_object->resident_page_count));
+
object = best_object;
if (object == VM_OBJECT_NULL) {
void vm_purgeable_compressed_update(vm_object_t object,
int delta);
+#define PURGEABLE_LOOP_MAX 64
+
+#define TOKEN_ADD 0x40 /* 0x100 */
+#define TOKEN_DELETE 0x41 /* 0x104 */
+#define TOKEN_RIPEN 0x42 /* 0x108 */
+#define OBJECT_ADD 0x48 /* 0x120 */
+#define OBJECT_REMOVE 0x49 /* 0x124 */
+#define OBJECT_PURGE 0x4a /* 0x128 */
+#define OBJECT_PURGE_ALL 0x4b /* 0x12c */
+#define OBJECT_PURGE_ONE 0x4c /* 0x12d */
+#define OBJECT_PURGE_LOOP 0x4e /* 0x12e */
+
#endif /* __VM_PURGEABLE_INTERNAL__ */
VM_PAGE_CHECK(mem);
assert(VM_PAGE_WIRED(mem));
+ assert(!mem->gobbled);
assert(mem->object != VM_OBJECT_NULL);
#if DEBUG
vm_object_lock_assert_exclusive(mem->object);
lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
if (--mem->wire_count == 0) {
- assert(!mem->private && !mem->fictitious);
- vm_page_wire_count--;
+ if (!mem->private && !mem->fictitious) {
+ vm_page_wire_count--;
+ }
assert(mem->object->wired_page_count > 0);
mem->object->wired_page_count--;
if (!mem->object->wired_page_count) {
offset_in_page = 0;
}
- kr = vm_map_copyin(target_map,
- map_start,
- map_size,
- FALSE,
- &copy);
+ kr = vm_map_copyin_internal(target_map,
+ map_start,
+ map_size,
+ VM_MAP_COPYIN_ENTRY_LIST,
+ &copy);
if (kr != KERN_SUCCESS) {
return kr;
}
shr $32, %rcx
testl %ecx, %ecx
jz 4f
- movl $0, %gs:CPU_TLB_INVALID
testl $(1<<16), %ecx /* Global? */
jz 3f
+ movl $0, %gs:CPU_TLB_INVALID
mov %cr4, %rcx /* RMWW CR4, for lack of an alternative*/
and $(~CR4_PGE), %rcx
mov %rcx, %cr4
mov %rcx, %cr4
jmp 4f
3:
+ movb $0, %gs:CPU_TLB_INVALID_LOCAL
mov %cr3, %rcx
mov %rcx, %cr3
4:
boolean_t pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap));
boolean_t need_global_flush = FALSE;
uint32_t event_code;
+ vm_map_offset_t event_startv, event_endv;
boolean_t is_ept = is_ept_pmap(pmap);
assert((processor_avail_count < 2) ||
if (pmap == kernel_pmap) {
event_code = PMAP_CODE(PMAP__FLUSH_KERN_TLBS);
+ event_startv = VM_KERNEL_UNSLIDE_OR_PERM(startv);
+ event_endv = VM_KERNEL_UNSLIDE_OR_PERM(endv);
} else if (is_ept) {
event_code = PMAP_CODE(PMAP__FLUSH_EPT);
+ event_startv = startv;
+ event_endv = endv;
} else {
event_code = PMAP_CODE(PMAP__FLUSH_TLBS);
+ event_startv = startv;
+ event_endv = endv;
}
PMAP_TRACE_CONSTANT(event_code | DBG_FUNC_START,
- pmap, options, startv, endv, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(pmap), options, event_startv, event_endv, 0);
if (is_ept) {
mp_cpus_call(CPUMASK_ALL, ASYNC, invept, (void*)pmap->pm_eptp);
continue;
PMAP_TRACE_CONSTANT(
PMAP_CODE(PMAP__FLUSH_TLBS_TO),
- pmap, cpus_to_signal, cpus_to_respond, 0, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(pmap), cpus_to_signal, cpus_to_respond, 0, 0);
is_timeout_traced = TRUE;
continue;
}
out:
PMAP_TRACE_CONSTANT(event_code | DBG_FUNC_END,
- pmap, cpus_to_signal, startv, endv, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(pmap), cpus_to_signal, event_startv, event_endv, 0);
}
mac_policy_list.chunks = 1;
mac_policy_list.entries = kalloc(sizeof(struct mac_policy_list_element) * MAC_POLICY_LIST_CHUNKSIZE);
+
bzero(mac_policy_list.entries, sizeof(struct mac_policy_list_element) * MAC_POLICY_LIST_CHUNKSIZE);
LIST_INIT(&mac_label_element_list);
vtableAddr = dereference(Cast(entry, 'uintptr_t *')) - 2 * sizeof('uintptr_t *')
vtype = kern.SymbolicateFromAddress(vtableAddr)
if vtype is None or len(vtype) < 1:
- out_string += "<object 0x{0: <16x}, id 0x{1:x}, vtable 0x{2: <16x}".format(entry, entry.reserved.fRegistryEntryID, vtableAddr)
+ out_string += "<object 0x{0: <16x}, id 0x{1:x}, vtable 0x{2: <16x}".format(entry, CastIOKitClass(entry, 'IORegistryEntry *').reserved.fRegistryEntryID, vtableAddr)
else:
- out_string += "<object 0x{0: <16x}, id 0x{1:x}, vtable 0x{2: <16x} <{3:s}>".format(entry, entry.reserved.fRegistryEntryID, vtableAddr, vtype[0].GetName())
+ out_string += "<object 0x{0: <16x}, id 0x{1:x}, vtable 0x{2: <16x} <{3:s}>".format(entry, CastIOKitClass(entry, 'IORegistryEntry *').reserved.fRegistryEntryID,
+ vtableAddr, vtype[0].GetName())
ztvAddr = kern.GetLoadAddressForSymbol('_ZTV15IORegistryEntry')
if vtableAddr != ztvAddr:
ReadIOPortInt(portAddr, 1, lcpu)
@lldb_command('readioport16')
-def ReadIOPort8(cmd_args=None):
+def ReadIOPort16(cmd_args=None):
""" Read value stored in the specified IO port. The CPU can be optionally
specified as well.
Prints 0xBAD10AD in case of a bad read
ReadIOPortInt(portAddr, 2, lcpu)
@lldb_command('readioport32')
-def ReadIOPort8(cmd_args=None):
+def ReadIOPort32(cmd_args=None):
""" Read value stored in the specified IO port. The CPU can be optionally
specified as well.
Prints 0xBAD10AD in case of a bad read
WriteIOPortInt(portAddr, 1, value, lcpu)
@lldb_command('writeioport16')
-def WriteIOPort8(cmd_args=None):
+def WriteIOPort16(cmd_args=None):
""" Write the value to the specified IO port. The size of the value is
determined by the name of the command. The CPU used can be optionally
specified as well.
WriteIOPortInt(portAddr, 2, value, lcpu)
@lldb_command('writeioport32')
-def WriteIOPort8(cmd_args=None):
+def WriteIOPort32(cmd_args=None):
""" Write the value to the specified IO port. The size of the value is
determined by the name of the command. The CPU used can be optionally
specified as well.
result_pkt = Cast(addressof(kern.globals.manual_pkt.data), 'kdp_readioport_reply_t *')
if(result_pkt.error == 0):
- print "This macro is incomplete till <rdar://problem/12868059> is fixed"
- # FIXME: Uncomment me when <rdar://problem/12868059> is fixed
- #if numbytes == 1:
- # result = dereference(Cast(result_pkt.data, 'uint8_t *'))
- #elif numbytes == 2:
- # result = dereference(Cast(result_pkt.data, 'uint16_t *'))
- #elif numbytes == 4:
- # result = dereference(cast(result_pkt.data, 'uint32_t *'))
-
- print "0x{0: <4x}: 0x{1: <1x}".format(addr, result)
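+ # Interpret the reply payload at the requested width (1, 2 or 4 bytes) before formatting the result.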
+ if numbytes == 1:
+ result = dereference(Cast(addressof(result_pkt.data), 'uint8_t *'))
+ elif numbytes == 2:
+ result = dereference(Cast(addressof(result_pkt.data), 'uint16_t *'))
+ elif numbytes == 4:
+ result = dereference(Cast(addressof(result_pkt.data), 'uint32_t *'))
+
+ print "{0: <#6x}: {1:#0{2}x}".format(addr, result, (numbytes*2)+2)
def WriteIOPortInt(addr, numbytes, value, lcpu):
""" Writes 'value' into ioport specified by 'addr'. Prints errors if it encounters any
len_address = unsigned(addressof(kern.globals.manual_pkt.len))
data_address = unsigned(addressof(kern.globals.manual_pkt.data))
if not WriteInt32ToMemoryAddress(0, input_address):
- print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
+ print "error writing {0: #x} to port {1: <#6x}: failed to write 0 to input_address".format(value, addr)
return
kdp_pkt_size = GetType('kdp_writeioport_req_t').GetByteSize()
if not WriteInt32ToMemoryAddress(kdp_pkt_size, len_address):
- print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
+ print "error writing {0: #x} to port {1: <#6x}: failed to write kdp_pkt_size".format(value, addr)
return
kgm_pkt = kern.GetValueFromAddress(data_address, 'kdp_writeioport_req_t *')
WriteInt32ToMemoryAddress(numbytes, int(addressof(kgm_pkt.nbytes))) and
WriteInt16ToMemoryAddress(lcpu, int(addressof(kgm_pkt.lcpu)))
):
- print "This macro is incomplete till <rdar://problem/12868059> is fixed"
- # FIXME: Uncomment me when <rdar://problem/12868059> is fixed
- #if numbytes == 1:
- # if not WriteInt8ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
- # print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
- #elif numbytes == 2:
- # if not WriteInt16ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
- # print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
- #elif numbytes == 4:
- # if not WriteInt32ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
- # print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
-
+ if numbytes == 1:
+ if not WriteInt8ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
+ print "error writing {0: #x} to port {1: <#6x}: failed to write 8 bit data".format(value, addr)
+ return
+ elif numbytes == 2:
+ if not WriteInt16ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
+ print "error writing {0: #x} to port {1: <#6x}: failed to write 16 bit data".format(value, addr)
+ return
+ elif numbytes == 4:
+ if not WriteInt32ToMemoryAddress(value, int(addressof(kgm_pkt.data))):
+ print "error writing {0: #x} to port {1: <#6x}: failed to write 32 bit data".format(value, addr)
+ return
if not WriteInt32ToMemoryAddress(1, input_address):
- print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
+ print "error writing {0: #x} to port {1: <#6x}: failed to write to input_address".format(value, addr)
return
result_pkt = Cast(addressof(kern.globals.manual_pkt.data), 'kdp_writeioport_reply_t *')
# Done with the write
if(result_pkt.error == 0):
- print "Writing 0x {0: x} to port {1: <4x} was successful".format(value, addr)
+ print "Writing {0: #x} to port {1: <#6x} was successful".format(value, addr)
else:
- print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr)
+ print "error writing {0: #x} to port {1: <#6x}".format(value, addr)
@lldb_command('showinterruptcounts')
def showinterruptcounts(cmd_args=None):
]
dsc_libs = []
+ print "Shared cache UUID found in the binary data is <%s>" % str(dsc_common[0])
if dsc_common[0].replace('-', '').lower() == dsc_uuid:
print "SUCCESS: Found Matching dyld shared cache uuid. Loading library load addresses from layout provided."
_load_addr = dsc_common[1]
for tid,thdata in thlist.iteritems():
threadByID[str(tid)] = {}
thsnap = threadByID[str(tid)]
+ if "thread_snapshot_v2" not in thdata:
+ print "Found broken thread state for thread ID: %s." % tid
+ break
threadsnap = thdata["thread_snapshot_v2"]
thsnap["userTime"] = GetSecondsFromMATime(threadsnap["user_time"], timebase)
thsnap["id"] = threadsnap["thread_id"]
vm_size = uint64_t(vmmap.size).value
resident_pages = 0
if vmmap.pmap != 0: resident_pages = int(vmmap.pmap.stats.resident_count)
- out_string += format_string.format(vmmap, vmmap.pmap, vm_size, vmmap.hdr.nentries, resident_pages, vmmap.hint, vmmap.first_free)
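+ # When the map's hole list is enabled, first_free is not maintained, so report 0 in that case.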
+ first_free = 0
+ if int(vmmap.holelistenabled) == 0: first_free = vmmap.f_s.first_free
+ out_string += format_string.format(vmmap, vmmap.pmap, vm_size, vmmap.hdr.nentries, resident_pages, vmmap.hint, first_free)
return out_string
@lldb_type_summary(['vm_map_entry'])
#EndMacro: dumpcallqueue
+@lldb_command('showalltasklogicalwrites')
+def ShowAllTaskIOStats(cmd_args=None):
+ """ Command to print logical I/O write statistics for all tasks
+ """
+ print "{0: <20s} {1: <20s} {2: <20s} {3: <20s} {4: <20s} {5: <20s}".format("task", "Immediate Writes", "Deferred Writes", "Invalidated Writes", "Metadata Writes", "name")
+ for t in kern.tasks:
+ pval = Cast(t.bsd_info, 'proc *')
+ print "{0: <#18x} {1: >20d} {2: >20d} {3: >20d} {4: >20d} {5: <20s}".format(t,
+ t.task_immediate_writes,
+ t.task_deferred_writes,
+ t.task_invalidated_writes,
+ t.task_metadata_writes,
+ str(pval.p_comm))
+
+
@lldb_command('showalltasks','C')
def ShowAllTasks(cmd_args=None, cmd_options={}):
""" Routine to print a summary listing of all the tasks
fake_thread_id = 0xdead0000 | (saved_state & ~0xffff0000)
fake_thread_id = fake_thread_id & 0xdeadffff
lldb_process.CreateOSPluginThread(0xdeadbeef, saved_state)
- lldbthread = lldb_process.GetThreadByID(fake_thread_id)
+ lldbthread = lldb_process.GetThreadByID(int(fake_thread_id))
if not lldbthread.IsValid():
print "Failed to create thread"
#include <sys/types.h>
#include <sys/time.h>
#include <sys/signal.h>
+#include <errno.h>
+#include "../unit_tests/tests_common.h"
#define MAX(A, B) ((A) < (B) ? (B) : (A))
static boolean_t threaded = FALSE;
static boolean_t oneway = FALSE;
static boolean_t do_select = FALSE;
+static boolean_t save_perfdata = FALSE;
+
int msg_type;
int num_ints;
int num_msgs;
fprintf(stderr, " -work num\t\tmicroseconds of client work\n");
fprintf(stderr, " -pages num\t\tpages of memory touched by client work\n");
fprintf(stderr, " -select \t\tselect prior to calling kevent().\n");
+ fprintf(stderr, " -perf \t\tCreate perfdata files for metrics.\n");
fprintf(stderr, "default values are:\n");
fprintf(stderr, " . no affinity\n");
fprintf(stderr, " . not timeshare\n");
} else if (0 == strcmp("-select", argv[0])) {
do_select = TRUE;
argc--; argv++;
+ } else if (0 == strcmp("-perf", argv[0])) {
+ save_perfdata = TRUE;
+ argc--; argv++;
} else
usage(progname);
}
exit(1);
}
+
int main(int argc, char *argv[])
{
int i;
double dsecs = (double) deltatv.tv_sec +
1.0E-6 * (double) deltatv.tv_usec;
+ double time_in_sec = (double)deltatv.tv_sec + (double)deltatv.tv_usec/1.0E6;
+ double throughput_msg_p_sec = (double) totalmsg/dsecs;
+ double avg_msg_latency = dsecs*1.0E6 / (double)totalmsg;
+
printf(" in %ld.%03u seconds\n",
(long)deltatv.tv_sec, deltatv.tv_usec/1000);
printf(" throughput in messages/sec: %g\n",
printf(" average message latency (usec): %2.3g\n",
dsecs * 1.0E6 / (double) totalmsg);
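+ /* Optionally emit a perfdata record for the measured latency;
+  * record_perf_data() comes from ../unit_tests/tests_common.h. */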
+ if (save_perfdata == TRUE) {
+ record_perf_data("kqmpmm_avg_msg_latency", "usec", avg_msg_latency, "Message latency measured in microseconds. Lower is better", stderr);
+ }
return (0);
}
#include <sys/types.h>
#include <sys/time.h>
#include <sys/signal.h>
+#include <errno.h>
+#include "../unit_tests/tests_common.h" /* for record_perf_data() */
#include <libkern/OSAtomic.h>
static boolean_t threaded = FALSE;
static boolean_t oneway = FALSE;
static boolean_t useset = FALSE;
+static boolean_t save_perfdata = FALSE;
int msg_type;
int num_ints;
int num_msgs;
fprintf(stderr, " -verbose\t\tbe verbose (use multiple times to increase verbosity)\n");
fprintf(stderr, " -oneway\t\tdo not request return reply\n");
fprintf(stderr, " -count num\t\tnumber of messages to send\n");
+ fprintf(stderr, " -perf \t\tCreate perfdata files for metrics.\n");
fprintf(stderr, " -type trivial|inline|complex\ttype of messages to send\n");
fprintf(stderr, " -numints num\tnumber of 32-bit ints to send in messages\n");
fprintf(stderr, " -servers num\tnumber of server threads to run\n");
} else if (0 == strcmp("-oneway", argv[0])) {
oneway = TRUE;
argc--; argv++;
+ } else if (0 == strcmp("-perf", argv[0])) {
+ save_perfdata = TRUE;
+ argc--; argv++;
} else if (0 == strcmp("-type", argv[0])) {
if (argc < 2)
usage(progname);
printf(" average message latency (usec): %2.3g\n",
dsecs * 1.0E6 / (double) totalmsg);
+ double time_in_sec = (double)deltatv.tv_sec + (double)deltatv.tv_usec/1.0E6;
+ double throughput_msg_p_sec = (double) totalmsg/dsecs;
+ double avg_msg_latency = dsecs*1.0E6 / (double)totalmsg;
+
+ if (save_perfdata == TRUE) {
+ record_perf_data("mpmm_avg_msg_latency", "usec", avg_msg_latency, "Message latency measured in microseconds. Lower is better", stderr);
+ }
+
if (stress_prepost) {
int64_t sendns = abs_to_ns(g_client_send_time);
dsecs = (double)sendns / (double)NSEC_PER_SEC;
--- /dev/null
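+# Build rules for the TLBcoherency stress test (tlbcoh); see TLBcoherency.c for what the test exercises.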
+include ../Makefile.common
+
+CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc)
+
+ifdef RC_ARCHS
+ ARCHS:=$(RC_ARCHS)
+ else
+ ifeq "$(Embedded)" "YES"
+ ARCHS:=armv7 armv7s arm64 armv7k
+ else
+ ARCHS:=x86_64
+ endif
+endif
+
+CFLAGS := -g $(patsubst %, -arch %, $(ARCHS)) -isysroot $(SDKROOT) -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
+
+DSTROOT?=$(shell /bin/pwd)
+SYMROOT?=$(shell /bin/pwd)
+
+DEBUG:=0
+
+$(DSTROOT)/tlbcoh: TLBcoherency.c
+ $(CC) $(CFLAGS) -Wall TLBcoherency.c -o $(SYMROOT)/$(notdir $@) -DDEBUG=$(DEBUG) -g -Os
+ if [ ! -e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi
+
+clean:
+ rm -rf $(DSTROOT)/tlbcoh $(SYMROOT)/*.dSYM $(SYMROOT)/tlbcoh
--- /dev/null
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* A pool of threads which attempt to verify multiprocessor TLB coherency.
+ * Creates -t threads, by default 4
+ * Creates -s separate mmap(MAP_ANON) R/W mappings, sized at 1 page each but
+ * alterable via -z <npages>
+ * Initially read-faults each mapping in, verifying first-word zerofill--
+ * The kernel typically uses the physical aperture to perform the zerofill
+ * Writes map_address (page_aligned) | low 12 bits of the PID at the first word
+ * This can help verify ASID-related inconsistencies
+ * Records a timestamp in a structure associated with each mapping
+ * With a custom kernel, it has the option of creating a remapping of the page in
+ * the kernel's address space to exercise shared kernel mapping coherency.
+ * Each thread subsequently loops around on the set of mappings. One thread is designated
+ * the observer thread. The thread acquires a lock on the arena element,
+ * verifies that the mapping has the expected pattern (Address | PID), if the
+ * element is in the MAPPED state. Can optionally tell the kernel to check its
+ * alias as well. If it notices a mismatch, it has the option to issue a syscall
+ * to stop kernel tracing. If the -f option is supplied, the test is terminated.
+ * If the page has lingered beyond -l microseconds, non-observer threads will
+ * unmap the page, optionally calling into the kernel to unmap its alias, and
+ * repopulate the element.
+ * After this sequence, the thread will optionally usleep for -p microseconds,
+ * to allow for idle power management to engage if possible (errata might exist
+ * in those areas), or context switches to occur.
+ * Created Derek Kumar, 2011.
+ */
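+
+/*
+ * Example invocation (illustrative; see the option parsing in main() below):
+ *   ./tlbcoh -t 8 -s 512 -f
+ * runs 8 threads (one of them the observer) over 512 single-page mappings and
+ * stops on the first detected mismatch.
+ */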
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <string.h>
+#include <mach/mach_time.h>
+#include <libkern/OSAtomic.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <stdbool.h>
+
+typedef struct {
+ OSSpinLock tlock;
+ uintptr_t taddr;
+ unsigned tstate;
+ uint64_t tctime;
+} cpage;
+
+cpage *parray;
+
+#define ARENASIZE (1024)
+#define NTHREADS (4)
+#define PAGE_LINGER_TIME (2000000)
+#define MAX_THREADS (512)
+#define MYSYS (215)
+#define CONSISTENCY(...) fprintf(stderr, __VA_ARGS__ );
+
+unsigned arenasize = ARENASIZE, mapping_size;
+uint64_t page_linger_time = PAGE_LINGER_TIME;
+enum arenastates {MTOUCHED = 1, UNMAPPED = 2, MAPPED = 4, WP = 8};
+enum syscaction {MDOMAP = 1, MDOUNMAP = 2, MDOCHECK = 4};
+enum ttypes {OBSERVER = 1, LOOPER = 2};
+bool trymode = true;
+bool all_stop = false;
+bool stop_on_failure = false;
+bool reuse_addrs = true;
+bool dosyscall = false;
+
+pid_t cpid;
+int sleepus;
+
+pthread_t threads[MAX_THREADS];
+uint32_t roles[MAX_THREADS];
+
+void usage(char **a) {
+ fprintf(stderr, "Usage: %s [-t nthreads] [-s nmappings] [-z npages] [-l linger] [-p sleep_usec] [-f] [-r] [-y]\n", a[0]);
+ exit(1);
+}
+
+void set_enable(int val)
+{
+ int mib[6];
+ size_t needed;
+
+ mib[0] = CTL_KERN;
+ mib[1] = KERN_KDEBUG;
+ mib[2] = KERN_KDENABLE;
+ mib[3] = val;
+ mib[4] = 0;
+ mib[5] = 0;
+
+ if (sysctl(mib, 4, NULL, &needed, NULL, 0) < 0) {
+ printf("trace facility failure, KERN_KDENABLE\n");
+ }
+}
+
+void initialize_arena_element(int i) {
+ __unused int sysret;
+ void *hint = reuse_addrs ? (void *)0x1000 : NULL;
+ parray[i].taddr = (uintptr_t)mmap(hint, mapping_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0);
+
+ if (parray[i].taddr == (uintptr_t)MAP_FAILED) {
+ perror("mmap");
+ exit(2);
+ }
+
+#if !defined(__LP64__)
+ uint32_t pattern = parray[i].taddr;
+ pattern |= cpid & 0xFFF;
+// memset_pattern4((void *)parray[i].taddr, &pattern, PAGE_SIZE); //
+// uncomment to fill the whole page, but a sufficiently unique first word
+// gets the job done without slowing down the test
+
+#else
+ uint64_t pattern = parray[i].taddr;
+ pattern |= (cpid & 0xFFF);
+// memset_pattern8(parray[i].taddr, &pattern, PAGE_SIZE);
+#endif
+
+ uint64_t val = (*(uintptr_t *)parray[i].taddr);
+
+ if (val != 0) {
+ CONSISTENCY("Mismatch, actual: 0x%llx, expected: 0x%llx\n", (unsigned long long)val, 0ULL);
+ if (stop_on_failure) {
+ set_enable(0);
+ exit(5);
+ }
+ }
+ for (int k = 0; k < (mapping_size >> PAGE_SHIFT); k++) {
+ *(uintptr_t *)(parray[i].taddr + k * PAGE_SIZE) = pattern;
+ }
+
+ parray[i].tctime = mach_absolute_time();
+ parray[i].tstate = MTOUCHED;
+
+ if (dosyscall) {
+ sysret = syscall(MYSYS, MDOMAP, parray[i].taddr, pattern, i, mapping_size);
+ }
+}
+
+void initialize_arena(void) {
+ for (int i = 0; i < arenasize; i++) {
+ initialize_arena_element(i);
+ }
+}
+
+void *tlbexerciser(void *targs) {
+ uint32_t role = *(uint32_t *)targs;
+ __unused int sysret;
+ printf("Starting thread %p, role: %u\n", pthread_self(), role);
+
+ for(;;) {
+ for (int i = 0; i < arenasize; i++) {
+ if (all_stop)
+ return NULL;
+
+ if (trymode) {
+ if (OSSpinLockTry(&parray[i].tlock) == false)
+ continue;
+ } else {
+ OSSpinLockLock(&parray[i].tlock);
+ }
+
+ if (parray[i].tstate != UNMAPPED) {
+ uintptr_t ad;
+ ad = parray[i].taddr | (cpid & 0xFFF);
+ uintptr_t val = *(uintptr_t *)parray[i].taddr;
+
+ if (val != ad) {
+ if (stop_on_failure)
+ all_stop = true;
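+ /* Emit a kdebug trace event (syscall 180 is kdebug_trace) tagging the mismatched address for later analysis. */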
+ syscall(180, 0x71BC0000, (ad >> 32), (ad & ~0), 0, 0, 0);
+ CONSISTENCY("Mismatch, actual: 0x%llx, expected: 0x%llx\n", (unsigned long long)val, (unsigned long long)ad);
+ if (stop_on_failure) {
+ set_enable(0);
+ exit(5);
+ }
+ }
+
+ if (dosyscall) {
+ sysret = syscall(MYSYS, MDOCHECK, parray[i].taddr, ad, i, 0);
+ }
+
+ if ((role != OBSERVER) && ((mach_absolute_time() - parray[i].tctime) > page_linger_time)) {
+ parray[i].tstate = UNMAPPED;
+ if (munmap((void *)parray[i].taddr, mapping_size) != 0) {
+ perror("munmap");
+ }
+
+ if (dosyscall) {
+ sysret = syscall(MYSYS, MDOUNMAP, parray[i].taddr, ad, i, mapping_size);
+ }
+ }
+ } else {
+ if (role != OBSERVER) {
+ initialize_arena_element(i);
+ }
+ }
+
+ OSSpinLockUnlock(&parray[i].tlock); // unlock (with release barrier)
+
+ if (sleepus)
+ usleep(sleepus);
+ }
+ }
+
+ return NULL;
+}
+
+int main(int argc, char **argv) {
+ extern char *optarg;
+ int arg;
+ unsigned nthreads = NTHREADS;
+
+ mapping_size = PAGE_SIZE;
+
+ while ((arg = getopt(argc, argv, "l:t:h:s:p:z:fry")) != -1) {
+ switch (arg) {
+ case 'l':
+ page_linger_time = strtoull(optarg, NULL, 0);
+ break;
+ case 't':
+ nthreads = atoi(optarg);
+ break;
+ case 's':
+ arenasize = atoi(optarg); // we typically want this to
+ // be sized < 2nd level TLB
+ break;
+ case 'f':
+ stop_on_failure = true;
+ break;
+ case 'r':
+ reuse_addrs = false;
+ break;
+ case 'p':
+ sleepus = atoi(optarg);
+ break;
+ case 'y':
+ dosyscall = true;
+ break;
+ case 'z':
+ mapping_size = atoi(optarg) * PAGE_SIZE;
+ break;
+ case 'h':
+ usage(argv);
+ }
+ }
+
+ if(optind != argc) {
+ usage(argv);
+ }
+
+ if (nthreads > MAX_THREADS)
+ nthreads = MAX_THREADS;
+
+ printf("page_linger_time: 0x%llx, nthreads: %u, arenasize: %u sleepus: %d reuse_addrs: %u, stop_on_failure: %u, dosyscall: %u, mappingsize: 0x%x\n", page_linger_time, nthreads, arenasize, sleepus, reuse_addrs, (unsigned) stop_on_failure, dosyscall, mapping_size);
+
+ parray = calloc(arenasize, sizeof(cpage));
+ cpid = getpid();
+
+ initialize_arena();
+
+ for (int dex = 0; dex < nthreads; dex++) {
+ roles[dex] = LOOPER;
+ if (dex == 0)
+ roles[dex] = OBSERVER;
+ int result = pthread_create(&threads[dex], NULL, tlbexerciser, &roles[dex]);
+ if(result) {
+ printf("pthread_create: %d starting worker thread; aborting.\n", result);
+ return result;
+ }
+ }
+
+ for(int dex = 0; dex < nthreads; dex++) {
+ void *rtn;
+ int result = pthread_join(threads[dex], &rtn);
+
+ if(result) {
+ printf("pthread_join(): %d, aborting\n", result);
+ return result;
+ }
+
+ if(rtn) {
+ printf("***Aborting on worker error\n");
+ exit(1);
+ }
+ }
+ return 0;
+}
PRODUCT=`sw_vers -productName`
COUNT=
+# params are: record_perf_data(metric, unit, value, description)
+function record_perf_data() {
+ local METRIC=$1
+ local UNIT=$2
+ local DATA=$3
+ local DESCRIPTION=$4
+ echo "{ \"version\" : \"1.0\", \"measurements\" : {\"$METRIC\": {\"description\" : \"$DESCRIPTION\", \"names\":[\"$METRIC\"], \"units\" : [\"$UNIT\"], \"data\" : [$DATA] }}}"
+}
+
+PERFDATA_DIR=$BATS_TMP_DIR
+if [ "${PERFDATA_DIR}" == "" ]; then
+ PERFDATA_DIR=/tmp/
+fi
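+
+# Example (illustrative metric name): record one measurement and let the harness
+# collect the resulting file:
+#   record_perf_data "mytest_1x_real" "s" 1.23 "Real time in seconds. Lower is better." > ${PERFDATA_DIR}/mytest_1x_real.perfdata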
+
case "$PRODUCT" in
"iPhone OS")
COUNT=1000
echo "Running $i"
for j in `jot $(sysctl -n hw.ncpu) 1`; do
printf "\t%dx\t" $j
- /usr/bin/time ./${RUN} $j $((${COUNT}/$j)) ./$i
+ METRIC_NAME="${i}_${j}x"
+ TIMEOUT=` /usr/bin/time ./${RUN} $j $((${COUNT}/$j)) ./$i 2>&1`
+ echo ${TIMEOUT}
+ REALTIME=`echo ${TIMEOUT} | awk '{ print $1 }'`
+ TOTALTIME=`echo ${TIMEOUT} | awk '{ print $3 + $5 }'`
+ record_perf_data "${METRIC_NAME}_real" "s" $REALTIME "Real time in seconds. Lower is better. This may have variance based on load on system" > ${PERFDATA_DIR}/${METRIC_NAME}_real.perfdata
+ record_perf_data "${METRIC_NAME}_sys" "s" $TOTALTIME "User + Sys time in seconds. Lower is better." > ${PERFDATA_DIR}/${METRIC_NAME}_sys.perfdata
if [ $? -ne 0 ]; then
echo "Failed $i, exit status $?"
exit 1