By default, the target architecture matches the build machine
architecture, and the kernel configuration is set to build for DEVELOPMENT.
- The machine configuration defaults to MX31ADS for arm and nothing for i386 and ppc.
+ The machine configuration defaults to S5L8900XRB for arm and default for i386 and ppc.
This will also create a bootable image, mach_kernel, and a kernel binary
with symbols, mach_kernel.sys.
-
- Here are the valid arm machine configs:
- LN2410SBC MX31ADS INTEGRATORCP S5I3000SMDK S5L8900XFPGA S5L8900XRB
- OLOCREEK
Examples:
- /* make a debug kernel for MX31 arm board */
- make TARGET_CONFIGS="debug arm MX31ADS"
+ /* make a debug kernel for H1 arm board */
+ make TARGET_CONFIGS="debug arm s5l8900xrb"
- $(OBJROOT)/DEBUG_ARM_MX31ADS/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
- $(OBJROOT)/DEBUG_ARM_MX31ADS/mach_kernel: bootable image
+ $(OBJROOT)/DEBUG_ARM_S5L8900XRB/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
+ $(OBJROOT)/DEBUG_ARM_S5L8900XRB/mach_kernel: bootable image
- /* make debug and development kernels for MX31 arm board */
- make TARGET_CONFIGS="debug arm MX31ADS development arm MX31ADS"
+ /* make debug and development kernels for H1 arm board */
+ make TARGET_CONFIGS="debug arm s5l8900xrb development arm s5l8900xrb"
- $(OBJROOT)/DEBUG_ARM_MX31ADS/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
- $(OBJROOT)/DEBUG_ARM_MX31ADS/mach_kernel: bootable image
+ $(OBJROOT)/DEBUG_ARM_S5L8900XRB/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
+ $(OBJROOT)/DEBUG_ARM_S5L8900XRB/mach_kernel: bootable image
$(OBJROOT)/DEVELOPMENT_ARM/osfmk/DEVELOPMENT/osfmk.o: pre-linked object for osfmk component
$(OBJROOT)/DEVELOPMENT_ARM/mach_kernel: bootable image
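/* as a further sketch, the same triplet syntax should cover the other
   architectures, e.g. an i386 build (assuming the lowercase "release"
   kernel configuration is accepted like the configs above, together with
   the default i386 machine configuration); the bootable image would
   presumably land under $(OBJROOT)/RELEASE_I386 */
make TARGET_CONFIGS="release i386 default"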
- /* this is all you need to do to build MX31ADS arm with DEVELOPMENT kernel configuration */
+ /* this is all you need to do to build H1 arm with DEVELOPMENT kernel configuration */
make TARGET_CONFIGS="default arm default"
or the following is equivalent
#
# EMBEDDED_BASE = [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot ]
# EMBEDDED_FILESYS = [ devfs hfs journaling fdesc fifo ]
-# EMBEDDED_NET = [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert config_mbuf_noexpand dummynet ipfirewall ipfw2 zlib ifnet_input_chk ]
+# EMBEDDED_NET = [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter config_mbuf_noexpand ]
# EMBEDDED = [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ]
# DEVELOPMENT = [ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver netmibs development mach_assert config_dtrace ]
#
options EVENT # <event>
#
-# Note: MAC options must be set in both bsd/conf and security/conf MASTER files
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and
+# security/conf MASTER files.
#
options CONFIG_MACF # Mandatory Access Control Framework
options		CONFIG_MACF_SOCKET_SUBSET	# MAC socket subset (no labels)
config mach_kernel swap generic # <mach>
#
-# Note: MAC options must be set in both bsd/conf and security/conf MASTER files
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and
+# security/conf MASTER files.
#
options CONFIG_MACF # Mandatory Access Control Framework
options		CONFIG_MACF_SOCKET_SUBSET	# MAC socket subset (no labels)
OPTIONS/audit optional audit
OPTIONS/config_fse optional config_fse
OPTIONS/sockets optional sockets
-OPTIONS/kpidirect optional kpidirect
OPTIONS/development optional development
OPTIONS/sysv_sem optional sysv_sem
OPTIONS/sysv_msg optional sysv_msg
size_t actual;
if (dtrace_copycheck( src, dst, len )) {
- if (copyinstr((const user_addr_t)src, (char *)dst, (vm_size_t)len, &actual)) {
+ /* copyin as many as 'len' bytes. */
+ int error = copyinstr((const user_addr_t)src, (char *)dst, (vm_size_t)len, &actual);
+
+ /*
+ * ENAMETOOLONG is returned when 'len' bytes have been copied in but the NUL terminator was
+ * not encountered. That does not require raising CPU_DTRACE_BADADDR, and we press on.
+ * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
+ * to the caller.
+ */
+ if (error && error != ENAMETOOLONG) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
cpu_core[CPU->cpu_id].cpuc_dtrace_illval = src;
}
size_t actual;
if (dtrace_copycheck( dst, src, len )) {
+
+ /*
+ * ENAMETOOLONG is returned when 'len' bytes have been copied out but the NUL terminator was
+ * not encountered. We raise CPU_DTRACE_BADADDR in that case.
+ * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
+ * to the caller.
+ */
if (copyoutstr((const void *)src, dst, (size_t)len, &actual)) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
cpu_core[CPU->cpu_id].cpuc_dtrace_illval = dst;
lockstat_probe_t lockstat_probes[] =
{
-#ifndef __PPC__
+#ifdef __i386__
/* Not implemented yet on PPC... */
{ LS_LCK_MTX_LOCK, LSA_ACQUIRE, LS_LCK_MTX_LOCK_ACQUIRE, DTRACE_IDNONE },
{ LS_LCK_MTX_LOCK, LSA_SPIN, LS_LCK_MTX_LOCK_SPIN, DTRACE_IDNONE },
static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p, int is_char);
dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys);
dev_t mdevlookup(int devid);
+void mdevremoveall(void);
static int mdevclose(__unused dev_t dev, __unused int flags,
__unused int devtype, __unused struct proc *p) {
if(!(mdev[devid].mdFlags & mdInited)) return -1; /* This one hasn't been defined */
return mdev[devid].mdBDev; /* Return the device number */
}
+
+void mdevremoveall(void) {
+
+ int i;
+
+ for(i = 0; i < 16; i++) {
+ if(!(mdev[i].mdFlags & mdInited)) continue; /* Ignore unused mdevs */
+
+ devfs_remove(mdev[i].mdbdevb); /* Remove the block device */
+ devfs_remove(mdev[i].mdcdevb); /* Remove the character device */
+
+ mdev[i].mdBase = 0; /* Clear the mdev's storage */
+ mdev[i].mdSize = 0;
+ mdev[i].mdSecsize = 0;
+ mdev[i].mdFlags = 0;
+ mdev[i].mdBDev = 0;
+ mdev[i].mdCDev = 0;
+ mdev[i].mdbdevb = 0;
+ mdev[i].mdcdevb = 0;
+ }
+}
void add_blocks(Block a, Block b, BlockWord carry);
void fips_initialize(void);
-void random_block(Block b);
+void random_block(Block b, int addOptional);
u_int32_t CalculateCRC(u_int8_t* buffer, size_t length);
/*
* get a random block of data per fips 186-2
*/
void
-random_block(Block b)
+random_block(Block b, int addOptional)
{
int repeatCount = 0;
do
{
// do one iteration
- Block xSeed;
- prngOutput (gPrngRef, (BYTE*) &xSeed, sizeof (xSeed));
- // add the seed to the previous value of g_xkey
- add_blocks (g_xkey, xSeed, 0);
-
+ if (addOptional)
+ {
+ Block xSeed;
+ prngOutput (gPrngRef, (BYTE*) &xSeed, sizeof (xSeed));
+
+ // add the seed to the previous value of g_xkey
+ add_blocks (g_xkey, xSeed, 0);
+ }
+
// compute "G"
SHA1Update (&g_sha1_ctx, (const u_int8_t *) &g_xkey, sizeof (g_xkey));
fips_initialize ();
}
+const Block kKnownAnswer = {0x92b404e5, 0x56588ced, 0x6c1acd4e, 0xbf053f68, 0x9f73a93};
+
void
fips_initialize(void)
{
- /* Read the initial value of g_xkey from yarrow */
- prngOutput (gPrngRef, (BYTE*) &g_xkey, sizeof (g_xkey));
+ /* So that we can do the self test, set the seed to zero */
+ memset(&g_xkey, 0, sizeof(g_xkey));
/* initialize our SHA1 generator */
SHA1Init (&g_sha1_ctx);
/* other initializations */
memset (zeros, 0, sizeof (zeros));
g_bytes_used = 0;
- random_block(g_random_data);
+ random_block(g_random_data, FALSE);
+
+ // check here to see if we got the initial data we were expecting
+ int i;
+ for (i = 0; i < kBSize; ++i)
+ {
+ if (kKnownAnswer[i] != g_random_data[i])
+ {
+ panic("FIPS random self test failed");
+ }
+ }
+
+	// now do the random block again to make sure that userland doesn't get predictable data
+ random_block(g_random_data, TRUE);
}
/*
int bytes_available = kBSizeInBytes - g_bytes_used;
if (bytes_available == 0)
{
- random_block(g_random_data);
+ random_block(g_random_data, TRUE);
g_bytes_used = 0;
bytes_available = kBSizeInBytes;
}
int bytes_to_read = min(bytes_remaining, kBSizeInBytes - g_bytes_used);
if (bytes_to_read == 0)
{
- random_block(g_random_data);
+ random_block(g_random_data, TRUE);
g_bytes_used = 0;
bytes_to_read = min(bytes_remaining, kBSizeInBytes);
}
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
}
}
+/*
+ * Convert a raw catalog key and record into an in-core catalog descriptor.
+ *
+ * Note: The caller is responsible for releasing the catalog descriptor.
+ */
__private_extern__
int
cat_convertkey(
/*
 * cat_lookup - lookup a catalog node using a cnode descriptor
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
*/
__private_extern__
int
* cat_findname - obtain a descriptor from cnid
*
* Only a thread lookup is performed.
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
+
*/
__private_extern__
int
/*
* cat_idlookup - lookup a catalog node using a cnode id
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
*/
__private_extern__
int
*
* NOTE: both the catalog file and attribute file locks must
* be held before calling this function.
+ *
+ * The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
*/
__private_extern__
int
* 3. BTDeleteRecord(from_cnode);
* 4. BTDeleteRecord(from_thread);
* 5. BTInsertRecord(to_thread);
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied out_cdp is non-null).
*/
__private_extern__
int
if (retval) {
hfs_systemfile_unlock(hfsmp, lockflags);
hfs_end_transaction(hfsmp);
+ cat_releasedesc(&desc);
break;
}
hfs_end_transaction(hfsmp);
cnid = desc.cd_parentcnid;
+ cat_releasedesc(&desc);
}
return retval;
/*
- * Copyright (c) 2002-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
(void) hfs_lock(cp, HFS_FORCE_LOCK);
+ /*
+ * Recycle named streams quickly so that the data fork vnode can
+ * go inactive in a timely manner (so that it can be zero filled
+ * or truncated if needed).
+ */
+ if (vnode_isnamedstream(vp))
+ recycle = 1;
+
/*
* We should lock cnode before checking the flags in the
* condition below and should unlock the cnode before calling
lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
- if (cp->c_blocks > 0)
- printf("hfs_inactive: attempting to delete a non-empty file!");
-
+ if (cp->c_blocks > 0) {
+ printf("hfs_inactive: deleting non-empty%sfile %d, "
+ "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
+ (int)cp->c_fileid, (int)cp->c_blocks);
+ }
//
// release the name pointer in the descriptor so that
hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
}
+ /*
+ * A file may have had delayed allocations, in which case hfs_update
+ * would not have updated the catalog record (cat_update). We need
+ * to do that now, before we lose our fork data. We also need to
+ * force the update, or hfs_update will again skip the cat_update.
+ */
if ((cp->c_flag & C_MODIFIED) ||
cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
+ cp->c_flag |= C_FORCEUPDATE;
hfs_update(vp, 0);
}
out:
(void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
cp = VTOC(vp);
+ /*
+ * Check if a deleted resource fork vnode missed a
+ * VNOP_INACTIVE call and requires truncation.
+ */
+ if (VNODE_IS_RSRC(vp) &&
+ (cp->c_flag & C_DELETED) &&
+ (VTOF(vp)->ff_blocks != 0)) {
+ hfs_unlock(cp);
+ ubc_setsize(vp, 0);
+
+ hfs_lock_truncate(cp, TRUE);
+ (void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+
+ (void) hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ap->a_context);
+
+ hfs_unlock_truncate(cp, TRUE);
+ }
+ /*
+ * A file may have had delayed allocations, in which case hfs_update
+ * would not have updated the catalog record (cat_update). We need
+ * to do that now, before we lose our fork data. We also need to
+ * force the update, or hfs_update will again skip the cat_update.
+ */
+ if ((cp->c_flag & C_MODIFIED) ||
+ cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
+ cp->c_flag |= C_FORCEUPDATE;
+ hfs_update(vp, 0);
+ }
+
/*
* Keep track of an inactive hot file.
*/
if (cp->c_flag & C_HARDLINK) {
vnode_setmultipath(vp);
}
+ /*
+ * Tag resource fork vnodes as needing an VNOP_INACTIVE
+ * so that any deferred removes (open unlinked files)
+ * have the chance to process the resource fork.
+ */
+ if (VNODE_IS_RSRC(vp)) {
+ /* Force VL_NEEDINACTIVE on this vnode */
+ vnode_ref(vp);
+ vnode_rele(vp);
+ }
hfs_chashwakeup(cp, H_ALLOC | H_ATTACH);
/*
void
hfs_relorigin(struct cnode *cp, cnid_t parentcnid)
{
- linkorigin_t *origin = NULL;
+ linkorigin_t *origin, *prev;
void * thread = current_thread();
- TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) {
+ TAILQ_FOREACH_SAFE(origin, &cp->c_originlist, lo_link, prev) {
if ((origin->lo_thread == thread) ||
(origin->lo_parentcnid == parentcnid)) {
TAILQ_REMOVE(&cp->c_originlist, origin, lo_link);
+ FREE(origin, M_TEMP);
break;
}
}
/*
- * Copyright (c) 1999-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
(bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
*vpp = NULL;
+ cat_releasedesc(&cndesc);
return (ENOENT); /* open unlinked file */
}
}
VTOC(vp)->c_blocks = fp->ff_blocks;
}
+ /*
+ Regardless of whether or not the totalblocks actually increased,
+ we should reset the allocLimit field. If it changed, it will
+ get updated; if not, it will remain the same.
+ */
+ hfsmp->allocLimit = vcb->totalBlocks;
hfs_systemfile_unlock(hfsmp, lockflags);
hfs_end_transaction(hfsmp);
journal_fork.cf_extents[0].blockCount = newBlockCount;
journal_fork.cf_blocks = newBlockCount;
error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
+ cat_releasedesc(&journal_desc); /* all done with cat descriptor */
if (error) {
printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
goto free_fail;
jib_fork.cf_extents[0].blockCount = 1;
jib_fork.cf_blocks = 1;
error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
+ cat_releasedesc(&jib_desc); /* all done with cat descriptor */
if (error) {
printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
goto fail;
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
hfs_lock_truncate(cp, TRUE);
- if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK)))
- goto out;
-
+ if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) {
+ hfs_unlock_truncate(cp, TRUE);
+ return (error);
+ }
error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0);
//
recycle_rsrc = 1;
}
- hfs_unlockpair(dcp, cp);
-out:
+ /*
+ * Drop the truncate lock before unlocking the cnode
+ * (which can potentially perform a vnode_put and
+ * recycle the vnode which in turn might require the
+ * truncate lock)
+ */
hfs_unlock_truncate(cp, TRUE);
+ hfs_unlockpair(dcp, cp);
if (recycle_rsrc && vnode_getwithvid(rvp, rvid) == 0) {
vnode_recycle(rvp);
int lockflags;
int error = 0;
int started_tr = 0;
- int isbigfile = 0, hasxattrs=0, isdir=0;
+ int isbigfile = 0, defer_remove=0, isdir=0;
cp = VTOC(vp);
dcp = VTOC(dvp);
* (needed for hfs_truncate)
*/
if (isdir == 0 && (cp->c_blocks - VTOF(vp)->ff_blocks)) {
- error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE);
- if (error)
- goto out;
- /* Defer the vnode_put on rvp until the hfs_unlock(). */
- cp->c_flag |= C_NEED_RVNODE_PUT;
+ /*
+ * We must avoid calling hfs_vgetrsrc() when we have
+ * an active resource fork vnode to avoid deadlocks
+ * when that vnode is in the VL_TERMINATE state. We
+ * can defer removing the file and its resource fork
+ * until the call to hfs_vnop_inactive() occurs.
+ */
+ if (cp->c_rsrc_vp) {
+ defer_remove = 1;
+ } else {
+ error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE);
+ if (error)
+ goto out;
+ /* Defer the vnode_put on rvp until the hfs_unlock(). */
+ cp->c_flag |= C_NEED_RVNODE_PUT;
+ }
}
/* Check if this file is being used. */
if (isdir == 0) {
individual transactions in case there are too many */
if ((hfsmp->hfs_attribute_vp != NULL) &&
(cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) {
- hasxattrs = 1;
+ defer_remove = 1;
}
/*
/*
* There are two cases to consider:
- * 1. File is busy/big ==> move/rename the file
+ * 1. File is busy/big/defer_remove ==> move/rename the file
* 2. File is not in use ==> remove the file
*/
- if (dataforkbusy || rsrcforkbusy || isbigfile || hasxattrs) {
+ if (dataforkbusy || rsrcforkbusy || isbigfile || defer_remove) {
char delname[32];
struct cat_desc to_desc;
struct cat_desc todir_desc;
struct cat_fork *dataforkp = NULL;
struct cat_fork *rsrcforkp = NULL;
struct cat_fork datafork;
+ struct cat_fork rsrcfork;
struct hfsmount *hfsmp;
int lockflags;
int error;
dataforkp = &datafork;
}
+ /*
+ * For resource forks with delayed allocations, make sure
+ * the block count and file size match the number of blocks
+ * actually allocated to the file on disk.
+ */
+ if (rsrcforkp && (cp->c_rsrcfork->ff_unallocblocks != 0)) {
+ bcopy(rsrcforkp, &rsrcfork, sizeof(rsrcfork));
+ rsrcfork.cf_blocks = (cp->c_rsrcfork->ff_blocks - cp->c_rsrcfork->ff_unallocblocks);
+ rsrcfork.cf_size = rsrcfork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
+ rsrcforkp = &rsrcfork;
+ }
+
/*
* Lock the Catalog b-tree file.
*/
int error;
int vid;
+restart:
	/* Attempt to use existing vnode */
if ((rvp = cp->c_rsrc_vp)) {
vid = vnode_vid(rvp);
error = vnode_getwithvid(rvp, vid);
- if (can_drop_lock)
+ if (can_drop_lock) {
(void) hfs_lock(cp, HFS_FORCE_LOCK);
-
+ /*
+ * When our lock was relinquished, the resource fork
+ * could have been recycled. Check for this and try
+ * again.
+ */
+ if (error == ENOENT)
+ goto restart;
+ }
if (error) {
const char * name = (const char *)VTOC(vp)->c_desc.cd_nameptr;
if (name)
- printf("hfs_vgetrsrc: couldn't get"
- " resource fork for %s\n", name);
+ printf("hfs_vgetrsrc: couldn't get resource"
+ " fork for %s, err %d\n", name, error);
return (error);
}
} else {
#endif
hfs_systemfile_unlock(hfsmp, lockflags);
hfs_end_transaction(hfsmp);
+ if (result)
+ break;
}
exit:
FREE(iterator, M_TEMP);
/*
* Initialize the calendar.
*/
+ bsd_init_kprintf("calling IOKitInitializeTime\n");
IOKitInitializeTime();
if (turn_on_log_leaks && !new_nkdbufs)
if (PE_parse_boot_arg("nbuf", &max_nbuf_headers)) {
customnbuf = 1;
}
+#if !defined(SECURE_KERNEL)
PE_parse_boot_arg("kmem", &setup_kmem);
+#endif
PE_parse_boot_arg("trace", &new_nkdbufs);
if (PE_parse_boot_arg("msgbuf", &msgbuf)) {
posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval)
{
proc_t p = ap; /* quiet bogus GCC vfork() warning */
+ user_addr_t pid = uap->pid;
register_t ival[2]; /* dummy retval for vfork() */
struct image_params image_params, *imgp;
struct vnode_attr va;
*
* If the parent wants the pid, copy it out
*/
- if (uap->pid != USER_ADDR_NULL)
- (void)suword(uap->pid, p->p_pid);
+ if (pid != USER_ADDR_NULL)
+ (void)suword(pid, p->p_pid);
retval[0] = error;
/*
* Override inherited code signing flags with the
}
sig_lock_to_exit(p);
}
-#if !CONFIG_EMBEDDED /* BER_XXX */
- if (p->p_pid == 1) {
+ if (p == initproc) {
proc_unlock(p);
printf("pid 1 exited (signal %d, exit %d)",
WTERMSIG(rv), WEXITSTATUS(rv));
"launchd"),
init_task_failure_data);
}
-#endif
p->p_lflag |= P_LEXIT;
p->p_xstat = rv;
sysctl_nx
(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
+#ifdef SECURE_KERNEL
+ return ENOTSUP;
+#endif
int new_value, changed;
int error;
error = sysctl_io_number(req, nx_enabled, sizeof(nx_enabled), &new_value, &changed);
- if (error)
- return error;
+ if (error)
+ return error;
- if (changed) {
+ if (changed) {
#ifdef __i386__
/*
* Only allow setting if NX is supported on the chip
*/
if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD))
- return ENOTSUP;
+ return ENOTSUP;
#endif
- nx_enabled = new_value;
- }
+ nx_enabled = new_value;
+ }
return(error);
}
if (!unregistering) {
if ((entry->sfe_flags & SFEF_UNREGISTERING) != 0) {
/*
- * Another thread is unregistering the filter, we need to
- * avoid detaching the filter here so the socket won't go
- * away.
+ * Another thread is unregistering the filter, we
+ * need to avoid detaching the filter here so the
+ * socket won't go away. Bump up the socket's
+ * usecount so that it won't be freed until after
+ * the filter unregistration has been completed;
+ * at this point the caller has already held the
+ * socket's lock, so we can directly modify the
+ * usecount.
*/
+ if (!(entry->sfe_flags & SFEF_DETACHXREF)) {
+ entry->sfe_socket->so_usecount++;
+ entry->sfe_flags |= SFEF_DETACHXREF;
+ }
lck_mtx_unlock(sock_filter_lock);
return;
}
else {
/*
* Clear the removing flag. We will perform the detach here or
- * request a delayed deatch.
+ * request a delayed detach. Since we do an extra ref release
+ * below, bump up the usecount if we haven't done so.
*/
entry->sfe_flags &= ~SFEF_UNREGISTERING;
+ if (!(entry->sfe_flags & SFEF_DETACHXREF)) {
+ entry->sfe_socket->so_usecount++;
+ entry->sfe_flags |= SFEF_DETACHXREF;
+ }
}
if (entry->sfe_socket->so_filteruse != 0) {
filter->sf_flags |= SFF_DETACHING;
for (next_entry = entry_head; next_entry;
- next_entry = next_entry->sfe_next_onfilter) {
- socket_lock(next_entry->sfe_socket, 1);
+ next_entry = next_entry->sfe_next_onfilter) {
+ /*
+ * Mark this as "unregistering"; upon dropping the
+ * lock, another thread may win the race and attempt
+ * to detach a socket from it (e.g. as part of close)
+ * before we get a chance to detach. Setting this
+ * flag practically tells the other thread to go away.
+ * If the other thread wins, this causes an extra
+ * reference hold on the socket so that it won't be
+ * deallocated until after we finish with the detach
+ * for it below. If we win the race, the extra
+ * reference hold is also taken to compensate for the
+ * extra reference release when detach is called
+ * with a "1" for its second parameter.
+ */
next_entry->sfe_flags |= SFEF_UNREGISTERING;
- socket_unlock(next_entry->sfe_socket, 0); /* Radar 4201550: prevents the socket from being deleted while being unregistered */
}
}
#define PTHREAD_START_SETSCHED 0x02000000
#define PTHREAD_START_DETACHED 0x04000000
#define PTHREAD_START_POLICY_BITSHIFT 16
-#define PTHREAD_START_POLICY_MASK 0xffff
+#define PTHREAD_START_POLICY_MASK 0xff
#define PTHREAD_START_IMPORTANCE_MASK 0xffff
#define SCHED_OTHER POLICY_TIMESHARE
extinfo.timeshare = 0;
thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
- precedinfo.importance = importance;
+#define BASEPRI_DEFAULT 31
+ precedinfo.importance = (importance - BASEPRI_DEFAULT);
thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
}
int error;
if ( (error = preparefileread(p, &fp, fd, 1)) )
- return (error);
+ goto out;
error = dofileread(vfs_context_current(), fp, uap->buf, uap->nbyte,
uap->offset, FOF_OFFSET, retval);
if (!error)
KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
-
+
+out:
return (error);
}
mutex_held = so->so_proto->pr_domain->dom_mtx;
lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
- /* Double check here and return if there's no outstanding upcall */
- if (!(so->so_flags & SOF_UPCALLINUSE))
+ /*
+ * Double check here and return if there's no outstanding upcall;
+ * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
+ */
+ if (!(so->so_flags & SOF_UPCALLINUSE) ||
+ !(so->so_flags & SOF_UPCALLCLOSEWAIT))
return;
so->so_flags |= SOF_CLOSEWAIT;
#endif /* MAC_SOCKET */
break;
+#ifdef __APPLE_API_PRIVATE
+ case SO_UPCALLCLOSEWAIT:
+ error = sooptcopyin(sopt, &optval, sizeof (optval),
+ sizeof (optval));
+ if (error)
+ goto bad;
+ if (optval)
+ so->so_flags |= SOF_UPCALLCLOSEWAIT;
+ else
+ so->so_flags &= ~SOF_UPCALLCLOSEWAIT;
+ break;
+#endif
+
default:
error = ENOPROTOOPT;
break;
#endif /* MAC_SOCKET */
break;
+#ifdef __APPLE_API_PRIVATE
+ case SO_UPCALLCLOSEWAIT:
+ optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
+ goto integer;
+#endif
+
default:
error = ENOPROTOOPT;
break;
sb->sb_mb = m0;
}
sb->sb_lastrecord = m0;
+ sb->sb_mbtail = m0;
m = m0->m_next;
m0->m_next = 0;
static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
register_t *);
static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
- size_t);
+ size_t, boolean_t);
static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
- user_addr_t, size_t);
+ user_addr_t, size_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
boolean_t);
goto out;
}
if (uap->namelen > sizeof (ss)) {
- error = getsockaddr(so, &sa, uap->name, uap->namelen);
+ error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
} else {
- error = getsockaddr_s(so, &ss, uap->name, uap->namelen);
+ error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
if (error == 0) {
sa = (struct sockaddr *)&ss;
want_free = FALSE;
boolean_t want_free = TRUE;
int error;
int fd = uap->s;
+ boolean_t dgram;
AUDIT_ARG(fd, uap->s);
error = file_socket(fd, &so);
goto out;
}
+ /*
+ * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
+ * if this is a datagram socket; translate for other types.
+ */
+ dgram = (so->so_type == SOCK_DGRAM);
+
/* Get socket address now before we obtain socket lock */
if (uap->namelen > sizeof (ss)) {
- error = getsockaddr(so, &sa, uap->name, uap->namelen);
+ error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
} else {
- error = getsockaddr_s(so, &ss, uap->name, uap->namelen);
+ error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
if (error == 0) {
sa = (struct sockaddr *)&ss;
want_free = FALSE;
if (mp->msg_name != USER_ADDR_NULL) {
if (mp->msg_namelen > sizeof (ss)) {
error = getsockaddr(so, &to, mp->msg_name,
- mp->msg_namelen);
+ mp->msg_namelen, TRUE);
} else {
error = getsockaddr_s(so, &ss, mp->msg_name,
- mp->msg_namelen);
+ mp->msg_namelen, TRUE);
if (error == 0) {
to = (struct sockaddr *)&ss;
want_free = FALSE;
*/
static int
getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
- size_t len)
+ size_t len, boolean_t translate_unspec)
{
struct sockaddr *sa;
int error;
* sockets we leave it unchanged and let the lower layer
* handle it.
*/
- if (sa->sa_family == AF_UNSPEC &&
+ if (translate_unspec && sa->sa_family == AF_UNSPEC &&
INP_CHECK_SOCKAF(so, AF_INET) &&
len == sizeof (struct sockaddr_in))
sa->sa_family = AF_INET;
static int
getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
- user_addr_t uaddr, size_t len)
+ user_addr_t uaddr, size_t len, boolean_t translate_unspec)
{
int error;
* sockets we leave it unchanged and let the lower layer
* handle it.
*/
- if (ss->ss_family == AF_UNSPEC &&
+ if (translate_unspec && ss->ss_family == AF_UNSPEC &&
INP_CHECK_SOCKAF(so, AF_INET) &&
len == sizeof (struct sockaddr_in))
ss->ss_family = AF_INET;
static int
proto_hash_value(u_long protocol_family)
{
+ /*
+ * dlil_proto_unplumb_all() depends on the mapping between
+ * the hash bucket index and the protocol family defined
+ * here; future changes must be applied there as well.
+ */
switch(protocol_family) {
case PF_INET:
return 0;
ifnet_lock_done(ifp);
}
+
+__private_extern__ void
+dlil_proto_unplumb_all(struct ifnet *ifp)
+{
+ /*
+ * if_proto_hash[0-3] are for PF_INET, PF_INET6, PF_APPLETALK
+ * and PF_VLAN, where each bucket contains exactly one entry;
+ * PF_VLAN does not need an explicit unplumb.
+ *
+ * if_proto_hash[4] is for other protocols; we expect anything
+ * in this bucket to respond to the DETACHING event (which would
+ * have happened by now) and do the unplumb then.
+ */
+ (void) proto_unplumb(PF_INET, ifp);
+#if INET6
+ (void) proto_unplumb(PF_INET6, ifp);
+#endif /* INET6 */
+#if NETAT
+ (void) proto_unplumb(PF_APPLETALK, ifp);
+#endif /* NETAT */
+}
interface_filter_t *filter_ref);
void dlil_detach_filter(interface_filter_t filter);
int dlil_detach_protocol(ifnet_t ifp, u_long protocol);
+extern void dlil_proto_unplumb_all(ifnet_t);
#endif /* BSD_KERNEL_PRIVATE */
u_int32_t count;
u_int32_t i;
- if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp, &count) != 0) {
+ if (ifnet_list_get_all(IFNET_FAMILY_ANY, &ifp, &count) == 0) {
for (i = 0; i < count; i++) {
if_down(ifp[i]);
+ dlil_proto_unplumb_all(ifp[i]);
}
ifnet_list_free(ifp);
}
-
+
return 0;
}
struct socket_filter;
-#define SFEF_DETACHUSEZERO 0x1 // Detach when use reaches zero
-#define SFEF_UNREGISTERING 0x2 // Remove due to unregister
+#define SFEF_DETACHUSEZERO 0x1 /* Detach when use reaches zero */
+#define SFEF_UNREGISTERING 0x2 /* Remove due to unregister */
+#define SFEF_DETACHXREF 0x4 /* Extra reference held for detach */
struct socket_filter_entry {
struct socket_filter_entry *sfe_next_onsocket;
extern struct dlil_threading_info *dlil_lo_thread_ptr;
extern int dlil_multithreaded_input;
+static errno_t
+ifnet_list_get_common(ifnet_family_t, boolean_t, ifnet_t **, u_int32_t *);
+
/*
Temporary work around until we have real reference counting
}
errno_t
-ifnet_list_get(
- ifnet_family_t family,
- ifnet_t **list,
- u_int32_t *count)
+ifnet_list_get(ifnet_family_t family, ifnet_t **list, u_int32_t *count)
+{
+ return (ifnet_list_get_common(family, FALSE, list, count));
+}
+
+__private_extern__ errno_t
+ifnet_list_get_all(ifnet_family_t family, ifnet_t **list, u_int32_t *count)
+{
+ return (ifnet_list_get_common(family, TRUE, list, count));
+}
+
+static errno_t
+ifnet_list_get_common(ifnet_family_t family, boolean_t get_all, ifnet_t **list,
+ u_int32_t *count)
{
struct ifnet *ifp;
u_int32_t cmax = 0;
*count = 0;
errno_t result = 0;
-
- if (list == NULL || count == NULL) return EINVAL;
-
+
+ if (list == NULL || count == NULL)
+ return (EINVAL);
+
ifnet_head_lock_shared();
- TAILQ_FOREACH(ifp, &ifnet, if_link)
- {
- if (ifp->if_eflags & IFEF_DETACHING) continue;
- if (family == 0 || ifp->if_family == family)
+ TAILQ_FOREACH(ifp, &ifnet, if_link) {
+ if ((ifp->if_eflags & IFEF_DETACHING) && !get_all)
+ continue;
+ if (family == IFNET_FAMILY_ANY || ifp->if_family == family)
cmax++;
}
-
+
if (cmax == 0)
result = ENXIO;
-
+
if (result == 0) {
- MALLOC(*list, ifnet_t*, sizeof(ifnet_t) * (cmax + 1), M_TEMP, M_NOWAIT);
+ MALLOC(*list, ifnet_t*, sizeof(ifnet_t) * (cmax + 1),
+ M_TEMP, M_NOWAIT);
if (*list == NULL)
result = ENOMEM;
}
if (result == 0) {
- TAILQ_FOREACH(ifp, &ifnet, if_link)
- {
- if (ifp->if_eflags & IFEF_DETACHING) continue;
- if (*count + 1 > cmax) break;
- if (family == 0 || ((ifnet_family_t)ifp->if_family) == family)
- {
+ TAILQ_FOREACH(ifp, &ifnet, if_link) {
+ if ((ifp->if_eflags & IFEF_DETACHING) && !get_all)
+ continue;
+ if (*count + 1 > cmax)
+ break;
+ if (family == IFNET_FAMILY_ANY ||
+ ((ifnet_family_t)ifp->if_family) == family) {
(*list)[*count] = (ifnet_t)ifp;
ifnet_reference((*list)[*count]);
(*count)++;
(*list)[*count] = NULL;
}
ifnet_head_done();
-
- return 0;
+
+ return (result);
}
void
-ifnet_list_free(
- ifnet_t *interfaces)
+ifnet_list_free(ifnet_t *interfaces)
{
int i;
-
- if (interfaces == NULL) return;
-
- for (i = 0; interfaces[i]; i++)
- {
+
+ if (interfaces == NULL)
+ return;
+
+ for (i = 0; interfaces[i]; i++) {
ifnet_release(interfaces[i]);
}
-
+
FREE(interfaces, M_TEMP);
}
*/
errno_t ifnet_list_get(ifnet_family_t family, ifnet_t **interfaces, u_int32_t *count);
+#ifdef KERNEL_PRIVATE
+/*!
+ @function ifnet_list_get_all
+ @discussion Get a list of attached interfaces. List will be set to
+ point to an array allocated by ifnet_list_get. The interfaces
+ are refcounted and the counts will be incremented before the
+ function returns. The list of interfaces must be freed using
+ ifnet_list_free. This is similar to ifnet_list_get, except
+ that it includes interfaces that are detaching.
+ @param family The interface family (i.e. IFNET_FAMILY_ETHERNET). To
+ find interfaces of all families, use IFNET_FAMILY_ANY.
+ @param interfaces A pointer to an array of interface references.
+ @param count A pointer that will be filled in with the number of
+ matching interfaces in the array.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_list_get_all(ifnet_family_t family, ifnet_t **interfaces, u_int32_t *count);
+#endif /* KERNEL_PRIVATE */
+
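A minimal usage sketch (illustrative only; the calling context and error
handling are assumptions, not part of this header):

	ifnet_t *ifp_list;
	u_int32_t cnt, i;

	if (ifnet_list_get_all(IFNET_FAMILY_ANY, &ifp_list, &cnt) == 0) {
		for (i = 0; i < cnt; i++) {
			/* inspect or quiesce ifp_list[i]; detaching interfaces are included */
		}
		ifnet_list_free(ifp_list);	/* releases the references taken by the get */
	}

This mirrors the shutdown-path caller earlier in this diff, which pairs
ifnet_list_get_all() with if_down() and dlil_proto_unplumb_all().
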
/*!
@function ifnet_list_free
@discussion Free a list of interfaces returned by ifnet_list_get.
}
}
if (locked) {
+ locked = 0;
lck_mtx_unlock(entry->domain->dom_mtx);
}
}
* close routine typically issues RTM_DELETE which clears the RTF_UP
* flag on the entry so that the code below reclaims the storage.
*/
- if (rnh->rnh_close && rt->rt_refcnt == 0)
+ if (rnh && rnh->rnh_close && rt->rt_refcnt == 0)
rnh->rnh_close((struct radix_node *)rt, rnh);
/*
#endif
static struct router_info *
- find_rti(struct ifnet *ifp);
+ find_rti(struct ifnet *ifp, int wait);
static struct igmpstat igmpstat;
static struct router_info *
find_rti(
- struct ifnet *ifp)
+ struct ifnet *ifp, int wait)
{
struct router_info *rti = Head;
rti = rti->rti_next;
}
- MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, M_NOWAIT);
+ MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, wait);
if (rti != NULL)
{
rti->rti_ifp = ifp;
timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
if (timer == 0)
timer = 1;
- rti = find_rti(ifp);
+ rti = find_rti(ifp, M_NOWAIT);
if (rti == NULL) {
m_freem(m);
return;
inm->inm_timer = 0;
inm->inm_state = IGMP_OTHERMEMBER;
} else {
- inm->inm_rti = find_rti(inm->inm_ifp);
+ inm->inm_rti = find_rti(inm->inm_ifp, M_WAITOK);
if (inm->inm_rti == NULL) return ENOMEM;
igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
inm->inm_timer = IGMP_RANDOM_DELAY(
while (inm != NULL) {
if (inm->inm_timer == 0) {
/* do nothing */
- } else if (--inm->inm_timer == 0) {
+ } else if ((--inm->inm_timer == 0) && (inm->inm_rti != NULL)) {
igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
inm->inm_state = IGMP_IREPORTEDLAST;
} else {
#define IP_TRAFFIC_MGT_BACKGROUND 65 /* int*; get background IO flags; set background IO */
-#if CONFIG_FORCE_OUT_IFP
+#ifdef PRIVATE
/* This is a hack, this is only a hack. */
#define IP_FORCE_OUT_IFP 69 /* char ifname[] - send traffic on this interface */
#endif
* Main firewall chains definitions and global var's definitions.
*/
#ifdef KERNEL
+#if IPFIREWALL
#define IP_FW_PORT_DYNT_FLAG 0x10000
#define IP_FW_PORT_TEE_FLAG 0x20000
u_int16_t divert_rule; /* divert cookie */
u_int32_t retval;
};
+//struct ip_fw_args;
/*
* Function definitions.
extern int fw_one_pass;
extern int fw_enable;
#define IPFW_LOADED (ip_fw_chk_ptr != NULL)
+#endif /* IPFIREWALL */
#endif /* KERNEL */
#endif /* !__LP64__ */
/* Firewall hooks */
+#if IPFIREWALL
ip_fw_chk_t *ip_fw_chk_ptr;
int fw_enable = 1;
int fw_bypass = 1;
#endif
int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **) = NULL;
+#endif /* IPFIREWALL */
SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "link local");
u_short sum;
struct in_addr pkt_dst;
u_int32_t div_info = 0; /* packet divert/tee info */
+#if IPFIREWALL
struct ip_fw_args args;
+#endif
ipfilter_t inject_filter_ref = 0;
struct m_tag *tag;
struct route ipforward_rt;
}
#endif /* DUMMYNET */
+#if IPDIVERT
if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
struct divert_tag *div_tag;
m_tag_delete(m, tag);
}
+#endif
+
if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
struct ip_fwd_tag *ipfwd_tag;
* to be sent and the original packet to be freed).
*/
ip_nhops = 0; /* for source routed packets */
+#if IPFIREWALL
if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop, &ipforward_rt)) {
+#else
+ if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, NULL, &ipforward_rt)) {
+#endif
return;
}
* Cache the destination address of the packet; this may be
* changed by use of 'ipfw fwd'.
*/
+#if IPFIREWALL
pkt_dst = args.next_hop == NULL ?
ip->ip_dst : args.next_hop->sin_addr;
+#else
+ pkt_dst = ip->ip_dst;
+#endif
/*
* Enable a consistency check between the destination address
* the packets are received.
*/
checkif = ip_checkinterface && (ipforwarding == 0) &&
- ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
- (args.next_hop == NULL);
+ ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0)
+#if IPFIREWALL
+ && (args.next_hop == NULL);
+#else
+ ;
+#endif
lck_mtx_lock(rt_mtx);
TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
OSAddAtomic(1, (SInt32*)&ipstat.ips_cantforward);
m_freem(m);
} else {
+#if IPFIREWALL
ip_forward(m, 0, args.next_hop, &ipforward_rt);
+#else
+ ip_forward(m, 0, NULL, &ipforward_rt);
+#endif
if (ipforward_rt.ro_rt != NULL) {
rtfree(ipforward_rt.ro_rt);
ipforward_rt.ro_rt = NULL;
*/
OSAddAtomic(1, (SInt32*)&ipstat.ips_delivered);
{
+#if IPFIREWALL
if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
/* TCP needs IPFORWARD info if available */
struct m_tag *fwd_tag;
ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
}
+#else
+ ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
+#endif
return;
}
#if IPFIREWALL_FORWARD
int fwd_rewrite_src = 0;
#endif
+#if IPFIREWALL
struct ip_fw_args args;
+#endif
int didfilter = 0;
ipfilter_t inject_filter_ref = 0;
struct m_tag *tag;
KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
packetlist = m0;
- args.next_hop = NULL;
#if IPFIREWALL
+ args.next_hop = NULL;
args.eh = NULL;
args.rule = NULL;
args.divert_rule = 0; /* divert cookie */
m_tag_delete(m0, tag);
}
#endif /* IPDIVERT */
-#endif /* IPFIREWALL */
if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
struct ip_fwd_tag *ipfwd_tag;
m_tag_delete(m0, tag);
}
+#endif /* IPFIREWALL */
m = m0;
hlen = len;
}
ip = mtod(m, struct ip *);
+#if IPFIREWALL
pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
+#else
+ pkt_dst = ip->ip_dst;
+#endif
/*
* Fill in IP header.
struct inpcbinfo ripcbinfo;
/* control hooks for ipfw and dummynet */
+#if IPFIREWALL
ip_fw_ctl_t *ip_fw_ctl_ptr;
#if DUMMYNET
ip_dn_ctl_t *ip_dn_ctl_ptr;
#endif /* DUMMYNET */
+#endif /* IPFIREWALL */
/*
* Nominal space allocated to a raw ip socket.
* Grow the congestion window, if the
* connection is cwnd bound.
*/
- if (tp->snd_cwnd < tp->snd_wnd) {
- tp->t_bytes_acked += acked;
- if (tp->t_bytes_acked > tp->snd_cwnd) {
- tp->t_bytes_acked -= tp->snd_cwnd;
- tp->snd_cwnd += tp->t_maxseg;
- }
- }
sbdrop(&so->so_snd, acked);
if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
SEQ_LEQ(th->th_ack, tp->snd_recover))
tp->ecn_flags &= ~TE_SENDIPECT;
}
- soisconnected(so);
#if CONFIG_MACF_NET && CONFIG_MACF_SOCKET
/* XXXMAC: recursive lock: SOCK_LOCK(so); */
mac_socketpeer_label_associate_mbuf(m, so);
tp->t_state = TCPS_ESTABLISHED;
tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
}
+ /* soisconnected may lead to socket_unlock in case of upcalls,
+ * make sure this is done when everything is setup.
+ */
+ soisconnected(so);
} else {
/*
* Received initial SYN in SYN-SENT[*] state => simul-
case TCPS_SYN_RECEIVED:
tcpstat.tcps_connects++;
- soisconnected(so);
/* Do window scaling? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(void) tcp_reass(tp, (struct tcphdr *)0, &tlen,
(struct mbuf *)0);
tp->snd_wl1 = th->th_seq - 1;
+
/* FALLTHROUGH */
+ /* soisconnected may lead to socket_unlock in case of upcalls,
+ * make sure this is done when everything is setup.
+ */
+ soisconnected(so);
+
/*
* In ESTABLISHED state: drop duplicate ACKs; ACK out of range
* ACKs. If the ack is in the range
register u_int cw = tp->snd_cwnd;
register u_int incr = tp->t_maxseg;
- if (cw >= tp->snd_ssthresh) {
- tp->t_bytes_acked += acked;
- if (tp->t_bytes_acked >= cw) {
+ if ((acked > incr) && tcp_do_rfc3465) {
+ if (cw >= tp->snd_ssthresh) {
+ tp->t_bytes_acked += acked;
+ if (tp->t_bytes_acked >= cw) {
/* Time to increase the window. */
- tp->t_bytes_acked -= cw;
- } else {
+ tp->t_bytes_acked -= cw;
+ } else {
/* No need to increase yet. */
- incr = 0;
+ incr = 0;
+ }
+ } else {
+ /*
+ * If the user explicitly enables RFC3465
+ * use 2*SMSS for the "L" param. Otherwise
+ * use the more conservative 1*SMSS.
+ *
+ * (See RFC 3465 2.3 Choosing the Limit)
+ */
+ u_int abc_lim;
+
+ abc_lim = (tcp_do_rfc3465 == 0) ?
+ incr : incr * 2;
+ incr = lmin(acked, abc_lim);
}
- } else {
+ }
+ else {
/*
- * If the user explicitly enables RFC3465
- * use 2*SMSS for the "L" param. Otherwise
- * use the more conservative 1*SMSS.
- *
- * (See RFC 3465 2.3 Choosing the Limit)
+ * If the window gives us less than ssthresh packets
+ * in flight, open exponentially (segsz per packet).
+ * Otherwise open linearly: segsz per window
+ * (segsz^2 / cwnd per packet).
*/
- u_int abc_lim;
-
- abc_lim = (tcp_do_rfc3465 == 0) ?
- incr : incr * 2;
- incr = min(acked, abc_lim);
+
+ if (cw >= tp->snd_ssthresh) {
+ incr = incr * incr / cw;
+ }
}
+
tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
}
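	/*
	 * Illustrative numbers (not part of the original change): with
	 * t_maxseg = 1448 and tcp_do_rfc3465 enabled, an ACK covering
	 * 4344 bytes (three segments) during slow start grows cwnd by
	 * lmin(4344, 2 * 1448) = 2896 bytes, whereas the non-RFC3465
	 * path above opens the window by only one segment per ACK.
	 */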
if (acked > so->so_snd.sb_cc) {
tp->snd_wnd -= acked;
ourfinisacked = 0;
}
- sowwakeup(so);
/* detect una wraparound */
if ((tcp_do_newreno || tp->sack_enable) &&
!IN_FASTRECOVERY(tp) &&
}
if (SEQ_LT(tp->snd_nxt, tp->snd_una))
tp->snd_nxt = tp->snd_una;
+
+ /*
+ * sowwakeup must happen after snd_una, et al. are updated so that
+ * the sequence numbers are in sync with so_snd
+ */
+ sowwakeup(so);
switch (tp->t_state) {
* we'll hang forever.
*/
if (so->so_state & SS_CANTRCVMORE) {
- soisdisconnected(so);
tp->t_timer[TCPT_2MSL] = tcp_maxidle;
add_to_time_wait(tp);
+ soisdisconnected(so);
}
tp->t_state = TCPS_FIN_WAIT_2;
goto drop;
extern int slowlink_wsize; /* window correction for slow links */
extern u_long route_generation;
+#if IPFIREWALL
extern int fw_enable; /* firewall check for packet chaining */
extern int fw_bypass; /* firewall check: disable packet chaining if there is rules */
+#endif /* IPFIREWALL */
extern vm_size_t so_cache_zone_element_size;
long adv = lmin(recwin, (long)TCP_MAXWIN << tp->rcv_scale) -
(tp->rcv_adv - tp->rcv_nxt);
- if (adv >= (long) (2 * tp->t_maxseg))
- goto send;
- if (2 * adv >= (long) so->so_rcv.sb_hiwat)
- goto send;
+ if (adv >= (long) (2 * tp->t_maxseg)) {
+
+ /*
+ * Update only if the resulting scaled value of the window changed, or
+ * if there is a change in the sequence since the last ack.
+ * This avoids what appears as dupe ACKS (see rdar://5640997)
+ */
+
+ if ((tp->last_ack_sent != tp->rcv_nxt) || (((recwin + adv) >> tp->rcv_scale) > recwin))
+ goto send;
+ }
+ if (2 * adv >= (long) so->so_rcv.sb_hiwat)
+ goto send;
}
/*
tp->sackhint.sack_bytes_rexmit += len;
}
th->th_ack = htonl(tp->rcv_nxt);
+ tp->last_ack_sent = tp->rcv_nxt;
+
if (optlen) {
bcopy(opt, th + 1, optlen);
th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
boolean_t chain;
boolean_t unlocked = FALSE;
+ /* Make sure ACK/DELACK conditions are cleared before
+ * we unlock the socket.
+ */
+
+ tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
/*
* If allowed, unlock TCP socket while in IP
* but only if the connection is established and
* - there is a non default rule set for the firewall
*/
- chain = tcp_packet_chaining > 1 &&
+ chain = tcp_packet_chaining > 1
#if IPSEC
- ipsec_bypass &&
+ && ipsec_bypass
+#endif
+#if IPFIREWALL
+ && (fw_enable == 0 || fw_bypass)
#endif
- (fw_enable == 0 || fw_bypass);
+ ; // I'm important, not extraneous
+
while (pkt != NULL) {
struct mbuf *npkt = pkt->m_nextpkt;
void
tcp_slowtimo()
{
- struct inpcb *inp;
+ struct inpcb *inp, *nxt;
struct tcpcb *tp;
struct socket *so;
int i;
}
- LIST_FOREACH(inp, &tcb, inp_list) {
+ LIST_FOREACH_SAFE(inp, &tcb, inp_list, nxt) {
tcp_garbage_collect(inp, 0);
}
/* Now cleanup the time wait ones */
- LIST_FOREACH(inp, &time_wait_slots[cur_tw_slot], inp_list) {
+ LIST_FOREACH_SAFE(inp, &time_wait_slots[cur_tw_slot], inp_list, nxt) {
tcp_garbage_collect(inp, 1);
}
static int nd6_inuse, nd6_allocated;
struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6, NULL, NULL, 0, 0, 0, 0, 0 };
-size_t nd_ifinfo_indexlim = 8;
+size_t nd_ifinfo_indexlim = 32; /* increased for 5589193 */
struct nd_ifinfo *nd_ifinfo = NULL;
struct nd_drhead nd_defrouter;
struct nd_prhead nd_prefix = { 0 };
bzero(q, n);
if (nd_ifinfo) {
bcopy((caddr_t)nd_ifinfo, q, n/2);
+ /* Radar 5589193:
+ * SU fix purposely leaks the old nd_ifinfo array
+	 * if we grow the array to more than 32 interfaces
+ * Fix for future release is to use proper locking.
+
FREE((caddr_t)nd_ifinfo, M_IP6NDP);
+ */
}
nd_ifinfo = (struct nd_ifinfo *)q;
}
lck_mtx_unlock(&nmp->nm_lock);
goto bad;
}
+ /* just playin' it safe */
+ sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
if (!(nmp->nm_flag & NFSMNT_INT))
sock_nointerrupt(so, 1);
so->so_upcall = nfsrv_rcv;
so->so_rcv.sb_flags |= SB_UPCALL;
socket_unlock(so, 1);
+ /* just playin' it safe */
+ sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
/* mark that the socket is not in the nfsrv_sockwg list */
slp->ns_wgq.tqe_next = SLPNOLIST;
/* get a pointer to the next consecutive bytes in an mbuf chain */
#define nfsm_chain_get_opaque_pointer(E, NMC, LEN, PTR) \
do { \
+ uint32_t rndlen; \
if (E) break; \
- if ((NMC)->nmc_left >= (uint32_t)(LEN)) { \
+ rndlen = nfsm_rndup(LEN); \
+ if ((NMC)->nmc_left >= rndlen) { \
(PTR) = (void*)(NMC)->nmc_ptr; \
- (NMC)->nmc_left -= nfsm_rndup(LEN); \
- (NMC)->nmc_ptr += nfsm_rndup(LEN); \
+ (NMC)->nmc_left -= rndlen; \
+ (NMC)->nmc_ptr += rndlen; \
} else { \
(E) = nfsm_chain_get_opaque_pointer_f((NMC), (LEN), (u_char**)&(PTR)); \
} \
/* copy the next consecutive bytes of opaque data from an mbuf chain */
#define nfsm_chain_get_opaque(E, NMC, LEN, PTR) \
do { \
+ uint32_t rndlen; \
if (E) break; \
- if ((NMC)->nmc_left >= (LEN)) { \
+ rndlen = nfsm_rndup(LEN); \
+ if ((NMC)->nmc_left >= rndlen) { \
u_char *__tmpptr = (u_char*)(NMC)->nmc_ptr; \
- (NMC)->nmc_left -= nfsm_rndup(LEN); \
- (NMC)->nmc_ptr += nfsm_rndup(LEN); \
+ (NMC)->nmc_left -= rndlen; \
+ (NMC)->nmc_ptr += rndlen; \
bcopy(__tmpptr, (PTR), (LEN)); \
} else { \
(E) = nfsm_chain_get_opaque_f((NMC), (LEN), (u_char*)(PTR)); \
struct user_aiocb {
int aio_fildes; /* File descriptor */
- off_t aio_offset; /* File offset */
+ off_t aio_offset __attribute((aligned(8))); /* File offset */
user_addr_t aio_buf __attribute((aligned(8))); /* Location of buffer */
user_size_t aio_nbytes; /* Length of transfer */
int aio_reqprio; /* Request priority offset */
#include <kern/lock.h>
#include <kern/locks.h>
#include <kern/thread_call.h>
+#include <kern/thread.h>
#include <machine/machine_routines.h>
#include <sys/syslog.h>
#include <sys/ucred.h>
/* pseudo-errors returned inside kernel to modify return to process */
#define ERESTART (-1) /* restart syscall */
#define EJUSTRETURN (-2) /* don't modify regs, just return */
+#define ERECYCLE (-5) /* restart lookup under heavy vnode pressure/recycling */
#endif
#endif /* _SYS_ERRNO_H_ */
*/
void cache_purgevfs(mount_t mp);
int cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
- vfs_context_t context, int *trailing_slash, int *dp_authorized);
+ vfs_context_t context, int *trailing_slash, int *dp_authorized, vnode_t last_dp);
void vnode_cache_authorized_action(vnode_t vp, vfs_context_t context, kauth_action_t action);
void vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action);
#define RB_UNIPROC 0x80 /* don't start slaves */
#define RB_SAFEBOOT 0x100 /* booting safe */
#define RB_UPSDELAY 0x200 /* Delays restart by 5 minutes */
+#define RB_QUICK 0x400 /* quick and ungraceful reboot with file system caches flushed*/
#define RB_PANIC 0 /* reboot due to panic */
#define RB_BOOT 1 /* reboot due to boot() */
#define SO_REUSESHAREUID 0x1025 /* APPLE: Allow reuse of port/socket by different userids */
#ifdef __APPLE_API_PRIVATE
#define SO_NOTIFYCONFLICT 0x1026 /* APPLE: send notification if there is a bind on a port which is already in use */
+#define SO_UPCALLCLOSEWAIT 0x1027 /* APPLE: block on close until an upcall returns */
#endif
#define SO_LINGER_SEC 0x1080 /* linger on close if data present (in seconds) */
#define SO_RESTRICTIONS 0x1081 /* APPLE: deny inbound/outbound/both/flag set */
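SO_UPCALLCLOSEWAIT (defined above) is what the NFS changes earlier in this
diff rely on; a minimal in-kernel sketch of enabling it (the socket "so" and
its surrounding setup are assumptions, not shown here):

	int on = 1;

	/* Block socket close until any in-flight upcall has returned. */
	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof (on));
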
#ifdef __APPLE_API_PRIVATE
#define SOF_NOTIFYCONFLICT 0x400 /* notify that a bind was done on a port already in use */
#endif
+#define SOF_UPCALLCLOSEWAIT 0x800 /* block on close until an upcall returns */
int so_usecount; /* refcounting of socket use */;
int so_retaincnt;
u_int32_t so_filteruse; /* usecount for the socket filters */
/*
* Returns: 0 Success
- * ENOENT No such file or directory
+ * ERECYCLE vnode was recycled from underneath us. Force lookup to be re-driven from namei.
+ * This errno value should not be seen by anyone outside of the kernel.
*/
int
-cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, vfs_context_t ctx, int *trailing_slash, int *dp_authorized)
+cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
+ vfs_context_t ctx, int *trailing_slash, int *dp_authorized, vnode_t last_dp)
{
char *cp; /* pointer into pathname argument */
int vid;
kauth_cred_t ucred;
boolean_t ttl_enabled = FALSE;
struct timeval tv;
- mount_t mp;
+ mount_t mp;
unsigned int hash;
-#if CONFIG_MACF
- int error;
-#endif
+ int error = 0;
ucred = vfs_context_ucred(ctx);
*trailing_slash = 0;
error = mac_vnode_check_lookup(ctx, dp, cnp);
if (error) {
name_cache_unlock();
- return (error);
+ goto errorout;
}
}
#endif /* MAC */
dp = NULLVP;
} else {
need_dp:
- /*
+ /*
* return the last directory we looked at
- * with an io reference held
+ * with an io reference held. If it was the one passed
+ * in as a result of the last iteration of VNOP_LOOKUP,
+ * it should already hold an io ref. No need to increase ref.
*/
- if (dp == ndp->ni_usedvp) {
- /*
- * if this vnode matches the one passed in via USEDVP
- * than this context already holds an io_count... just
- * use vnode_get to get an extra ref for lookup to play
- * with... can't use the getwithvid variant here because
- * it will block behind a vnode_drain which would result
- * in a deadlock (since we already own an io_count that the
- * vnode_drain is waiting on)... vnode_get grabs the io_count
- * immediately w/o waiting... it always succeeds
- */
- vnode_get(dp);
- } else if ( (vnode_getwithvid(dp, vid)) ) {
- /*
- * failure indicates the vnode
- * changed identity or is being
- * TERMINATED... in either case
- * punt this lookup.
- *
- * don't necessarily return ENOENT, though, because
- * we really want to go back to disk and make sure it's
- * there or not if someone else is changing this
- * vnode.
- */
- return (ERESTART);
+ if (last_dp != dp){
+
+ if (dp == ndp->ni_usedvp) {
+ /*
+ * if this vnode matches the one passed in via USEDVP
+ * than this context already holds an io_count... just
+ * use vnode_get to get an extra ref for lookup to play
+ * with... can't use the getwithvid variant here because
+ * it will block behind a vnode_drain which would result
+ * in a deadlock (since we already own an io_count that the
+ * vnode_drain is waiting on)... vnode_get grabs the io_count
+ * immediately w/o waiting... it always succeeds
+ */
+ vnode_get(dp);
+ } else if ( (vnode_getwithvid(dp, vid)) ) {
+ /*
+ * failure indicates the vnode
+ * changed identity or is being
+ * TERMINATED... in either case
+ * punt this lookup.
+ *
+ * don't necessarily return ENOENT, though, because
+ * we really want to go back to disk and make sure it's
+ * there or not if someone else is changing this
+ * vnode.
+ */
+ error = ERECYCLE;
+ goto errorout;
+ }
}
}
if (vp != NULLVP) {
ndp->ni_dvp = dp;
ndp->ni_vp = vp;
- return (0);
+errorout:
+ /*
+ * If we came into cache_lookup_path after an iteration of the lookup loop that
+ * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with a io ref
+ * on it. It is now the job of cache_lookup_path to drop the ref on this vnode
+ * when it is no longer needed. If we get to this point, and last_dp is not NULL
+ * and it is ALSO not the dvp we want to return to caller of this function, it MUST be
+ * the case that we got to a subsequent path component and this previous vnode is
+ * no longer needed. We can then drop the io ref on it.
+ */
+ if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)){
+ vnode_put(last_dp);
+ }
+
+ //initialized to 0, should be the same if no error cases occurred.
+ return error;
}
blhdr->checksum = 0;
blhdr->checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE);
- if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, tr->blhdr->num_blocks * sizeof(struct buf *))) {
- panic("can't allocate %lu bytes for bparray\n", tr->blhdr->num_blocks * sizeof(struct buf *));
+ if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, blhdr->num_blocks * sizeof(struct buf *))) {
+ panic("can't allocate %lu bytes for bparray\n", blhdr->num_blocks * sizeof(struct buf *));
}
// calculate individual block checksums
blhdr->binfo[i].b.bp = bparray[i];
}
- kmem_free(kernel_map, (vm_offset_t)bparray, tr->blhdr->num_blocks * sizeof(struct buf *));
+ kmem_free(kernel_map, (vm_offset_t)bparray, blhdr->num_blocks * sizeof(struct buf *));
if (ret != amt) {
printf("jnl: %s: end_transaction: only wrote %d of %d bytes to the journal!\n",
* lookup:EROFS
* lookup:EACCES
* lookup:EPERM
- * lookup:???
+ * lookup:ERECYCLE vnode was recycled from underneath us in lookup.
+ * This means we should re-drive lookup from this point.
+ * lookup: ???
* VNOP_READLINK:???
*/
int
struct filedesc *fdp; /* pointer to file descriptor state */
char *cp; /* pointer into pathname argument */
struct vnode *dp; /* the directory we are searching */
+ struct vnode *usedvp = ndp->ni_dvp; /* store pointer to vp in case we must loop due to
+ heavy vnode pressure */
+ u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
uio_t auio;
int error;
struct componentname *cnp = &ndp->ni_cnd;
#endif
fdp = p->p_fd;
+vnode_recycled:
+
/*
* Get a buffer for the name to be translated, and copy the
* name into the buffer.
}
cnp->cn_pnbuf = NULL;
ndp->ni_vp = NULLVP;
+ if (error == ERECYCLE){
+ /* vnode was recycled underneath us. re-drive lookup to start at
+ the beginning again, since recycling invalidated last lookup*/
+ ndp->ni_cnd.cn_flags = cnpflags;
+ ndp->ni_dvp = usedvp;
+ goto vnode_recycled;
+ }
+
return (error);
}
* ENOTDIR Not a directory
* EROFS Read-only file system [CREATE]
* EISDIR Is a directory [CREATE]
- * cache_lookup_path:ENOENT
+ * cache_lookup_path:ERECYCLE (vnode was recycled from underneath us, redrive lookup again)
* vnode_authorize:EROFS
* vnode_authorize:EACCES
* vnode_authorize:EPERM
int current_mount_generation = 0;
int vbusyflags = 0;
int nc_generation = 0;
+ vnode_t last_dp = NULLVP;
/*
* Setup: break out flag bits into variables.
dirloop:
ndp->ni_vp = NULLVP;
- if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized)) ) {
+ if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized, last_dp)) ) {
dp = NULLVP;
goto bad;
}
if (*cp == '\0')
goto emptyname;
- vnode_put(dp);
+ /*
+ * cache_lookup_path is now responsible for dropping io ref on dp
+ * when it is called again in the dirloop. This ensures we hold
+ * a ref on dp until we complete the next round of lookup.
+ */
+ last_dp = dp;
goto dirloop;
}
}
static struct klist fs_klist;
+lck_grp_t *fs_klist_lck_grp;
+lck_mtx_t *fs_klist_lock;
void
vfs_event_init(void)
{
-
klist_init(&fs_klist);
+ fs_klist_lck_grp = lck_grp_alloc_init("fs_klist", NULL);
+ fs_klist_lock = lck_mtx_alloc_init(fs_klist_lck_grp, NULL);
}
void
vfs_event_signal(__unused fsid_t *fsid, u_int32_t event, __unused intptr_t data)
{
-
+ lck_mtx_lock(fs_klist_lock);
KNOTE(&fs_klist, event);
+ lck_mtx_unlock(fs_klist_lock);
}
/*
filt_fsattach(struct knote *kn)
{
+ lck_mtx_lock(fs_klist_lock);
kn->kn_flags |= EV_CLEAR;
KNOTE_ATTACH(&fs_klist, kn);
+ lck_mtx_unlock(fs_klist_lock);
return (0);
}
static void
filt_fsdetach(struct knote *kn)
{
-
+ lck_mtx_lock(fs_klist_lock);
KNOTE_DETACH(&fs_klist, kn);
+ lck_mtx_unlock(fs_klist_lock);
}
static int
vgone(vp, flags); /* clean and reclaim the vnode */
/*
- * give the vnode a new identity so
- * that vnode_getwithvid will fail
- * on any stale cache accesses
+ * give the vnode a new identity so that vnode_getwithvid will fail
+ * on any stale cache accesses...
+ * grab the list_lock so that if we're in "new_vnode"
+ * behind the list_lock trying to steal this vnode, the v_id is stable...
+ * once new_vnode drops the list_lock, it will block trying to take
+ * the vnode lock until we release it... at that point it will evaluate
+ * whether the v_vid has changed
*/
+ vnode_list_lock();
vp->v_id++;
+ vnode_list_unlock();
+
if (isfifo) {
struct fifoinfo * fip;
lf.l_len = 0;
lf.l_type = locktype; /* F_WRLCK or F_RDLCK */
/* Note: id is just a kernel address that's not a proc */
- error = VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_SETLK, &lf, F_FLOCK, context);
+ error = VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_SETLK, &lf, F_FLOCK|F_WAIT, context);
return (error == ENOTSUP ? 0 : error);
}
* Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
*/
+#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;
SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
+#endif /* !SECURE_KERNEL */
#if CONFIG_NO_PRINTF_STRINGS
void
memory_object_size_t file_size;
user_addr_t user_mappings;
struct shared_file_mapping_np *mappings;
-#define SFM_MAX_STACK 4
+#define SFM_MAX_STACK 8
struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
unsigned int mappings_count;
vm_size_t mappings_size;
_ubc_msync
_ubc_offtoblk
_ubc_page_op
+_ubc_pages_resident
_ubc_range_op
_ubc_setcred
_ubc_setsize
__Z17IODeviceTreeAllocPv
__Z17IOServiceOrderingPK15OSMetaClassBaseS1_Pv
__Z18IODTCompareNubNamePK15IORegistryEntryP8OSStringPS3_
-__Z19IODTMapOneInterruptP15IORegistryEntryPmPP6OSDataPPK8OSSymbol
__Z19printDictionaryKeysP12OSDictionaryPc
__Z19tellAppWithResponseP8OSObjectPv
__Z20IODTMakeNVDescriptorP15IORegistryEntryP17IONVRAMDescriptor
__Z21IODTResolveAddressingP15IORegistryEntryPKcP14IODeviceMemory
__Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_
__Z22tellClientWithResponseP8OSObjectPv
-__Z23IODTFindInterruptParentP15IORegistryEntry
__Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc
__Z24broadcast_aggressivenessP8OSObjectPvS1_S1_S1_
__Z26serializedAllowPowerChangeP8OSObjectPvS1_S1_S1_
__ZN14IOPMrootDomain23setQuickSpinDownTimeoutEv
__ZN14IOPMrootDomain24displayWranglerPublishedEPvS0_P9IOService
__ZN14IOPMrootDomain24receivePowerNotificationEm
+__ZN14IOPMrootDomain24systemPowerEventOccurredEPK8OSSymbolP8OSObject
+__ZN14IOPMrootDomain24systemPowerEventOccurredEPK8OSSymbolj
__ZN14IOPMrootDomain25announcePowerSourceChangeEv
__ZN14IOPMrootDomain26handleSleepTimerExpirationEv
__ZN14IOPMrootDomain26restoreUserSpinDownTimeoutEv
-9.1.0
+9.2.0
# The first line of this file contains the master version number for the kernel.
# All other instances of the kernel version in xnu are derived from this file.
__Z17IODeviceTreeAllocPv
__Z17IOServiceOrderingPK15OSMetaClassBaseS1_Pv
__Z18IODTCompareNubNamePK15IORegistryEntryP8OSStringPS3_
-__Z19IODTMapOneInterruptP15IORegistryEntryPmPP6OSDataPPK8OSSymbol
__Z19printDictionaryKeysP12OSDictionaryPc
__Z19tellAppWithResponseP8OSObjectPv
__Z20IODTMakeNVDescriptorP15IORegistryEntryP17IONVRAMDescriptor
__Z21IODTResolveAddressingP15IORegistryEntryPKcP14IODeviceMemory
__Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_
__Z22tellClientWithResponseP8OSObjectPv
-__Z23IODTFindInterruptParentP15IORegistryEntry
__Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc
__Z24broadcast_aggressivenessP8OSObjectPvS1_S1_S1_
__Z26serializedAllowPowerChangeP8OSObjectPvS1_S1_S1_
kInflowForciblyEnabledBit = (1 << 0)
};
+/* kIOPMMessageInternalBatteryFullyDischarged
+ * The battery has drained completely to its "Fully Discharged" state.
+ */
#define kIOPMMessageInternalBatteryFullyDischarged \
iokit_family_msg(sub_iokit_powermanagement, 0x120)
+/* kIOPMMessageSystemPowerEventOccurred
+ * Some major system thermal property has changed, and interested clients may
+ * modify their behavior.
+ */
+#define kIOPMMessageSystemPowerEventOccurred \
+ iokit_family_msg(sub_iokit_powermanagement, 0x130)
+
/*******************************************************************************
*
* Power commands issued to root domain
+ * Use with IOPMrootDomain::receivePowerNotification()
*
* These commands are issued from system drivers only:
* ApplePMU, AppleSMU, IOGraphics, AppleACPIFamily
kIOPMClamshellOpened = (1<<10) // clamshell was opened
};
+
/*******************************************************************************
*
* Power Management Return Codes
#define kIOPMPSPostDishargeWaitSecondsKey "PostDischargeWaitSeconds"
+/* CPU Power Management status keys
+ * Pass as arguments to IOPMrootDomain::systemPowerEventOccurred
+ * Or as arguments to IOPMSystemPowerEventOccurred()
+ * Or to decode the dictionary obtained from IOPMCopyCPUPowerStatus()
+ * These keys reflect restrictions placed on the CPU by the system
+ * to bring the CPU's power consumption within allowable thermal and
+ * power constraints.
+ */
+
+
+/* kIOPMGraphicsPowerLimitsKey
+ * The key representing the dictionary of graphics power limits.
+ * The dictionary contains the other kIOPMCPUPower keys & their associated
+ * values (e.g. Speed limit, Processor Count, and Schedule limits).
+ */
+#define kIOPMGraphicsPowerLimitsKey "Graphics_Power_Limits"
+
+/* kIOPMGraphicsPowerLimitPerformanceKey
+ * The key representing the percent of overall performance made available
+ * by the graphics chip as a percentage (integer 0 - 100).
+ */
+#define kIOPMGraphicsPowerLimitPerformanceKey "Graphics_Power_Performance"
+
+
+
+/* kIOPMCPUPowerLimitsKey
+ * The key representing the dictionary of CPU Power Limits.
+ * The dictionary contains the other kIOPMCPUPower keys & their associated
+ * values (e.g. Speed limit, Processor Count, and Schedule limits).
+ */
+#define kIOPMCPUPowerLimitsKey "CPU_Power_Limits"
+
+/* kIOPMCPUPowerLimitProcessorSpeedKey defines the speed & voltage limits placed
+ * on the CPU.
+ * Represented as a percentage (0-100) of maximum CPU speed.
+ */
+#define kIOPMCPUPowerLimitProcessorSpeedKey "CPU_Speed_Limit"
+
+/* kIOPMCPUPowerLimitProcessorCountKey reflects how many, if any, CPUs have been
+ * taken offline. Represented as an integer number of CPUs (0 - Max CPUs).
+ */
+#define kIOPMCPUPowerLimitProcessorCountKey "CPU_Available_CPUs"
+
+/* kIOPMCPUPowerLimitSchedulerTimeKey represents the percentage (0-100) of CPU time
+ * available; 100% under normal operation. The OS may limit this time to a
+ * percentage less than 100%.
+ */
+#define kIOPMCPUPowerLimitSchedulerTimeKey "CPU_Scheduler_Limit"
+
+
+/* Thermal Level Warning Key
+ * Indicates the thermal constraints placed on the system. This value may
+ * cause clients to action to consume fewer system resources.
+ * The value associated with this warning is defined by the platform.
+ */
+#define kIOPMThermalLevelWarningKey "Thermal_Level_Warning"
+
+/* Thermal Warning Level values
+ * kIOPMThermalWarningLevelNormal - under normal operating conditions
+ * kIOPMThermalWarningLevelDanger - thermal pressure may cause system slowdown
+ * kIOPMThermalWarningLevelCrisis - thermal conditions may cause imminent shutdown
+ *
+ * The platform may define additional thermal levels if necessary.
+ */
+enum {
+ kIOPMThermalWarningLevelNormal = 0,
+ kIOPMThermalWarningLevelDanger = 5,
+ kIOPMThermalWarningLevelCrisis = 10
+};
+
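
As a concrete illustration of how the keys above are meant to be consumed, the hedged sketch below shows a kext reading the current thermal warning level out of the root domain's "Power Status" dictionary (kIOPMRootDomainPowerStatusKey, defined elsewhere in this change). The lookup path and casts are assumptions based on standard IOKit/libkern calls; none of this code is part of the change itself.

    // Hypothetical consumer sketch: read kIOPMThermalLevelWarningKey from the
    // root domain's kIOPMRootDomainPowerStatusKey ("Power Status") property.
    IOPMrootDomain *root = IOService::getPMRootDomain();
    if (root) {
        OSObject     *obj    = root->copyProperty(kIOPMRootDomainPowerStatusKey);
        OSDictionary *status = OSDynamicCast(OSDictionary, obj);
        if (status) {
            OSNumber *level = OSDynamicCast(OSNumber,
                                  status->getObject(kIOPMThermalLevelWarningKey));
            if (level && level->unsigned32BitValue() >= kIOPMThermalWarningLevelDanger) {
                // back off: the platform is asking clients to shed load
            }
        }
        if (obj) obj->release();
    }
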
// PM Settings Controller setting types
// Settings types used primarily with:
kPCICantSleep = 0x00000004
};
+
+
/*
*IOPMrootDomain registry property keys
*/
#define kRootDomainSupportedFeatures "Supported Features"
#define kRootDomainSleepReasonKey "Last Sleep Reason"
#define kRootDomainSleepOptionsKey "Last Sleep Options"
+#define kIOPMRootDomainPowerStatusKey "Power Status"
/*
* Possible sleep reasons found under kRootDomainSleepReasonsKey
virtual IOReturn setProperties ( OSObject * );
IOReturn shutdownSystem ( void );
IOReturn restartSystem ( void );
+
+/*! @function systemPowerEventOccurred
+ @abstract Other drivers may inform IOPMrootDomain of system PM events.
+ @discussion systemPowerEventOccurred is a richer alternative to receivePowerNotification().
+ Only Apple-owned kexts should have reason to call systemPowerEventOccurred.
+ After the event is recorded, interested clients are notified with
+ kIOPMMessageSystemPowerEventOccurred.
+ @param event An OSSymbol describing the type of power event.
+ @param intValue A 32-bit integer value associated with the event (first overload).
+ @param value An OSObject value associated with the event (second overload).
+ @result kIOReturnSuccess on success */
+ IOReturn systemPowerEventOccurred(const OSSymbol *event,
+ uint32_t intValue);
+ IOReturn systemPowerEventOccurred(const OSSymbol *event,
+ OSObject *value);
+
virtual IOReturn receivePowerNotification (UInt32 msg);
virtual void setSleepSupported( IOOptionBits flags );
virtual IOOptionBits getSleepSupported();
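
On the producer side, a platform power driver passes one of the status keys defined above together with a value via the systemPowerEventOccurred declarations shown earlier. A minimal hedged sketch of such a call follows; the surrounding driver context is assumed and is not part of this change.

    // Hypothetical caller sketch: report a thermal level change to the root domain.
    IOPMrootDomain *root = IOService::getPMRootDomain();
    if (root) {
        const OSSymbol *key = OSSymbol::withCString(kIOPMThermalLevelWarningKey);
        if (key) {
            root->systemPowerEventOccurred(key, (uint32_t) kIOPMThermalWarningLevelDanger);
            key->release();
        }
    }
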
IOOptionBits options = _options;
vm_size_t size = _capacity;
void * buffer = _buffer;
- IOVirtualAddress source = _ranges.v64->address;
+ mach_vm_address_t source = (_ranges.v) ? _ranges.v64->address : 0;
IOMemoryMap * map = 0;
vm_offset_t alignment = _alignment;
else if (buffer)
{
if (kIOMemoryTypePhysical64 == (flags & kIOMemoryTypeMask))
- IOFreePhysical((mach_vm_address_t) source, size);
+ IOFreePhysical(source, size);
else if (options & kIOMemoryPhysicallyContiguous)
IOKernelFreeContiguous((mach_vm_address_t) buffer, size);
else if (alignment > 1)
else
fInternalState->fCheckAddressing = (fNumAddressBits && (highPage >= (1UL << (fNumAddressBits - PAGE_SHIFT))));
+ fInternalState->fNewMD = true;
mem->retain();
fMemory = mem;
if (offset >= memLength)
return kIOReturnOverrun;
- if ((offset == internalState->fPreparedOffset) || (offset != state->fOffset)) {
+ if ((offset == internalState->fPreparedOffset) || (offset != state->fOffset) || internalState->fNewMD) {
state->fOffset = 0;
state->fIOVMAddr = 0;
internalState->fNextRemapIndex = 0;
+ internalState->fNewMD = false;
state->fMapped = (IS_MAPPED(fMappingOptions) && fMapper);
mdOp = kIOMDFirstSegment;
};
return( false );
}
-IORegistryEntry * IODTFindInterruptParent( IORegistryEntry * regEntry )
+static IORegistryEntry * IODTFindInterruptParent( IORegistryEntry * regEntry, IOItemCount index )
{
IORegistryEntry * parent;
UInt32 phandle;
+ OSData * data;
+ unsigned int len;
- if( GetUInt32( regEntry, gIODTInterruptParentKey, &phandle))
- parent = FindPHandle( phandle );
+ if( (data = OSDynamicCast( OSData, regEntry->getProperty( gIODTInterruptParentKey )))
+ && (sizeof(UInt32) <= (len = data->getLength()))) {
+ if (((index + 1) * sizeof(UInt32)) > len)
+ index = 0;
+ phandle = ((UInt32 *) data->getBytesNoCopy())[index];
+ parent = FindPHandle( phandle );
- else if( 0 == regEntry->getProperty( "interrupt-controller"))
+ } else if( 0 == regEntry->getProperty( "interrupt-controller"))
parent = regEntry->getParentEntry( gIODTPlane);
else
parent = 0;
*aCellCount = 0;
}
-UInt32 IODTMapOneInterrupt( IORegistryEntry * regEntry, UInt32 * intSpec,
- OSData ** spec, const OSSymbol ** controller )
+static UInt32 IODTMapOneInterrupt( IORegistryEntry * regEntry, UInt32 * intSpec, UInt32 index,
+ OSData ** spec, const OSSymbol ** controller )
{
IORegistryEntry *parent = 0;
OSData *data;
UInt32 i, original_icells;
bool cmp, ok = false;
- parent = IODTFindInterruptParent( regEntry );
+ parent = IODTFindInterruptParent( regEntry, index );
IODTGetICellCounts( parent, &icells, &acells );
addrCmp = 0;
if( acells) {
OSData * local2;
UInt32 * localBits;
UInt32 * localEnd;
+ IOItemCount index;
OSData * map;
OSObject * oneMap;
OSArray * mapped;
OSArray * controllerInts;
- const OSSymbol * controller;
+ const OSSymbol * controller = 0;
OSArray * controllers;
UInt32 skip = 1;
bool ok, nw;
localBits = (UInt32 *) local->getBytesNoCopy();
localEnd = localBits + (local->getLength() / sizeof(UInt32));
+ index = 0;
mapped = OSArray::withCapacity( 1 );
controllers = OSArray::withCapacity( 1 );
if( ok) do {
if( nw) {
- skip = IODTMapOneInterrupt( regEntry, localBits, &map, &controller );
+ skip = IODTMapOneInterrupt( regEntry, localBits, index, &map, &controller );
if( 0 == skip) {
IOLog("%s: error mapping interrupt[%d]\n",
regEntry->getName(), mapped->getCount());
controller->retain();
}
+ index++;
localBits += skip;
mapped->setObject( map );
controllers->setObject( controller );
const OSSymbol * sym = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey);
if (sym) {
- gIOOptionsEntry->removeProperty(sym);
+ if (gIOOptionsEntry->getProperty(sym)) {
+ gIOOptionsEntry->removeProperty(sym);
+ gIOOptionsEntry->sync();
+ }
sym->release();
}
}
UInt8 fCopyContig;
UInt8 fPrepared;
UInt8 fDoubleBuffer;
- UInt8 __pad[1];
+ UInt8 fNewMD;
ppnum_t fCopyPageAlloc;
ppnum_t fCopyPageCount;
IODelete(_ranges.v64, IOAddressRange, _rangesCount);
else
IODelete(_ranges.v, IOVirtualRange, _rangesCount);
+
+ _ranges.v = NULL;
}
if (reserved && reserved->devicePager)
// re-enable this timer for next sleep
idleSleepPending = false;
gSleepOrShutdownPending = 0;
+
+ // Invalidate prior activity tickles to allow wake from doze.
+ if (wrangler) wrangler->changePowerStateTo(0);
break;
case RESTART_STATE:
#endif __i386__
}
+//******************************************************************************
+// systemPowerEventOccurred
+//
+// The power controller is notifying us of a hardware-related power management
+// event that we must handle.
+//
+// systemPowerEventOccurred covers the same functionality that receivePowerNotification
+// does; it simply provides a richer API for conveying more information.
+//******************************************************************************
+IOReturn IOPMrootDomain::systemPowerEventOccurred(
+ const OSSymbol *event,
+ uint32_t intValue)
+{
+ IOReturn attempt = kIOReturnSuccess;
+ OSNumber *newNumber = NULL;
+
+ if (!event)
+ return kIOReturnBadArgument;
+
+ newNumber = OSNumber::withNumber(intValue, 8*sizeof(intValue));
+ if (!newNumber)
+ return kIOReturnInternalError;
+
+ attempt = systemPowerEventOccurred(event, (OSObject *)newNumber);
+
+ newNumber->release();
+
+ return attempt;
+}
+
+IOReturn IOPMrootDomain::systemPowerEventOccurred(
+ const OSSymbol *event,
+ OSObject *value)
+{
+ OSDictionary *thermalsDict = NULL;
+ bool shouldUpdate = true;
+
+ if (!event || !value)
+ return kIOReturnBadArgument;
+
+ // LOCK
+ // We reuse featuresDictLock because it already exists and guards
+ // the very infrequently used publish/remove feature mechanism, so there's
+ // zero risk of stepping on that lock.
+ if (featuresDictLock) IOLockLock(featuresDictLock);
+
+ thermalsDict = (OSDictionary *)getProperty(kIOPMRootDomainPowerStatusKey);
+
+ if (thermalsDict && OSDynamicCast(OSDictionary, thermalsDict)) {
+ thermalsDict = OSDictionary::withDictionary(thermalsDict);
+ } else {
+ thermalsDict = OSDictionary::withCapacity(1);
+ }
+
+ if (!thermalsDict) {
+ shouldUpdate = false;
+ goto exit;
+ }
+
+ thermalsDict->setObject (event, value);
+
+ setProperty (kIOPMRootDomainPowerStatusKey, thermalsDict);
+
+ thermalsDict->release();
+
+exit:
+ // UNLOCK
+ if (featuresDictLock) IOLockUnlock(featuresDictLock);
+
+ if (shouldUpdate)
+ messageClients (kIOPMMessageSystemPowerEventOccurred, (void *)NULL);
+
+ return kIOReturnSuccess;
+}
+
//******************************************************************************
// receivePowerNotification
//
// The power controller is notifying us of a hardware-related power management
-// event that we must handle. This is a result of an 'environment' interrupt from
+// event that we must handle. This may be a result of an 'environment' interrupt from
// the power mgt micro.
//******************************************************************************
#include <IOKit/system.h>
#include <libkern/c++/OSContainers.h>
+#include <libkern/crypto/sha1.h>
extern "C" {
#include <machine/machine_routines.h>
void IOPlatformExpert::registerNVRAMController(IONVRAMController * caller)
{
OSData * data;
- IORegistryEntry * nvram;
- OSString * string;
+ IORegistryEntry * entry;
+ OSString * string = 0;
+ char uuid[ 36 + 1 ];
- nvram = IORegistryEntry::fromPath( "/options", gIODTPlane );
- if ( nvram )
+ entry = IORegistryEntry::fromPath( "/efi/platform", gIODTPlane );
+ if ( entry )
{
- data = OSDynamicCast( OSData, nvram->getProperty( "platform-uuid" ) );
- if ( data && data->getLength( ) == sizeof( uuid_t ) )
+ data = OSDynamicCast( OSData, entry->getProperty( "system-id" ) );
+ if ( data && data->getLength( ) == 16 )
{
- char uuid[ 36 + 1 ];
- uuid_unparse( ( UInt8 * ) data->getBytesNoCopy( ), uuid );
+ SHA1_CTX context;
+ uint8_t digest[ SHA_DIGEST_LENGTH ];
+ const uuid_t space = { 0x2A, 0x06, 0x19, 0x90, 0xD3, 0x8D, 0x44, 0x40, 0xA1, 0x39, 0xC4, 0x97, 0x70, 0x37, 0x65, 0xAC };
+ SHA1Init( &context );
+ SHA1Update( &context, space, sizeof( space ) );
+ SHA1Update( &context, data->getBytesNoCopy( ), data->getLength( ) );
+ SHA1Final( digest, &context );
+
+ digest[ 6 ] = ( digest[ 6 ] & 0x0F ) | 0x50;
+ digest[ 8 ] = ( digest[ 8 ] & 0x3F ) | 0x80;
+
+ uuid_unparse( digest, uuid );
string = OSString::withCString( uuid );
- if ( string )
- {
- getProvider( )->setProperty( kIOPlatformUUIDKey, string );
- publishResource( kIOPlatformUUIDKey, string );
+ }
- string->release( );
+ entry->release( );
+ }
+
+ if ( string == 0 )
+ {
+ entry = IORegistryEntry::fromPath( "/options", gIODTPlane );
+ if ( entry )
+ {
+ data = OSDynamicCast( OSData, entry->getProperty( "platform-uuid" ) );
+ if ( data && data->getLength( ) == sizeof( uuid_t ) )
+ {
+ uuid_unparse( ( uint8_t * ) data->getBytesNoCopy( ), uuid );
+ string = OSString::withCString( uuid );
}
+
+ entry->release( );
}
+ }
+
+ if ( string )
+ {
+ getProvider( )->setProperty( kIOPlatformUUIDKey, string );
+ publishResource( kIOPlatformUUIDKey, string );
- nvram->release( );
+ string->release( );
}
publishResource("IONVRAM");
object = dictionary->getObject( kIOPlatformUUIDKey );
if ( object )
{
- IORegistryEntry * nvram;
+ IORegistryEntry * entry;
OSString * string;
uuid_t uuid;
status = uuid_parse( string->getCStringNoCopy( ), uuid );
if ( status != 0 ) return kIOReturnBadArgument;
- nvram = IORegistryEntry::fromPath( "/options", gIODTPlane );
- if ( nvram )
+ entry = IORegistryEntry::fromPath( "/options", gIODTPlane );
+ if ( entry )
{
- nvram->setProperty( "platform-uuid", uuid, sizeof( uuid_t ) );
- nvram->release( );
+ entry->setProperty( "platform-uuid", uuid, sizeof( uuid_t ) );
+ entry->release( );
}
setProperty( kIOPlatformUUIDKey, string );
extern dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys);
extern dev_t mdevlookup(int devid);
+extern void mdevremoveall(void);
kern_return_t
IOKitBSDInit( void )
void IOSecureBSDRoot(const char * rootName)
{
#if CONFIG_EMBEDDED
+ IOReturn result;
IOPlatformExpert *pe;
- const OSSymbol *functionName = OSSymbol::withCStringNoCopy("SecureRootName");
+ const OSSymbol *functionName = OSSymbol::withCStringNoCopy("SecureRootName");
while ((pe = IOService::getPlatform()) == 0) IOSleep(1 * 1000);
- pe->callPlatformFunction(functionName, false, (void *)rootName, (void *)0, (void *)0, (void *)0);
+ // Returns kIOReturnNotPrivileged if the root device is not secure.
+ // Returns kIOReturnUnsupported if "SecureRootName" is not implemented.
+ result = pe->callPlatformFunction(functionName, false, (void *)rootName, (void *)0, (void *)0, (void *)0);
functionName->release();
+
+ if (result == kIOReturnNotPrivileged) mdevremoveall();
#endif
}
options KDEBUG # kernel tracing # <kdebug>
options NETWORKING # kernel networking # <networking>
options CRYPTO # want crypto code # <crypto>
-options KPIDIRECT # direct access # <kpidirect>
options CONFIG_DTRACE # enable dtrace # <config_dtrace>
#makeoptions LIBDRIVER = "libDriver_kern.o" # <libdriver>
OPTIONS/iokitcpp optional iokitcpp
OPTIONS/kdebug optional kdebug
OPTIONS/networking optional networking
-OPTIONS/kpidirect optional kpidirect
OPTIONS/hibernation optional hibernation
OPTIONS/crypto optional crypto
OPTIONS/config_dtrace optional config_dtrace
Syntax: showMCAstate
| Print machine-check register state after MC exception.
end
+
+define _pt_step
+ #
+ # Step to lower-level page table and print attributes
+ # $kgm_pt_paddr: current page table entry physical address
+ # $kgm_pt_index: current page table entry index (0..511)
+ # returns
+ # $kgm_pt_paddr: next level page table entry physical address
+ # or null if invalid
+ # For $kgm_pt_verbose = 0: print nothing
+ # 1: print basic information
+ # 2: print basic information and hex table dump
+ # The trickery with kdp_src_high32 is required for accesses above 4GB.
+ #
+ set $kgm_entryp = $kgm_pt_paddr + 8*$kgm_pt_index
+ set kdp_src_high32 = $kgm_pt_paddr >> 32
+ set kdp_trans_off = 1
+ set $entry = *(pt_entry_t *)($kgm_entryp & 0x0ffffffffULL)
+ if $kgm_pt_verbose == 2
+ x/512g ($kgm_pt_paddr & 0x0ffffffffULL)
+ end
+ set kdp_trans_off = 0
+ set kdp_src_high32 = 0
+ set $kgm_paddr_mask = ~((0xffffULL<<48) | 0xfffULL)
+ if $kgm_pt_verbose == 0
+ if $entry & (0x1 << 0)
+ set $kgm_pt_paddr = $entry & $kgm_paddr_mask
+ else
+ set $kgm_pt_paddr = 0
+ end
+ else
+ printf "0x%016llx:\n\t0x%016llx\n\t", $kgm_entryp, $entry
+ if $entry & (0x1 << 0)
+ printf "valid"
+ set $kgm_pt_paddr = $entry & $kgm_paddr_mask
+ else
+ printf "invalid"
+ set $kgm_pt_paddr = 0
+ end
+ if $entry & (0x1 << 1)
+ printf " writeable"
+ else
+ printf " read-only"
+ end
+ if $entry & (0x1 << 2)
+ printf " user"
+ else
+ printf " supervisor"
+ end
+ if $entry & (0x1 << 3)
+ printf " PWT"
+ end
+ if $entry & (0x1 << 4)
+ printf " PCD"
+ end
+ if $entry & (0x1 << 5)
+ printf " accessed"
+ end
+ if $entry & (0x1 << 6)
+ printf " dirty"
+ end
+ if $entry & (0x1 << 7)
+ printf " PAT"
+ end
+ if $entry & (0x1 << 8)
+ printf " global"
+ end
+ if $entry & (0x3 << 9)
+ printf " avail:0x%x", ($entry >> 9) & 0x3
+ end
+ if $entry & (0x1 << 63)
+ printf " noexec"
+ end
+ printf "\n"
+ end
+end
+
+define _pmap_walk
+ set $kgm_pmap = (pmap_t) $arg0
+ set $kgm_vaddr = $arg1
+ set $kgm_pt_paddr = $kgm_pmap->pm_cr3
+ if $kgm_pt_paddr && cpu_64bit
+ set $kgm_pt_index = ($kgm_vaddr >> 39) & 0x1ffULL
+ if $kgm_pt_verbose
+ printf "pml4 (index %d):\n", $kgm_pt_index
+ end
+ _pt_step
+ end
+ if $kgm_pt_paddr
+ set $kgm_pt_index = ($kgm_vaddr >> 30) & 0x1ffULL
+ if $kgm_pt_verbose
+ printf "pdpt (index %d):\n", $kgm_pt_index
+ end
+ _pt_step
+ end
+ if $kgm_pt_paddr
+ set $kgm_pt_index = ($kgm_vaddr >> 21) & 0x1ffULL
+ if $kgm_pt_verbose
+ printf "pdt (index %d):\n", $kgm_pt_index
+ end
+ _pt_step
+ end
+ if $kgm_pt_paddr
+ set $kgm_pt_index = ($kgm_vaddr >> 12) & 0x1ffULL
+ if $kgm_pt_verbose
+ printf "pt (index %d):\n", $kgm_pt_index
+ end
+ _pt_step
+ end
+ if $kgm_pt_paddr
+ set $kgm_paddr = $kgm_pt_paddr + ($kgm_vaddr & 0xfffULL)
+ set kdp_trans_off = 1
+ set kdp_src_high32 = $kgm_paddr >> 32
+ set $kgm_value = *($kgm_paddr & 0x0ffffffffULL)
+ set kdp_trans_off = 0
+ set kdp_src_high32 = 0
+ printf "phys 0x%016llx: 0x%08x\n", $kgm_paddr, $kgm_value
+ else
+ set $kgm_paddr = 0
+ printf "(no translation)\n"
+ end
+end
+
+define pmap_walk
+ if $kgm_mtype != 7
+ printf "Not available for current architecture.\n"
+ else
+ if $argc != 2
+ printf "pmap_walk <pmap> <vaddr>\n"
+ else
+ if !$kgm_pt_verbose
+ set $kgm_pt_verbose = 1
+ else
+ if $kgm_pt_verbose != 2
+ set $kgm_pt_verbose = 1
+ end
+ end
+ _pmap_walk $arg0 $arg1
+ end
+ end
+end
+
+document pmap_walk
+Syntax: (gdb) pmap_walk <pmap> <virtual_address>
+| Perform a page-table walk in <pmap> for <virtual_address>.
+| Set $kgm_pt_verbose=2 for full hex dump of page tables.
+end
+
+define pmap_vtop
+ if $kgm_mtype != 7
+ printf "Not available for current architecture.\n"
+ else
+ if $argc != 2
+ printf "pmap_vtop <pamp> <vaddr>\n"
+ else
+ set $kgm_pt_verbose = 0
+ _pmap_walk $arg0 $arg1
+ end
+ end
+end
+
+document pmap_vtop
+Syntax: (gdb) pmap_vtop <pmap> <virtual_address>
+| Translate <virtual_address> to its physical address using the page tables in <pmap>.
+end
+
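
To make the shift arithmetic inside _pmap_walk easier to follow, here is the same 4-level index split expressed as a plain C helper. This is a reference sketch only; pt_indices is a hypothetical name and the function is not part of the macros above.

    #include <stdint.h>

    /* Mirror of _pmap_walk's index extraction for 4-level x86-64 page tables:
     * 9 bits of index per level, 4 KB pages (12-bit page offset). */
    static inline void
    pt_indices(uint64_t vaddr,
               unsigned *pml4, unsigned *pdpt, unsigned *pdt, unsigned *pt)
    {
        *pml4 = (unsigned)((vaddr >> 39) & 0x1ff);   /* bits 47..39 */
        *pdpt = (unsigned)((vaddr >> 30) & 0x1ff);   /* bits 38..30 */
        *pdt  = (unsigned)((vaddr >> 21) & 0x1ff);   /* bits 29..21 */
        *pt   = (unsigned)((vaddr >> 12) & 0x1ff);   /* bits 20..12 */
    }
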
# add version string
SRCS += libsyscall_version.c
libsyscall_version.c:
- /Developer/Makefiles/bin/version.pl Libsyscall > $@
+ ${NEXT_ROOT}/Developer/Makefiles/bin/version.pl Libsyscall > $@
CFLAGS += -I${SYMROOT}
.include "${.CURDIR}/Makefile.inc"
KERNELFRAMEWORK = ${DESTDIR}/System/Library/Frameworks/Kernel.framework
PRIVKERNELHDRS = ${KERNELFRAMEWORK}/Versions/A/PrivateHeaders
+.if ${MACHINE_ARCH} == armv6
+ARCHDIR = arm
+.else
+ARCHDIR = ${MACHINE_ARCH}
+.endif
+
installhdrs-md: gen_md_mig_defs
- mkdir -p ${INCDIR}/mach/${MACHINE_ARCH}
- ${INSTALL} -o 0 -c -m 444 ${MD_MIGHDRS} ${INCDIR}/mach/${MACHINE_ARCH}
+ mkdir -p ${INCDIR}/mach/${ARCHDIR}
+ ${INSTALL} -o 0 -c -m 444 ${MD_MIGHDRS} ${INCDIR}/mach/${ARCHDIR}
mkdir -p ${PRIVHDRSPPC}
${INSTALL} -c -m 444 ${PRIVHDRSPPCHDRS} ${PRIVHDRSPPC}
##########################################################################
# Make a __xxx.s file: if it exists in the $CustomDir, just copy it, otherwise
-# create one. We define the macro __SYSCALL_I386_ARG_BYTES so that SYS.h could
+# create one. We define the macro __SYSCALL_32BIT_ARG_BYTES so that SYS.h could
# use that to define __SYSCALL dependent on the arguments' total size.
##########################################################################
sub make_s {
} else {
my $f = IO::File->new($path, 'w');
die "$MyName: $path: $!\n" unless defined($f);
- print $f "#define __SYSCALL_I386_ARG_BYTES $bytes\n\n";
+ print $f "#define __SYSCALL_32BIT_ARG_BYTES $bytes\n\n";
print $f "#include \"SYS.h\"\n\n";
print $f "__SYSCALL($pseudo, $name, $args)\n";
print "Creating $path\n";
BRANCH_EXTERN(cerror) ;\
2:
-#if defined(__SYSCALL_I386_ARG_BYTES) && ((__SYSCALL_I386_ARG_BYTES >= 4) && (__SYSCALL_I386_ARG_BYTES <= 20))
+#if defined(__SYSCALL_32BIT_ARG_BYTES) && ((__SYSCALL_32BIT_ARG_BYTES >= 4) && (__SYSCALL_32BIT_ARG_BYTES <= 20))
#define UNIX_SYSCALL_NONAME(name, nargs) \
- movl $(SYS_##name | (__SYSCALL_I386_ARG_BYTES << I386_SYSCALL_ARG_BYTES_SHIFT)), %eax ;\
+ movl $(SYS_##name | (__SYSCALL_32BIT_ARG_BYTES << I386_SYSCALL_ARG_BYTES_SHIFT)), %eax ;\
UNIX_SYSCALL_SYSENTER ;\
jnb 2f ;\
BRANCH_EXTERN(cerror) ;\
2:
-#else /* __SYSCALL_I386_ARG_BYTES < 4 || > 20 */
+#else /* __SYSCALL_32BIT_ARG_BYTES < 4 || > 20 */
#define UNIX_SYSCALL_NONAME(name, nargs) \
.globl cerror ;\
movl $ SYS_##name, %eax ;\
# machine-dependent mach sources
-.if exists(${.CURDIR}/mach/${MACHINE_ARCH}/Makefile.inc)
-.include "${.CURDIR}/mach/${MACHINE_ARCH}/Makefile.inc"
+.if ${MACHINE_ARCH} == armv6
+ARCHDIR = arm
+.else
+ARCHDIR = ${MACHINE_ARCH}
+.endif
+.if exists(${.CURDIR}/mach/${ARCHDIR}/Makefile.inc)
+.include "${.CURDIR}/mach/${ARCHDIR}/Makefile.inc"
.endif
.PATH: ${.CURDIR}/mach
Are you sure? To specify multiple configurations please use KERNEL_CONFIGS)
endif
-#
-# Machine Configuration options
-#
-# ppc supported configurations : none
-# i386 supported configurations : none
-# arm supported configurations : LN2410SBC MX31ADS INTEGRATORCP S5I3000SMDK S5L8900XFPGA S5L8900XRB OLOCREEK
-#
-ifndef SUPPORTED_MACHINE_CONFIGS
-export SUPPORTED_MACHINE_CONFIGS = LN2410SBC MX31ADS INTEGRATORCP S5I3000SMDK S5L8900XFPGA S5L8900XRB OLOCREEK DEFAULT
-endif
-
-export DEFAULT_ARM_MACHINE_CONFIG = S5L8900XRB
-
ifndef MACHINE_CONFIG
export MACHINE_CONFIG = DEFAULT
endif
-ifndef MACHINE_FLAGS_LN2410SBC
-export MACHINE_FLAGS_LN2410SBC = -DARM_BOARD_CONFIG_LN2410_920T
-endif
-ifndef MACHINE_FLAGS_MX31ADS
-export MACHINE_FLAGS_MX31ADS = -DARM_BOARD_CONFIG_MX31ADS_1136JFS
-endif
-ifndef MACHINE_FLAGS_INTEGRATORCP
-export MACHINE_FLAGS_INTEGRATORCP = -DARM_BOARD_CONFIG_INTEGRATORCP_1136JFS
-endif
-ifndef MACHINE_FLAGS_S5I3000SMDK
-export MACHINE_FLAGS_S5I3000SMDK = -DARM_BOARD_CONFIG_S5I3000SMDK_1176JZFS
-endif
-ifndef MACHINE_FLAGS_S5L8900XFPGA
-export MACHINE_FLAGS_S5L8900XFPGA = -DARM_BOARD_CONFIG_S5L8900XFPGA_1136JFS
-endif
-ifndef MACHINE_FLAGS_S5L8900XRB
-export MACHINE_FLAGS_S5L8900XRB = -DARM_BOARD_CONFIG_S5L8900XRB
-endif
-ifndef MACHINE_FLAGS_OLOCREEK
-export MACHINE_FLAGS_OLOCREEK = -DARM_BOARD_CONFIG_OLOCREEK
-endif
-ifndef MACHINE_FLAGS_DEFAULT
-export MACHINE_FLAGS_DEFAULT =
-endif
#
# Target configuration options. NOTE - target configurations will
ARCH_FLAGS_I386 = -arch i386
ARCH_FLAGS_ARM = $($(addsuffix $(MACHINE_CONFIG),ARCH_FLAGS_ARM_))
-ARCH_FLAGS_ARM_LN2410SBC = -arch arm
-ARCH_FLAGS_ARM_MX31ADS = -arch armv6
-ARCH_FLAGS_ARM_INTEGRATORCP = -arch armv6
-ARCH_FLAGS_ARM_S5I3000SMDK = -arch armv6
-ARCH_FLAGS_ARM_S5L8900XFPGA = -arch armv6
-ARCH_FLAGS_ARM_S5L8900XRB = -arch armv6
-ARCH_FLAGS_ARM_OLOCREEK = -arch arm
#
# Default CFLAGS
ifeq (-arch armv6,$(ARCH_FLAGS_ARM))
CFLAGS_ARM += -mthumb
endif
+ifeq (-arch armv5,$(ARCH_FLAGS_ARM))
+CFLAGS_ARM += -mthumb
+endif
+ifeq (-arch xscale,$(ARCH_FLAGS_ARM))
+CFLAGS_ARM += -mthumb
+endif
export CFLAGS_RELEASEPPC = -O2 -mcpu=750 -mmultiple
export CFLAGS_RELEASE_TRACEPPC = -O2 -mcpu=750 -mmultiple
options X86_64
options DISPATCH_COUNTS
+#
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and
+# security/conf MASTER files.
+#
options CONFIG_MACF # Mandatory Access Control Framework
#options CONFIG_MACF_MACH # MACF applied to Mach services
options DISPATCH_COUNTS
+#
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and
+# security/conf MASTER files.
+#
options CONFIG_MACF # Mandatory Access Control Framework
#options CONFIG_MACF_MACH # MACF applied to Mach services
/* For use with the MP rendezvous mechanism
*/
+#if !CONFIG_EMBEDDED
static void
machine_halt_cpu(__unused void *arg) {
panic_io_port_read();
pmCPUHalt(PM_HALT_DEBUG);
}
+#endif
void
Debugger(
acpi_sleep_cpu(func, refcon);
#endif
- /* reset UART if kprintf is enabled */
+ /* Reset UART if kprintf is enabled.
+ * However kprintf should not be used before rtc_sleep_wakeup()
+ * for compatibility with firewire kprintf.
+ */
+
if (FALSE == disable_serial_output)
serial_init();
DBG(" CVT: HPET to BUS = %08X.%08X\n",
(uint32_t)(hpet2bus >> 32), (uint32_t)hpet2bus);
- /* Make sure the counter is off in the HPET configuration flags */
- uint64_t hpetcon = ((hpetReg_t *)hpetArea)->GEN_CONF;
- hpetcon = hpetcon & ~1;
- ((hpetReg_t *)hpetArea)->GEN_CONF = hpetcon;
-
- /*
- * Convert current TSC to HPET value,
- * set it, and start it ticking.
- */
- uint64_t currtsc = rdtsc64();
- uint64_t tscInHPET = tmrCvt(currtsc, tsc2hpet);
- ((hpetReg_t *)hpetArea)->MAIN_CNT = tscInHPET;
- hpetcon = hpetcon | 1;
- ((hpetReg_t *)hpetArea)->GEN_CONF = hpetcon;
- kprintf("HPET started: TSC = %08X.%08X, HPET = %08X.%08X\n",
- (uint32_t)(currtsc >> 32), (uint32_t)currtsc,
- (uint32_t)(tscInHPET >> 32), (uint32_t)tscInHPET);
-
#if MACH_KDB
db_display_hpet((hpetReg_t *)hpetArea); /* (BRINGUP) */
#endif
info->hpet2tsc = hpet2tsc;
info->bus2hpet = bus2hpet;
info->hpet2bus = hpet2bus;
- info->rcbaArea = rcbaArea;
- info->rcbaAreap = rcbaAreap;
+ /*
+ * XXX
+ * We're repurposing the rcbaArea so we can use the HPET.
+ * Eventually we'll rename this correctly.
+ */
+ info->rcbaArea = hpetArea;
+ info->rcbaAreap = hpetAreap;
}
{
ia32_mcg_status_t status;
- mca_exception_taken = TRUE;
mca_save_state();
- /* Serialize in case of multiple simultaneous machine-checks */
+ /*
+ * Serialize in case of multiple simultaneous machine-checks.
+ * Only the first caller is allowed to print MCA registers.
+ */
simple_lock(&mca_lock);
+ if (mca_exception_taken) {
+ simple_unlock(&mca_lock);
+ return;
+ }
+ mca_exception_taken = TRUE;
/*
* Report machine-check capabilities:
extern void rtc_clock_stepped(
uint32_t new_frequency,
uint32_t old_frequency);
-extern void rtc_clock_napped(uint64_t);
+extern void rtc_clock_napped(uint64_t, uint64_t);
extern void x86_lowmem_free(void);
*/
wrmsr64(MSR_IA32_KERNEL_GS_BASE,
UBER64((unsigned long)current_cpu_datap()));
+
+#if ONLY_SAFE_FOR_LINDA_SERIAL
kprintf("fast_syscall_init64() KERNEL_GS_BASE=0x%016llx\n",
rdmsr64(MSR_IA32_KERNEL_GS_BASE));
+#endif
}
/*
ml_load_desc64();
+#if ONLY_SAFE_FOR_LINDA_SERIAL
kprintf("64-bit descriptor tables loaded\n");
+#endif
}
void
#define iswired(pte) ((pte) & INTEL_PTE_WIRED)
int nx_enabled = 1; /* enable no-execute protection */
+#ifdef CONFIG_EMBEDDED
+int allow_data_exec = 0; /* no exec from data, embedded is hardcore like that */
+#else
int allow_data_exec = VM_ABI_32; /* 32-bit apps may execute data by default, 64-bit apps may not */
+#endif
int allow_stack_exec = 0; /* No apps may execute from the stack by default */
int cpu_64bit = 0;
rtc_nanotime_t rtc_nanotime_info = {0,0,0,0,1,0};
+/*
+ * tsc_to_nanoseconds:
+ *
+ * Basic routine to convert a raw 64 bit TSC value to a
+ * 64 bit nanosecond value. The conversion is implemented
+ * based on the scale factor and an implicit 32 bit shift.
+ */
+static inline uint64_t
+_tsc_to_nanoseconds(uint64_t value)
+{
+ asm volatile("movl %%edx,%%esi ;"
+ "mull %%ecx ;"
+ "movl %%edx,%%edi ;"
+ "movl %%esi,%%eax ;"
+ "mull %%ecx ;"
+ "addl %%edi,%%eax ;"
+ "adcl $0,%%edx "
+ : "+A" (value) : "c" (rtc_nanotime_info.scale) : "esi", "edi");
+
+ return (value);
+}
+
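
For readers not fluent in the inline assembly above: the routine computes (value * scale) >> 32 without needing a 64x64 to 128-bit multiply. The C sketch below shows the same arithmetic for reference; the function name is hypothetical and the code is not part of this change.

    /* Reference version of the conversion: split the multiplicand so the
     * implicit 32-bit shift can be applied with only 64-bit arithmetic. */
    static inline uint64_t
    tsc_to_nanoseconds_ref(uint64_t value, uint32_t scale)
    {
        uint64_t hi = (value >> 32) * scale;                   /* high half, shift already applied */
        uint64_t lo = ((value & 0xFFFFFFFFULL) * scale) >> 32; /* low half, then shift */
        return hi + lo;                                        /* == (value * scale) >> 32 */
    }
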
static uint32_t
deadline_to_decrementer(
uint64_t deadline,
/*
* rtc_clock_napped:
*
- * Invoked from power manangement when we have awoken from a nap (C3/C4)
- * during which the TSC lost counts. The nanotime data is updated according
- * to the provided value which indicates the number of nanoseconds that the
- * TSC was not counting.
- *
- * The caller must guarantee non-reentrancy.
+ * Invoked from power management when we exit from a low C-State (>= C4)
+ * and the TSC has stopped counting. The nanotime data is updated according
+ * to the provided value which represents the new value for nanotime.
*/
void
-rtc_clock_napped(
- uint64_t delta)
+rtc_clock_napped(uint64_t base, uint64_t tsc_base)
{
rtc_nanotime_t *rntp = &rtc_nanotime_info;
- uint32_t generation;
+ uint64_t oldnsecs;
+ uint64_t newnsecs;
+ uint64_t tsc;
assert(!ml_get_interrupts_enabled());
- generation = rntp->generation;
- rntp->generation = 0;
- rntp->ns_base += delta;
- rntp->generation = ((generation + 1) != 0) ? (generation + 1) : 1;
- rtc_nanotime_set_commpage(rntp);
+ tsc = rdtsc64();
+ oldnsecs = rntp->ns_base + _tsc_to_nanoseconds(tsc - rntp->tsc_base);
+ newnsecs = base + _tsc_to_nanoseconds(tsc - tsc_base);
+
+ /*
+ * Only update the base values if time using the new base values
+ * is later than the time using the old base values.
+ */
+ if (oldnsecs < newnsecs) {
+ _rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp);
+ rtc_nanotime_set_commpage(rntp);
+ }
}
void
: "i" (CR0_PG)
: "eax" );
+#if ONLY_SAFE_FOR_LINDA_SERIAL
kprintf("cpu_IA32e_enable(%p)\n", cdp);
+#endif
if ((rdmsr64(MSR_IA32_EFER) & MSR_IA32_EFER_LMA) == 0)
panic("cpu_IA32e_enable() MSR_IA32_EFER_LMA not asserted");
#include <i386/eflags.h>
/*
- * i386_saved_state:
+ * x86_saved_state32/64:
*
* Has been exported to servers. See: mach/i386/thread_status.h
*
* Get the TSC increment. The TSC is incremented by this
* on every bus tick. Calculate the TSC conversion factors
* to and from nano-seconds.
+ * The tsc granularity is also called the "bus ratio". If the N/2 bit
+ * is set, this indicates the bus ratio is 0.5 more than this, i.e.
+ * that the true bus ratio is (2*tscGranularity + 1)/2.
*/
if (cpuid_info()->cpuid_family == CPU_FAMILY_PENTIUM_M) {
uint64_t prfsts;
prfsts = rdmsr64(IA32_PERF_STS);
tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
- N_by_2_bus_ratio = prfsts & bit(46);
+ N_by_2_bus_ratio = (prfsts & bit(46)) != 0;
} else {
panic("rtclock_init: unknown CPU family: 0x%X\n",
}
if (N_by_2_bus_ratio)
- tscFCvtt2n = busFCvtt2n * 2 / (uint64_t)tscGranularity;
+ tscFCvtt2n = busFCvtt2n * 2 / (1 + 2*tscGranularity);
else
- tscFCvtt2n = busFCvtt2n / (uint64_t)tscGranularity;
+ tscFCvtt2n = busFCvtt2n / tscGranularity;
tscFreq = ((1 * Giga) << 32) / tscFCvtt2n;
tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n;
kprintf(" TSC: Frequency = %6d.%04dMHz, "
- "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld\n",
+ "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n",
(uint32_t)(tscFreq / Mega),
(uint32_t)(tscFreq % Mega),
(uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
(uint32_t)(tscFCvtn2t >> 32), (uint32_t)tscFCvtn2t,
- tscGranularity);
+ tscGranularity, N_by_2_bus_ratio ? " (N/2)" : "");
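
To make the N/2 adjustment concrete, here is a worked example with assumed numbers (not taken from any particular machine): if the CPU reports tscGranularity = 9 with the N/2 bit set, the true bus ratio is 9.5, so the conversion divides by 19/2 rather than by 9. busFCvtt2n below is the variable from the surrounding code.

    /* Assumed values for illustration: tscGranularity = 9, N/2 bit set. */
    uint64_t gran       = 9;
    uint64_t cvt_n_by_2 = busFCvtt2n * 2 / (1 + 2 * gran);  /* effective divisor 9.5 */
    uint64_t cvt_whole  = busFCvtt2n / gran;                /* divisor 9, without the N/2 correction */
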
/*
* Calculate conversion from BUS to TSC
extern void etimer_resync_deadlines(void);
-extern uint32_t rtclock_tick_interval;
-
#if 0 /* this is currently still MD */
#pragma pack(push,4)
struct rtclock_timer_t {
priority = self->sched_pri;
if (priority < self->priority)
priority = self->priority;
- if (priority > MINPRI_KERNEL)
- priority = MINPRI_KERNEL;
- else
if (priority < BASEPRI_DEFAULT)
priority = BASEPRI_DEFAULT;
thread_lock(holder);
if (mutex->lck_mtx_pri == 0)
holder->promotions++;
- if (holder->priority < MINPRI_KERNEL) {
- holder->sched_mode |= TH_MODE_PROMOTED;
- if ( mutex->lck_mtx_pri < priority &&
+ holder->sched_mode |= TH_MODE_PROMOTED;
+ if ( mutex->lck_mtx_pri < priority &&
holder->sched_pri < priority ) {
- KERNEL_DEBUG_CONSTANT(
- MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
+ KERNEL_DEBUG_CONSTANT(
+ MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
holder->sched_pri, priority, (int)holder, (int)lck, 0);
- set_sched_pri(holder, priority);
- }
+ set_sched_pri(holder, priority);
}
thread_unlock(holder);
splx(s);
thread_lock(thread);
thread->promotions++;
- if (thread->priority < MINPRI_KERNEL) {
- thread->sched_mode |= TH_MODE_PROMOTED;
- if (thread->sched_pri < priority) {
- KERNEL_DEBUG_CONSTANT(
- MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
+ thread->sched_mode |= TH_MODE_PROMOTED;
+ if (thread->sched_pri < priority) {
+ KERNEL_DEBUG_CONSTANT(
+ MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
thread->sched_pri, priority, 0, (int)lck, 0);
- set_sched_pri(thread, priority);
- }
+ set_sched_pri(thread, priority);
}
thread_unlock(thread);
splx(s);
#endif
{
processor_t processor = current_processor();
-#if !GPROF
thread_t thread = current_thread();
-#endif
timer_t state;
if (usermode) {
state = &PROCESSOR_DATA(processor, user_state);
}
else {
- TIMER_BUMP(&thread->system_timer, ticks);
-
+ /* If this thread is idling, do not charge that time as system time */
+ if ((thread->state & TH_IDLE) == 0) {
+ TIMER_BUMP(&thread->system_timer, ticks);
+ }
+
if (processor->state == PROCESSOR_IDLE)
state = &PROCESSOR_DATA(processor, idle_state);
else
/*
* Check for fail-safe trip.
*/
- if (!(thread->sched_mode & TH_MODE_TIMESHARE)) {
+ if (!(thread->sched_mode & (TH_MODE_TIMESHARE|TH_MODE_PROMOTED))) {
uint64_t new_computation;
new_computation = processor->quantum_end;
thread->safe_release = sched_tick + sched_safe_duration;
thread->sched_mode |= (TH_MODE_FAILSAFE|TH_MODE_TIMESHARE);
- thread->sched_mode &= ~TH_MODE_PREEMPT;
}
}
void wait_queues_init(void) __attribute__((section("__TEXT, initcode")));
static void load_shift_init(void) __attribute__((section("__TEXT, initcode")));
+static void preempt_pri_init(void) __attribute__((section("__TEXT, initcode")));
static thread_t thread_select_idle(
thread_t thread,
#endif /*DEBUG*/
-
-
/*
* State machine
*
((((int)(event) < 0)? ~(int)(event): (int)(event)) % NUMQUEUES)
int8_t sched_load_shifts[NRQS];
+int sched_preempt_pri[NRQBM];
void
sched_init(void)
wait_queues_init();
load_shift_init();
+ preempt_pri_init();
simple_lock_init(&rt_lock, 0);
run_queue_init(&rt_runq);
sched_tick = 0;
/* scheduler tick interval */
clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
NSEC_PER_USEC, &abstime);
- assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
sched_tick_interval = abstime;
+#if DEBUG
+ printf("Quantum: %d. Smallest quantum: %d. Min Rt/Max Rt: %d/%d."
+ " Tick: %d.\n",
+ std_quantum, min_std_quantum, min_rt_quantum, max_rt_quantum,
+ sched_tick_interval);
+#endif
+
/*
* Compute conversion factor from usage to
* timesharing priorities with 5/8 ** n aging.
}
}
+static void
+preempt_pri_init(void)
+{
+ int i, *p = sched_preempt_pri;
+
+ for (i = BASEPRI_FOREGROUND + 1; i < MINPRI_KERNEL; ++i)
+ setbit(i, p);
+
+ for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
+ setbit(i, p);
+}
+
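
The bitmap initialized above replaces the per-thread TH_MODE_PREEMPT flag: whether a runnable thread counts toward a run queue's urgency is now purely a function of its scheduled priority. The hedged sketch below distills the predicate that the call sites test inline; sched_pri_is_urgent is a hypothetical helper and not part of this change.

    /* TRUE if a thread at this priority should trigger an urgent
     * (AST_PREEMPT | AST_URGENT) preemption when it becomes runnable. */
    static boolean_t
    sched_pri_is_urgent(int pri)
    {
        return testbit(pri, sched_preempt_pri) ? TRUE : FALSE;
    }
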
/*
* Thread wait timer expiration.
*/
((queue_entry_t)thread)->next->prev = q;
q->next = ((queue_entry_t)thread)->next;
thread->runq = PROCESSOR_NULL;
- assert(thread->sched_mode & TH_MODE_PREEMPT);
runq->count--; runq->urgency--;
+ assert(runq->urgency >= 0);
if (queue_empty(q)) {
if (runq->highq != IDLEPRI)
clrbit(MAXPRI - runq->highq, runq->bitmap);
thread->runq = PROCESSOR_NULL;
rq->count--;
- if (thread->sched_mode & TH_MODE_PREEMPT)
- rq->urgency--;
+ if (testbit(rq->highq, sched_preempt_pri)) {
+ rq->urgency--; assert(rq->urgency >= 0);
+ }
if (queue_empty(queue)) {
if (rq->highq != IDLEPRI)
clrbit(MAXPRI - rq->highq, rq->bitmap);
}
thread->runq = RT_RUNQ;
- assert(thread->sched_mode & TH_MODE_PREEMPT);
rq->count++; rq->urgency++;
simple_unlock(&rt_lock);
enqueue_head(queue, (queue_entry_t)thread);
thread->runq = processor;
- if (thread->sched_mode & TH_MODE_PREEMPT)
+ if (testbit(thread->sched_pri, sched_preempt_pri))
rq->urgency++;
rq->count++;
/*
* Set preemption mode.
*/
- if (thread->sched_mode & TH_MODE_PREEMPT)
+ if (testbit(thread->sched_pri, sched_preempt_pri))
preempt = (AST_PREEMPT | AST_URGENT);
else
if (thread->sched_mode & TH_MODE_TIMESHARE && thread->priority < BASEPRI_BACKGROUND)
thread->runq = PROCESSOR_NULL;
rq->count--;
- if (thread->sched_mode & TH_MODE_PREEMPT)
- rq->urgency--;
+ if (testbit(pri, sched_preempt_pri)) {
+ rq->urgency--; assert(rq->urgency >= 0);
+ }
if (queue_empty(queue)) {
if (pri != IDLEPRI)
clrbit(MAXPRI - pri, rq->bitmap);
{
boolean_t removed = run_queue_remove(thread);
- if ( !(thread->sched_mode & TH_MODE_TIMESHARE) &&
- (priority >= BASEPRI_PREEMPT ||
- (thread->task_priority < MINPRI_KERNEL &&
- thread->task_priority >= BASEPRI_BACKGROUND &&
- priority > thread->task_priority) ) )
- thread->sched_mode |= TH_MODE_PREEMPT;
- else
- thread->sched_mode &= ~TH_MODE_PREEMPT;
-
thread->sched_pri = priority;
if (removed)
thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
*/
remqueue(&rq->queues[0], (queue_entry_t)thread);
rq->count--;
- if (thread->sched_mode & TH_MODE_PREEMPT)
- rq->urgency--;
- assert(rq->urgency >= 0);
+ if (testbit(thread->sched_pri, sched_preempt_pri)) {
+ rq->urgency--; assert(rq->urgency >= 0);
+ }
if (queue_empty(rq->queues + thread->sched_pri)) {
/* update run queue status */
thread->runq = PROCESSOR_NULL;
rq->count--;
- if (thread->sched_mode & TH_MODE_PREEMPT)
- rq->urgency--;
+ if (testbit(pri, sched_preempt_pri)) {
+ rq->urgency--; assert(rq->urgency >= 0);
+ }
if (queue_empty(queue)) {
if (pri != IDLEPRI)
clrbit(MAXPRI - pri, rq->bitmap);
break;
}
- KERNEL_DEBUG_CONSTANT(
- MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, 0, 0, 0, 0);
-
timer_switch(&PROCESSOR_DATA(processor, idle_state),
mach_absolute_time(), &PROCESSOR_DATA(processor, system_state));
PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);
processor->next_thread = THREAD_NULL;
processor->state = PROCESSOR_RUNNING;
- if ( processor->runq.highq > new_thread->sched_pri ||
- rt_runq.highq >= new_thread->sched_pri ) {
+ if ( processor->runq.highq > new_thread->sched_pri ||
+ (rt_runq.highq > 0 && rt_runq.highq >= new_thread->sched_pri) ) {
processor->deadline = UINT64_MAX;
pset_unlock(pset);
thread_setrun(new_thread, SCHED_HEADQ);
thread_unlock(new_thread);
+ KERNEL_DEBUG_CONSTANT(
+ MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
+
return (THREAD_NULL);
}
pset_unlock(pset);
+ KERNEL_DEBUG_CONSTANT(
+ MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, (int)new_thread, 0, 0);
+
return (new_thread);
}
else
thread_setrun(new_thread, SCHED_HEADQ);
thread_unlock(new_thread);
+ KERNEL_DEBUG_CONSTANT(
+ MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
+
return (THREAD_NULL);
}
}
pset_unlock(pset);
+ KERNEL_DEBUG_CONSTANT(
+ MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
+
return (THREAD_NULL);
}
self->sched_pri = DEPRESSPRI;
myprocessor->current_pri = self->sched_pri;
- self->sched_mode &= ~TH_MODE_PREEMPT;
self->sched_mode |= TH_MODE_DEPRESS;
if (interval != 0) {
if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) {
self->sched_pri = DEPRESSPRI;
myprocessor->current_pri = self->sched_pri;
- self->sched_mode &= ~TH_MODE_PREEMPT;
}
self->computation_epoch = abstime;
self->computation_metered = 0;
integer_t sched_mode; /* scheduling mode bits */
#define TH_MODE_REALTIME 0x0001 /* time constraints supplied */
#define TH_MODE_TIMESHARE 0x0002 /* use timesharing algorithm */
-#define TH_MODE_PREEMPT 0x0004 /* can preempt kernel contexts */
-#define TH_MODE_FAILSAFE 0x0008 /* fail-safe has tripped */
-#define TH_MODE_PROMOTED 0x0010 /* sched pri has been promoted */
-#define TH_MODE_ABORT 0x0020 /* abort interruptible waits */
-#define TH_MODE_ABORTSAFELY 0x0040 /* ... but only those at safe point */
+#define TH_MODE_FAILSAFE 0x0004 /* fail-safe has tripped */
+#define TH_MODE_PROMOTED 0x0008 /* sched pri has been promoted */
+#define TH_MODE_ABORT 0x0010 /* abort interruptible waits */
+#define TH_MODE_ABORTSAFELY 0x0020 /* ... but only those at safe point */
#define TH_MODE_ISABORTED (TH_MODE_ABORT | TH_MODE_ABORTSAFELY)
-#define TH_MODE_DEPRESS 0x0080 /* normal depress yield */
-#define TH_MODE_POLLDEPRESS 0x0100 /* polled depress yield */
+#define TH_MODE_DEPRESS 0x0040 /* normal depress yield */
+#define TH_MODE_POLLDEPRESS 0x0080 /* polled depress yield */
#define TH_MODE_ISDEPRESSED (TH_MODE_DEPRESS | TH_MODE_POLLDEPRESS)
integer_t sched_pri; /* scheduled (current) priority */
thread->sched_pri = DEPRESSPRI;
myprocessor->current_pri = thread->sched_pri;
- thread->sched_mode &= ~TH_MODE_PREEMPT;
}
thread_unlock(thread);
splx(s);
typedef struct x86_saved_state32_tagged x86_saved_state32_tagged_t;
struct x86_sframe32 {
-/*
+ /*
* in case we throw a fault reloading
* segment registers on a return out of
* the kernel... the 'slf' state is only kept
#define CPU_SUBTYPE_ARM_ALL ((cpu_subtype_t) 0)
#define CPU_SUBTYPE_ARM_V4T ((cpu_subtype_t) 5)
#define CPU_SUBTYPE_ARM_V6 ((cpu_subtype_t) 6)
+#define CPU_SUBTYPE_ARM_V5TEJ ((cpu_subtype_t) 7)
+#define CPU_SUBTYPE_ARM_XSCALE ((cpu_subtype_t) 8)
/*
* CPU families (sysctl hw.cpufamily)
#define CPUFAMILY_INTEL_6_26 0x6b5a4cd2 /* Nehalem */
#define CPUFAMILY_ARM_9 0xe73283ae
#define CPUFAMILY_ARM_11 0x8ff620d8
+#define CPUFAMILY_ARM_XSCALE 0x53b005f5
#define CPUFAMILY_INTEL_YONAH CPUFAMILY_INTEL_6_14
#define CPUFAMILY_INTEL_MEROM CPUFAMILY_INTEL_6_15
extern int uiomove64(addr64_t, int, void *);
#define MAX_RUN 32
+unsigned long vm_cs_tainted_forces = 0;
+
int
memory_object_control_uiomove(
memory_object_control_t control,
*/
assert(!dst_page->encrypted);
- if (mark_dirty)
+ if (mark_dirty) {
dst_page->dirty = TRUE;
+ if (dst_page->cs_validated) {
+ /*
+ * CODE SIGNING:
+ * We're modifying a code-signed
+ * page: assume that it is now tainted.
+ */
+ dst_page->cs_tainted = TRUE;
+ vm_cs_tainted_forces++;
+ }
+ }
dst_page->busy = TRUE;
page_run[cur_run++] = dst_page;
extern void vm_fault_classify_init(void);
#endif
+
+unsigned long vm_cs_validates = 0;
+unsigned long vm_cs_revalidates = 0;
+unsigned long vm_cs_query_modified = 0;
+unsigned long vm_cs_validated_dirtied = 0;
+
/*
* Routine: vm_fault_init
* Purpose:
cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
- if (m->object->code_signed && !m->cs_validated &&
- pmap != kernel_pmap) {
- /*
- * CODE SIGNING:
- * This page comes from a VM object backed by a
- * signed memory object and it hasn't been validated yet.
- * We're about to enter it into a process address space,
- * so we need to validate its signature now.
- */
+ if (m->object->code_signed && pmap != kernel_pmap &&
+ (!m->cs_validated || m->wpmapped)) {
vm_object_lock_assert_exclusive(m->object);
- /* VM map still locked, so 1 ref will remain on VM object */
+ if (m->cs_validated && m->wpmapped) {
+ vm_cs_revalidates++;
+ }
+ /*
+ * CODE SIGNING:
+ * This page comes from a VM object backed by a signed
+ * memory object. We are about to enter it into a process
+ * address space, so we need to validate its signature.
+ */
+ /* VM map is locked, so 1 ref will remain on VM object */
vm_page_validate_cs(m);
}
* that's needed for an AtomicCompareAndSwap
*/
m->pmapped = TRUE;
+ if (prot & VM_PROT_WRITE) {
+ vm_object_lock_assert_exclusive(m->object);
+ m->wpmapped = TRUE;
+ }
PMAP_ENTER(pmap, vaddr, m, prot, cache_attr, wired);
}
*/
if (wired) {
fault_type = prot | VM_PROT_WRITE;
-
/*
* since we're treating this fault as a 'write'
* we must hold the top object lock exclusively
}
ASSERT_PAGE_DECRYPTED(m);
- if (m->object->code_signed && !m->cs_validated) {
+ if (m->object->code_signed && map != kernel_map &&
+ (!m->cs_validated || m->wpmapped)) {
/*
- * We will need to validate this page
+ * We might need to validate this page
* against its code signature, so we
* want to hold the VM object exclusively.
*/
* --> must disallow write.
*/
- if (object == cur_object && object->copy == VM_OBJECT_NULL)
+ if (object == cur_object && object->copy == VM_OBJECT_NULL) {
+ if ((fault_type & VM_PROT_WRITE) == 0) {
+ /*
+ * This is not a "write" fault, so we
+ * might not have taken the object lock
+ * exclusively and we might not be able
+ * to update the "wpmapped" bit in
+ * vm_fault_enter().
+ * Let's just grant read access to
+ * the page for now and we'll
+ * soft-fault again if we need write
+ * access later...
+ */
+ prot &= ~VM_PROT_WRITE;
+ }
goto FastPmapEnter;
+ }
if ((fault_type & VM_PROT_WRITE) == 0) {
boolean_t validated, tainted;
boolean_t busy_page;
- vm_object_lock_assert_exclusive(page->object);
- assert(!page->cs_validated);
+ vm_object_lock_assert_held(page->object);
if (!cs_validation) {
return;
}
+ if (page->cs_validated && !page->cs_tainted && page->wpmapped) {
+ vm_object_lock_assert_exclusive(page->object);
+
+ /*
+ * This page has already been validated and found to
+ * be valid. However, it was mapped for "write" access
+ * sometime in the past, so we have to check if it was
+ * modified. If so, it needs to be revalidated.
+ * If the page was already found to be "tainted", no
+ * need to re-validate.
+ */
+ if (!page->dirty) {
+ vm_cs_query_modified++;
+ page->dirty = pmap_is_modified(page->phys_page);
+ }
+ if (page->dirty) {
+ /*
+ * The page is dirty, so let's clear its
+ * "validated" bit and re-validate it.
+ */
+ if (cs_debug) {
+ printf("CODESIGNING: vm_page_validate_cs: "
+ "page %p obj %p off 0x%llx "
+ "was modified\n",
+ page, page->object, page->offset);
+ }
+ page->cs_validated = FALSE;
+ vm_cs_validated_dirtied++;
+ }
+ }
+
+ if (page->cs_validated) {
+ return;
+ }
+
+ vm_object_lock_assert_exclusive(page->object);
+
+ vm_cs_validates++;
+
object = page->object;
assert(object->code_signed);
offset = page->offset;
}
for (; entry->vme_start < end;
entry = entry->vme_next) {
+ /*
+ * Check if the mapping's attributes
+ * match the existing map entry.
+ */
if (entry == vm_map_to_entry(map) ||
entry->vme_start != tmp_start ||
entry->is_sub_map != is_submap ||
- entry->object.vm_object != object ||
entry->offset != tmp_offset ||
entry->needs_copy != needs_copy ||
entry->protection != cur_protection ||
/* not the same mapping ! */
RETURN(KERN_NO_SPACE);
}
+ /*
+ * Check if the same object is being mapped.
+ */
+ if (is_submap) {
+ if (entry->object.sub_map !=
+ (vm_map_t) object) {
+ /* not the same submap */
+ RETURN(KERN_NO_SPACE);
+ }
+ } else {
+ if (entry->object.vm_object != object) {
+ /* not the same VM object... */
+ vm_object_t obj2;
+
+ obj2 = entry->object.vm_object;
+ if ((obj2 == VM_OBJECT_NULL ||
+ obj2->internal) &&
+ (object == VM_OBJECT_NULL ||
+ object->internal)) {
+ /*
+ * ... but both are
+ * anonymous memory,
+ * so equivalent.
+ */
+ } else {
+ RETURN(KERN_NO_SPACE);
+ }
+ }
+ }
+
tmp_offset += entry->vme_end - entry->vme_start;
tmp_start += entry->vme_end - entry->vme_start;
if (entry->vme_end >= end) {
if(submap_entry->wired_count != 0 ||
- (sub_object->copy_strategy !=
- MEMORY_OBJECT_COPY_SYMMETRIC)) {
+ (sub_object->copy_strategy ==
+ MEMORY_OBJECT_COPY_NONE)) {
vm_object_lock(sub_object);
vm_object_copy_slowly(sub_object,
submap_entry->offset,
entry->max_protection |= submap_entry->max_protection;
if(copied_slowly) {
- entry->offset = 0;
+ entry->offset = local_start - old_start;
entry->needs_copy = FALSE;
entry->is_shared = FALSE;
} else {
panic("vm_object_terminate.4 %p %p", object, p);
}
- if (!p->dirty && p->pmapped)
+ if (!p->dirty && p->wpmapped)
p->dirty = pmap_is_modified(p->phys_page);
if ((p->dirty || p->precious) && !p->error && object->alive) {
fictitious:1, /* Physical page doesn't exist (O) */
pmapped:1, /* page has been entered at some
* point into a pmap (O) */
+ wpmapped:1, /* page has been entered at some
+ * point into a pmap for write (O) */
absent:1, /* Data has been requested, but is
* not yet available (O) */
error:1, /* Data manager was unable to provide
/* other pages */
deactivated:1,
zero_fill:1,
- __unused_object_bits:9; /* 9 bits available here */
+ __unused_object_bits:8; /* 8 bits available here */
ppnum_t phys_page; /* Physical address of page, passed
* to pmap_enter (read-only) */
vm_object_t object,
vm_object_offset_t offset);
+extern void vm_page_insert_internal(
+ vm_page_t page,
+ vm_object_t object,
+ vm_object_offset_t offset,
+ boolean_t queues_lock_held);
+
extern void vm_page_replace(
vm_page_t mem,
vm_object_t object,
vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
+unsigned long vm_cs_validated_resets = 0;
/*
* Routine: vm_backing_store_disable
vm_purgeable_q_advance_all(1);
}
- if (object->copy == VM_OBJECT_NULL &&
- (object->purgable == VM_PURGABLE_EMPTY ||
- object->purgable == VM_PURGABLE_VOLATILE)) {
- assert(m->wire_count == 0); /* if it's wired, we can't put it on our queue */
- /* just stick it back on! */
- goto reactivate_page;
+ /* If the object is empty, the page must be reclaimed even if dirty or used. */
+ /* If the page belongs to a volatile object, we stick it back on. */
+ if (object->copy == VM_OBJECT_NULL) {
+ if(object->purgable == VM_PURGABLE_EMPTY && !m->cleaning) {
+ m->busy = TRUE;
+ if (m->pmapped == TRUE) {
+ /* unmap the page */
+ refmod_state = pmap_disconnect(m->phys_page);
+ if (refmod_state & VM_MEM_MODIFIED) {
+ m->dirty = TRUE;
+ }
+ }
+ if (m->dirty || m->precious) {
+ /* we saved the cost of cleaning this page ! */
+ vm_page_purged_count++;
+ }
+ goto reclaim_page;
+ }
+ if (object->purgable == VM_PURGABLE_VOLATILE) {
+ /* if it's wired, we can't put it on our queue */
+ assert(m->wire_count == 0);
+ /* just stick it back on! */
+ goto reactivate_page;
+ }
}
m->pageq.next = NULL;
m->pageq.prev = NULL;
wpl_array_t lite_list = NULL;
vm_object_t last_copy_object;
int delayed_unlock = 0;
+ int j;
if (cntrl_flags & ~UPL_VALID_FLAGS) {
/*
}
vm_object_unlock(object);
VM_PAGE_GRAB_FICTITIOUS(alias_page);
- vm_object_lock(object);
+ goto relock;
}
- if (delayed_unlock == 0)
- vm_page_lock_queues();
+ if (delayed_unlock == 0) {
+ /*
+ * pageout_scan takes the vm_page_lock_queues first
+ * then tries for the object lock... to avoid what
+ * is effectively a lock inversion, we'll go to the
+ * trouble of taking them in that same order... otherwise
+ * if this object contains the majority of the pages resident
+ * in the UBC (or a small set of large objects actively being
+ * worked on contain the majority of the pages), we could
+ * cause the pageout_scan thread to 'starve' in its attempt
+ * to find pages to move to the free queue, since it has to
+ * successfully acquire the object lock of any candidate page
+ * before it can steal/clean it.
+ */
+ vm_object_unlock(object);
+relock:
+ for (j = 0; ; j++) {
+ vm_page_lock_queues();
+ if (vm_object_lock_try(object))
+ break;
+ vm_page_unlock_queues();
+ mutex_pause(j);
+ }
+ delayed_unlock = 1;
+ }
if (cntrl_flags & UPL_COPYOUT_FROM) {
upl->flags |= UPL_PAGE_SYNC_DONE;
dst_page->busy = was_busy;
vm_page_lock_queues();
+ delayed_unlock = 1;
}
if (dst_page->pageout_queue == TRUE)
/*
upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
vm_page_lock_queues();
+ delayed_unlock = 1;
}
/*
* remember the copy object we synced with
}
/*
* need to allocate a page
- * vm_page_alloc may grab the
- * queues lock for a purgeable object
- * so drop it
*/
- delayed_unlock = 0;
- vm_page_unlock_queues();
-
- dst_page = vm_page_alloc(object, dst_offset);
+ dst_page = vm_page_grab();
if (dst_page == VM_PAGE_NULL) {
if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
* then try again for the same
* offset...
*/
+ delayed_unlock = 0;
+ vm_page_unlock_queues();
+
vm_object_unlock(object);
VM_PAGE_WAIT();
- vm_object_lock(object);
+
+ /*
+ * pageout_scan takes the vm_page_lock_queues first
+ * then tries for the object lock... to avoid what
+ * is effectively a lock inversion, we'll go to the
+ * trouble of taking them in that same order... otherwise
+ * if this object contains the majority of the pages resident
+ * in the UBC (or a small set of large objects actively being
+ * worked on contain the majority of the pages), we could
+ * cause the pageout_scan thread to 'starve' in its attempt
+ * to find pages to move to the free queue, since it has to
+ * successfully acquire the object lock of any candidate page
+ * before it can steal/clean it.
+ */
+ for (j = 0; ; j++) {
+ vm_page_lock_queues();
+
+ if (vm_object_lock_try(object))
+ break;
+ vm_page_unlock_queues();
+ mutex_pause(j);
+ }
+ delayed_unlock = 1;
continue;
}
- dst_page->busy = FALSE;
+ vm_page_insert_internal(dst_page, object, dst_offset, TRUE);
+
dst_page->absent = TRUE;
+ dst_page->busy = FALSE;
if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
/*
*/
dst_page->clustered = TRUE;
}
- vm_page_lock_queues();
}
/*
* ENCRYPTED SWAP:
}
delay_unlock_queues:
if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
+ /*
+ * pageout_scan takes the vm_page_lock_queues first
+ * then tries for the object lock... to avoid what
+ * is effectively a lock inversion, we'll go to the
+ * trouble of taking them in that same order... otherwise
+ * if this object contains the majority of the pages resident
+ * in the UBC (or a small set of large objects actively being
+ * worked on contain the majority of the pages), we could
+ * cause the pageout_scan thread to 'starve' in its attempt
+ * to find pages to move to the free queue, since it has to
+ * successfully acquire the object lock of any candidate page
+ * before it can steal/clean it.
+ */
+ vm_object_unlock(object);
mutex_yield(&vm_page_queue_lock);
+
+ for (j = 0; ; j++) {
+ if (vm_object_lock_try(object))
+ break;
+ vm_page_unlock_queues();
+ mutex_pause(j);
+ vm_page_lock_queues();
+ }
delayed_unlock = 1;
}
try_next_page:
if (alias_page != NULL) {
if (delayed_unlock == 0) {
vm_page_lock_queues();
- delayed_unlock++;
+ delayed_unlock = 1;
}
vm_page_free(alias_page);
}
cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
m->pmapped = TRUE;
+ m->wpmapped = TRUE;
PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, cache_attr, TRUE);
}
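The wpmapped bit introduced here (and at the other mapping sites in this patch) appears to track whether a page has ever been entered into a pmap with write permission, as distinct from pmapped, which records any mapping at all; the MACH_CLUSTER_STATS hunk further down switches its pmap_is_modified() probe from pmapped to wpmapped on that basis. A sketch of the apparent invariant -- the helper is illustrative, and the sites touched by this patch simply set both flags because they map writable:

/* keep pmapped/wpmapped consistent with how the page is entered (sketch) */
static void
vm_page_note_pmap_entry_sketch(vm_page_t m, vm_prot_t prot)
{
	m->pmapped = TRUE;			/* mapped somewhere, any protection */
	if (prot & VM_PROT_WRITE)
		m->wpmapped = TRUE;		/* mapped writable at least once */
}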
int delayed_unlock = 0;
int clear_refmod = 0;
int pgpgout_count = 0;
+ int j;
*empty = FALSE;
} else {
shadow_object = object;
}
- vm_object_lock(shadow_object);
-
entry = offset/PAGE_SIZE;
target_offset = (vm_object_offset_t)offset;
+ /*
+ * pageout_scan takes the vm_page_lock_queues first
+ * then tries for the object lock... to avoid what
+ * is effectively a lock inversion, we'll go to the
+ * trouble of taking them in that same order... otherwise
+ * if this object contains the majority of the pages resident
+ * in the UBC (or a small set of large objects actively being
+ * worked on contain the majority of the pages), we could
+ * cause the pageout_scan thread to 'starve' in its attempt
+ * to find pages to move to the free queue, since it has to
+ * successfully acquire the object lock of any candidate page
+ * before it can steal/clean it.
+ */
+ for (j = 0; ; j++) {
+ vm_page_lock_queues();
+
+ if (vm_object_lock_try(shadow_object))
+ break;
+ vm_page_unlock_queues();
+ mutex_pause(j);
+ }
+ delayed_unlock = 1;
+
while (xfer_size) {
vm_page_t t, m;
- if (delayed_unlock == 0)
- vm_page_lock_queues();
-
m = VM_PAGE_NULL;
if (upl->flags & UPL_LITE) {
m->dirty = TRUE;
else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
m->dirty = FALSE;
+ if (m->cs_validated && !m->cs_tainted) {
+ /*
+ * CODE SIGNING:
+ * This page is no longer dirty
+ * but could have been modified,
+ * so it will need to be
+ * re-validated.
+ */
+ m->cs_validated = FALSE;
+ vm_cs_validated_resets++;
+ }
clear_refmod |= VM_MEM_MODIFIED;
}
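This cs_validated reset recurs at every point in the commit path where the patch clears m->dirty. Factored out, the rule it enforces is simply the following (helper name illustrative, not part of the patch):

/* a page that is losing its dirty bit may have been modified since it was
 * code-sign validated; drop the validation so it is re-checked on next use */
static inline void
vm_page_cs_drop_validation_sketch(vm_page_t m)
{
	if (m->cs_validated && !m->cs_tainted) {
		m->cs_validated = FALSE;
		vm_cs_validated_resets++;	/* counter referenced by this patch */
	}
}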
if (flags & UPL_COMMIT_INACTIVATE)
*/
if (flags & UPL_COMMIT_CLEAR_DIRTY) {
m->dirty = FALSE;
+ if (m->cs_validated && !m->cs_tainted) {
+ /*
+ * CODE SIGNING:
+ * This page is no longer dirty
+ * but could have been modified,
+ * so it will need to be
+ * re-validated.
+ */
+ m->cs_validated = FALSE;
+ vm_cs_validated_resets++;
+ }
clear_refmod |= VM_MEM_MODIFIED;
}
if (clear_refmod)
if (m->wanted) vm_pageout_target_collisions++;
#endif
m->dirty = FALSE;
+ if (m->cs_validated && !m->cs_tainted) {
+ /*
+ * CODE SIGNING:
+ * This page is no longer dirty
+ * but could have been modified,
+ * so it will need to be
+ * re-validated.
+ */
+ m->cs_validated = FALSE;
+ vm_cs_validated_resets++;
+ }
if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))
m->dirty = TRUE;
goto commit_next_page;
}
#if MACH_CLUSTER_STATS
- if (m->pmapped)
+ if (m->wpmapped)
m->dirty = pmap_is_modified(m->phys_page);
if (m->dirty) vm_pageout_cluster_dirtied++;
if (m->wanted) vm_pageout_cluster_collisions++;
#endif
m->dirty = FALSE;
+ if (m->cs_validated && !m->cs_tainted) {
+ /*
+ * CODE SIGNING:
+ * This page is no longer dirty
+ * but could have been modified,
+ * so it will need to be
+ * re-validated.
+ */
+ m->cs_validated = FALSE;
+ vm_cs_validated_resets++;
+ }
if ((m->busy) && (m->cleaning)) {
/*
entry++;
if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
+ /*
+ * pageout_scan takes the vm_page_lock_queues first
+ * then tries for the object lock... to avoid what
+ * is effectively a lock inversion, we'll go to the
+ * trouble of taking them in that same order... otherwise
+ * if this object contains the majority of the pages resident
+ * in the UBC (or a small set of large objects actively being
+ * worked on contain the majority of the pages), we could
+ * cause the pageout_scan thread to 'starve' in its attempt
+ * to find pages to move to the free queue, since it has to
+ * successfully acquire the object lock of any candidate page
+ * before it can steal/clean it.
+ */
+ vm_object_unlock(shadow_object);
mutex_yield(&vm_page_queue_lock);
+
+ for (j = 0; ; j++) {
+ if (vm_object_lock_try(shadow_object))
+ break;
+ vm_page_unlock_queues();
+ mutex_pause(j);
+ vm_page_lock_queues();
+ }
delayed_unlock = 1;
}
}
wpl_array_t lite_list;
int occupied;
int delayed_unlock = 0;
+ int j;
*empty = FALSE;
} else
shadow_object = object;
- vm_object_lock(shadow_object);
-
entry = offset/PAGE_SIZE;
target_offset = (vm_object_offset_t)offset;
+ /*
+ * pageout_scan takes the vm_page_lock_queues first
+ * then tries for the object lock... to avoid what
+ * is effectively a lock inversion, we'll go to the
+ * trouble of taking them in that same order... otherwise
+ * if this object contains the majority of the pages resident
+ * in the UBC (or a small set of large objects actively being
+ * worked on contain the majority of the pages), we could
+ * cause the pageout_scan thread to 'starve' in its attempt
+ * to find pages to move to the free queue, since it has to
+ * successfully acquire the object lock of any candidate page
+ * before it can steal/clean it.
+ */
+ for (j = 0; ; j++) {
+ vm_page_lock_queues();
+
+ if (vm_object_lock_try(shadow_object))
+ break;
+ vm_page_unlock_queues();
+ mutex_pause(j);
+ }
+ delayed_unlock = 1;
+
while (xfer_size) {
vm_page_t t, m;
- if (delayed_unlock == 0)
- vm_page_lock_queues();
-
m = VM_PAGE_NULL;
if (upl->flags & UPL_LITE) {
}
}
if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
+ /*
+ * pageout_scan takes the vm_page_lock_queues first
+ * then tries for the object lock... to avoid what
+ * is effectively a lock inversion, we'll go to the
+ * trouble of taking them in that same order... otherwise
+ * if this object contains the majority of the pages resident
+ * in the UBC (or a small set of large objects actively being
+ * worked on contain the majority of the pages), we could
+ * cause the pageout_scan thread to 'starve' in its attempt
+ * to find pages to move to the free queue, since it has to
+ * successfully acquire the object lock of any candidate page
+ * before it can steal/clean it.
+ */
+ vm_object_unlock(shadow_object);
mutex_yield(&vm_page_queue_lock);
+
+ for (j = 0; ; j++) {
+ if (vm_object_lock_try(shadow_object))
+ break;
+ vm_page_unlock_queues();
+ mutex_pause(j);
+ vm_page_lock_queues();
+ }
delayed_unlock = 1;
}
target_offset += PAGE_SIZE_64;
pmap_sync_page_data_phys(page->phys_page);
}
page->pmapped = TRUE;
+ page->wpmapped = TRUE;
cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
//assert(pmap_verify_free(page->phys_page));
* and the decryption doesn't count.
*/
page->dirty = FALSE;
+ if (page->cs_validated && !page->cs_tainted) {
+ /*
+ * CODE SIGNING:
+ * This page is no longer dirty
+ * but could have been modified,
+ * so it will need to be
+ * re-validated.
+ */
+ page->cs_validated = FALSE;
+ vm_cs_validated_resets++;
+ }
pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
page->encrypted = FALSE;
*/
assert(pmap_verify_free(page->phys_page));
page->pmapped = FALSE;
+ page->wpmapped = FALSE;
vm_object_paging_end(page->object);
}
struct token tokens[MAX_VOLATILE];
-token_idx_t token_free_idx = 0; /* head of free queue */
-token_cnt_t token_init_count = 1; /* token 0 is reserved!! */
-token_cnt_t token_new_pagecount = 0; /* count of pages that will
+token_idx_t token_free_idx = 0; /* head of free queue */
+token_idx_t token_init_idx = 1; /* token 0 is reserved!! */
+int32_t token_new_pagecount = 0; /* count of pages that will
* be added onto token queue */
int available_for_purge = 0; /* increase when ripe token
token_idx_t token;
enum purgeable_q_type i;
- if (token_init_count < MAX_VOLATILE) { /* lazy token array init */
- token = token_init_count;
- token_init_count++;
+ if (token_init_idx < MAX_VOLATILE) { /* lazy token array init */
+ token = token_init_idx;
+ token_init_idx++;
} else if (token_free_idx) {
token = token_free_idx;
token_free_idx = tokens[token_free_idx].next;
* obsolete
*/
for (i = PURGEABLE_Q_TYPE_FIFO; i < PURGEABLE_Q_TYPE_MAX; i++) {
- purgeable_queues[i].new_pages += token_new_pagecount;
- assert(purgeable_queues[i].new_pages >= 0);
- assert((uint64_t) (purgeable_queues[i].new_pages) <= TOKEN_COUNT_MAX);
+ int64_t pages = purgeable_queues[i].new_pages += token_new_pagecount;
+ assert(pages >= 0);
+ assert(pages <= TOKEN_COUNT_MAX);
+ purgeable_queues[i].new_pages = pages;
}
token_new_pagecount = 0;
void
vm_purgeable_q_advance_all(uint32_t num_pages)
{
+ /* check queue counters - if they get really large, scale them back.
+ * They tend to get that large when there is no purgeable queue action */
+ int i;
+ if (token_new_pagecount > (INT32_MAX >> 1)) /* roughly 2^30 pages; a system idling for years might get here */
+ {
+ for (i = PURGEABLE_Q_TYPE_FIFO; i < PURGEABLE_Q_TYPE_MAX; i++) {
+ int64_t pages = purgeable_queues[i].new_pages += token_new_pagecount;
+ assert(pages >= 0);
+ assert(pages <= TOKEN_COUNT_MAX);
+ purgeable_queues[i].new_pages = pages;
+ }
+ token_new_pagecount = 0;
+ }
+
/*
* don't need to advance obsolete queue - all items are ripe there,
* always
PURGEABLE_Q_TYPE_MAX
};
-/*
- * It appears there's a 16 vs 32 size mismatch when using
- * CONFIG_TOKEN_QUEUE_SMALL and the resulting math can lead to a large
- * negative value for new_pages in vm_purgeable.c.
- */
-#if (CONFIG_TOKEN_QUEUE_SMALL == 1) && 0
+#if (CONFIG_TOKEN_QUEUE_SMALL == 1)
typedef uint16_t token_idx_t;
typedef uint16_t token_cnt_t;
#define MAX_VOLATILE 0x01000
typedef struct purgeable_q * purgeable_q_t;
extern struct purgeable_q purgeable_queues[PURGEABLE_Q_TYPE_MAX];
-extern token_cnt_t token_new_pagecount;
+extern int32_t token_new_pagecount;
extern int available_for_purge;
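Widening token_new_pagecount to int32_t, together with re-enabling CONFIG_TOKEN_QUEUE_SMALL above, is what lets the removed "16 vs 32 size mismatch" workaround comment go away: a 16-bit counter is far too small for counts of pages. Rough headroom, assuming 4 KB pages (illustrative arithmetic only):

/*   uint16_t: 65,535 pages        * 4 KiB  ~=  256 MiB -- trivially exceeded
 *   int32_t : 2,147,483,647 pages * 4 KiB  ~=  8 TiB   -- and the new
 *             INT32_MAX >> 1 fold-back in vm_purgeable_q_advance_all()
 *             keeps the counter from ever getting near that limit
 */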
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
-static void vm_page_insert_internal(vm_page_t, vm_object_t, vm_object_offset_t, boolean_t);
-
/*
* Associated with page of user-allocatable memory is a
m->laundry = FALSE;
m->free = FALSE;
m->pmapped = FALSE;
+ m->wpmapped = FALSE;
m->reference = FALSE;
m->pageout = FALSE;
m->dump_cleaning = FALSE;
}
-static void
+void
vm_page_insert_internal(
vm_page_t mem,
vm_object_t object,
assert(mem->free);
assert(mem->busy);
assert(!mem->pmapped);
+ assert(!mem->wpmapped);
mem->pageq.next = NULL;
mem->pageq.prev = NULL;
assert(mem->busy);
assert(!mem->encrypted);
assert(!mem->pmapped);
+ assert(!mem->wpmapped);
return mem;
}
assert(!mem->free);
assert(!mem->encrypted);
assert(!mem->pmapped);
+ assert(!mem->wpmapped);
}
PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
PROCESSOR_DATA(current_processor(), start_color) = color;
mem->encrypted_cleaning = FALSE;
mem->deactivated = FALSE;
mem->pmapped = FALSE;
+ mem->wpmapped = FALSE;
if (mem->private) {
mem->private = FALSE;
dest_m->encrypted = FALSE;
if (src_m->object != VM_OBJECT_NULL &&
- src_m->object->code_signed &&
- !src_m->cs_validated) {
+ src_m->object->code_signed) {
/*
- * We're copying a not-yet-validated page from a
- * code-signed object.
+ * We're copying a page from a code-signed object.
* Whoever ends up mapping the copy page might care about
* the original page's integrity, so let's validate the
* source page now.
#include <kern/ipc_tt.h>
#include <kern/kalloc.h>
+#include <mach/mach_vm.h>
+
#include <vm/vm_map.h>
#include <vm/vm_shared_region.h>
unsigned int i;
mach_port_t map_port;
mach_vm_offset_t target_address;
+ vm_object_t object;
+ vm_object_size_t obj_size;
+
kr = KERN_SUCCESS;
target_address =
mappings[i].sfm_address - sr_base_address;
- /* establish that mapping, OK if it's to "already" there */
- kr = vm_map_enter_mem_object(
- sr_map,
- &target_address,
- vm_map_round_page(mappings[i].sfm_size),
- 0,
- VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
- map_port,
- mappings[i].sfm_file_offset,
- TRUE,
- mappings[i].sfm_init_prot & VM_PROT_ALL,
- mappings[i].sfm_max_prot & VM_PROT_ALL,
- VM_INHERIT_DEFAULT);
- if (kr == KERN_MEMORY_PRESENT) {
- /* this exact mapping was already there: that's fine */
- SHARED_REGION_TRACE_INFO(
- ("shared_region: mapping[%d]: "
- "address:0x%016llx size:0x%016llx "
- "offset:0x%016llx "
- "maxprot:0x%x prot:0x%x already mapped...\n",
- i,
- (long long)mappings[i].sfm_address,
- (long long)mappings[i].sfm_size,
- (long long)mappings[i].sfm_file_offset,
- mappings[i].sfm_max_prot,
- mappings[i].sfm_init_prot));
- kr = KERN_SUCCESS;
- } else if (kr != KERN_SUCCESS) {
- /* this mapping failed ! */
- SHARED_REGION_TRACE_ERROR(
- ("shared_region: mapping[%d]: "
- "address:0x%016llx size:0x%016llx "
- "offset:0x%016llx "
- "maxprot:0x%x prot:0x%x failed 0x%x\n",
- i,
- (long long)mappings[i].sfm_address,
- (long long)mappings[i].sfm_size,
- (long long)mappings[i].sfm_file_offset,
- mappings[i].sfm_max_prot,
- mappings[i].sfm_init_prot,
- kr));
- break;
+ /* establish that mapping, OK if it's "already" there */
+ if (map_port == MACH_PORT_NULL) {
+ /*
+ * We want to map some anonymous memory in a
+ * shared region.
+ * We have to create the VM object now, so that it
+ * can be mapped "copy-on-write".
+ */
+ obj_size = vm_map_round_page(mappings[i].sfm_size);
+ object = vm_object_allocate(obj_size);
+ if (object == VM_OBJECT_NULL) {
+ kr = KERN_RESOURCE_SHORTAGE;
+ } else {
+ kr = vm_map_enter(
+ sr_map,
+ &target_address,
+ vm_map_round_page(mappings[i].sfm_size),
+ 0,
+ VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
+ object,
+ 0,
+ TRUE,
+ mappings[i].sfm_init_prot & VM_PROT_ALL,
+ mappings[i].sfm_max_prot & VM_PROT_ALL,
+ VM_INHERIT_DEFAULT);
+ }
+ } else {
+ object = VM_OBJECT_NULL; /* no anonymous memory here */
+ kr = vm_map_enter_mem_object(
+ sr_map,
+ &target_address,
+ vm_map_round_page(mappings[i].sfm_size),
+ 0,
+ VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
+ map_port,
+ mappings[i].sfm_file_offset,
+ TRUE,
+ mappings[i].sfm_init_prot & VM_PROT_ALL,
+ mappings[i].sfm_max_prot & VM_PROT_ALL,
+ VM_INHERIT_DEFAULT);
}
- /* we're protected by "sr_mapping_in_progress" */
+ if (kr != KERN_SUCCESS) {
+ if (map_port == MACH_PORT_NULL) {
+ /*
+ * Get rid of the VM object we just created
+ * but failed to map.
+ */
+ vm_object_deallocate(object);
+ object = VM_OBJECT_NULL;
+ }
+ if (kr == KERN_MEMORY_PRESENT) {
+ /*
+ * This exact mapping was already there:
+ * that's fine.
+ */
+ SHARED_REGION_TRACE_INFO(
+ ("shared_region: mapping[%d]: "
+ "address:0x%016llx size:0x%016llx "
+ "offset:0x%016llx "
+ "maxprot:0x%x prot:0x%x "
+ "already mapped...\n",
+ i,
+ (long long)mappings[i].sfm_address,
+ (long long)mappings[i].sfm_size,
+ (long long)mappings[i].sfm_file_offset,
+ mappings[i].sfm_max_prot,
+ mappings[i].sfm_init_prot));
+ /*
+ * We didn't establish this mapping ourselves;
+ * let's reset its size, so that we do not
+ * attempt to undo it if an error occurs later.
+ */
+ mappings[i].sfm_size = 0;
+ kr = KERN_SUCCESS;
+ } else {
+ unsigned int j;
+
+ /* this mapping failed ! */
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: mapping[%d]: "
+ "address:0x%016llx size:0x%016llx "
+ "offset:0x%016llx "
+ "maxprot:0x%x prot:0x%x failed 0x%x\n",
+ i,
+ (long long)mappings[i].sfm_address,
+ (long long)mappings[i].sfm_size,
+ (long long)mappings[i].sfm_file_offset,
+ mappings[i].sfm_max_prot,
+ mappings[i].sfm_init_prot,
+ kr));
+
+ /*
+ * Undo the mappings we've established so far.
+ */
+ for (j = 0; j < i; j++) {
+ kern_return_t kr2;
+
+ if (mappings[j].sfm_size == 0) {
+ /*
+ * We didn't establish this
+ * mapping, so nothing to undo.
+ */
+ continue;
+ }
+ SHARED_REGION_TRACE_INFO(
+ ("shared_region: mapping[%d]: "
+ "address:0x%016llx "
+ "size:0x%016llx "
+ "offset:0x%016llx "
+ "maxprot:0x%x prot:0x%x: "
+ "undoing...\n",
+ j,
+ (long long)mappings[j].sfm_address,
+ (long long)mappings[j].sfm_size,
+ (long long)mappings[j].sfm_file_offset,
+ mappings[j].sfm_max_prot,
+ mappings[j].sfm_init_prot));
+ kr2 = mach_vm_deallocate(
+ sr_map,
+ (mappings[j].sfm_address -
+ sr_base_address),
+ mappings[j].sfm_size);
+ assert(kr2 == KERN_SUCCESS);
+ }
+
+ break;
+ }
+
+ }
+
+ /*
+ * Record the first (chronologically) mapping in
+ * this shared region.
+ * We're protected by "sr_mapping_in_progress" here,
+ * so no need to lock "shared_region".
+ */
if (shared_region->sr_first_mapping == (mach_vm_offset_t) -1) {
shared_region->sr_first_mapping = target_address;
}
# Note: MAC options must be set in both bsd/conf and security/conf MASTER files
#
options KDEBUG # kernel tracing # <kdebug>
-options AUDIT # Security event auditing
+options AUDIT # Security event auditing # <audit>
options CONFIG_LCTX # Login Context
options CONFIG_DTRACE # dtrace support # <config_dtrace>
######################################################################
#
-# RELEASE = [ intel mach libkerncpp config_dtrace ]
+# RELEASE = [ intel mach libkerncpp config_dtrace audit ]
# PROFILE = [ RELEASE profile ]
# DEBUG = [ RELEASE debug ]
#
-# EMBEDDED = [ intel mach libkerncpp ]
+# EMBEDDED = [ intel mach libkerncpp audit ]
# DEVELOPMENT = [ EMBEDDED config_dtrace ]
#
######################################################################
#
-# Note: MAC options must be set in both bsd/conf and security/conf MASTER files
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and
+# security/conf MASTER files.
#
options CONFIG_MACF # Mandatory Access Control Framework
options CONFIG_MACF_SOCKET_SUBSET # MACF subset of socket support
# Standard Apple MacOS X Configurations:
# -------- ---- -------- ---------------
#
-# RELEASE = [ppc mach libkerncpp config_dtrace]
+# RELEASE = [ppc mach libkerncpp config_dtrace audit]
# DEVELOPMENT = [RELEASE]
# PROFILE = [RELEASE]
# DEBUG = [RELEASE debug]
######################################################################
#
-# Note: corresponding MACF options must be set in both security/conf
-# bsd/conf and/or osfmk/conf MASTER files (depending upon the option)
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and
+# security/conf MASTER files.
#
options CONFIG_MACF # Mandatory Access Control Framework
options CONFIG_MACF_SOCKET_SUBSET # MACF subset of socket support
#
# XXX: CFLAGS
#
-CFLAGS+= -DKERNEL -DBSD_KERNEL_PRIVATE \
+CFLAGS+= -I. -imacros meta_features.h -DKERNEL -DBSD_KERNEL_PRIVATE \
-Wall -Wno-four-char-constants -fno-common
#
# options
# OPTIONS/kdebug optional kdebug
+OPTIONS/audit optional audit
+OPTIONS/config_macf optional config_macf
+OPTIONS/config_macf_socket_subset optional config_macf_socket_subset
+OPTIONS/config_macf_socket optional config_macf_socket
+OPTIONS/config_macf_net optional config_macf_net
+
# security
security/mac_alloc.c optional config_macf
#include <kern/kalloc.h>
#include <kern/zalloc.h>
-#ifdef AUDIT
+#if AUDIT
/* The zone allocator is initialized in mac_base.c. */
zone_t mac_audit_data_zone;
return (0);
}
+
+int
+mac_audit_text(__unused char *text, __unused mac_policy_handle_t handle)
+{
+ return (0);
+}
#endif /* !AUDIT */
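The #ifdef AUDIT -> #if AUDIT change pairs with the new OPTIONS/audit entry in the conf files above: the kernel's config tooling is assumed here to generate a per-configuration option header that always defines the macro, giving it a zero value when 'audit' is left out of a config. With '#ifdef AUDIT' the audit-only code would be built into every configuration; '#if AUDIT' respects the 0/1 value. A sketch of what such a generated header would look like (contents are an assumption, not copied from an actual build):

/* audit.h -- generated into the per-config build directory by the config tool */
#define AUDIT 0		/* becomes 1 when 'audit' is listed in the kernel config */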
&mac_label_mbufs, 0, "Label all MBUFs");
#endif
+#if AUDIT
/*
* mac_audit_data_zone is the zone used for data pushed into the audit
* record by policies. Using a zone simplifies memory management of this
* data, and allows tracking of the amount of data in flight.
*/
extern zone_t mac_audit_data_zone;
+#endif
/*
* mac_policy_list holds the list of policy modules. Modules with a
struct mac_policy_conf *mpc;
u_int i;
+#if AUDIT
mac_audit_data_zone = zinit(MAC_AUDIT_DATA_LIMIT,
AQ_HIWATER * MAC_AUDIT_DATA_LIMIT,
8192, "mac_audit_data_zone");
+#endif
printf("MAC Framework successfully initialized\n");
my_aiocbp = &my_aiocbs[ 0 ];
my_aiocbp->aio_fildes = my_fd_list[ 0 ];
- my_aiocbp->aio_offset = 0;
+ my_aiocbp->aio_offset = 4096;
my_aiocbp->aio_buf = my_buffers[ 0 ];
my_aiocbp->aio_nbytes = AIO_TESTS_BUFFER_SIZE;
my_aiocbp->aio_reqprio = 0;
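For context, the aiocb filled in above (now targeting offset 4096) still has to be submitted and reaped by the rest of the test. A self-contained sketch of the same setup with the submission added -- the path argument, buffer size, and helper name are placeholders, not taken from the test source:

#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#define SKETCH_BUFFER_SIZE	(64 * 1024)	/* placeholder size */

static int
aio_write_at_offset_sketch(const char *path)
{
	static char	buffer[SKETCH_BUFFER_SIZE];
	struct aiocb	cb;
	int		fd, err;

	if ((fd = open(path, O_RDWR | O_CREAT, 0666)) < 0)
		return (-1);

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes  = fd;
	cb.aio_offset  = 4096;			/* same offset the test now uses */
	cb.aio_buf     = buffer;
	cb.aio_nbytes  = sizeof(buffer);
	cb.aio_reqprio = 0;

	if (aio_write(&cb) != 0) {
		close(fd);
		return (-1);
	}
	while ((err = aio_error(&cb)) == EINPROGRESS)
		usleep(1000);			/* polling; aio_suspend() also works */

	close(fd);
	return (err == 0 && aio_return(&cb) == (ssize_t)sizeof(buffer)) ? 0 : -1;
}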