git.saurik.com Git - apple/xnu.git/commitdiff
xnu-1228.3.13.tar.gz mac-os-x-1052 v1228.3.13
author     Apple <opensource@apple.com>
           Tue, 12 Feb 2008 10:17:42 +0000 (10:17 +0000)
committer  Apple <opensource@apple.com>
           Tue, 12 Feb 2008 10:17:42 +0000 (10:17 +0000)
123 files changed:
README
bsd/conf/MASTER.i386
bsd/conf/MASTER.ppc
bsd/conf/files
bsd/dev/dtrace/dtrace_glue.c
bsd/dev/dtrace/lockstat.c
bsd/dev/memdev.c
bsd/dev/random/randomdev.c
bsd/hfs/hfs_catalog.c
bsd/hfs/hfs_cnode.c
bsd/hfs/hfs_link.c
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_vnops.c
bsd/hfs/hfs_xattr.c
bsd/kern/bsd_init.c
bsd/kern/kern_exec.c
bsd/kern/kern_exit.c
bsd/kern/kern_sysctl.c
bsd/kern/kpi_socketfilter.c
bsd/kern/pthread_synch.c
bsd/kern/sys_generic.c
bsd/kern/uipc_socket.c
bsd/kern/uipc_socket2.c
bsd/kern/uipc_syscalls.c
bsd/net/dlil.c
bsd/net/dlil.h
bsd/net/if.c
bsd/net/kext_net.h
bsd/net/kpi_interface.c
bsd/net/kpi_interface.h
bsd/net/kpi_protocol.c
bsd/net/route.c
bsd/netinet/igmp.c
bsd/netinet/in.h
bsd/netinet/ip_fw2.h
bsd/netinet/ip_input.c
bsd/netinet/ip_output.c
bsd/netinet/raw_ip.c
bsd/netinet/tcp_input.c
bsd/netinet/tcp_output.c
bsd/netinet/tcp_timer.c
bsd/netinet6/nd6.c
bsd/nfs/nfs_socket.c
bsd/nfs/nfs_syscalls.c
bsd/nfs/nfsm_subs.h
bsd/sys/aio.h
bsd/sys/dtrace_glue.h
bsd/sys/errno.h
bsd/sys/namei.h
bsd/sys/reboot.h
bsd/sys/socket.h
bsd/sys/socketvar.h
bsd/vfs/vfs_cache.c
bsd/vfs/vfs_journal.c
bsd/vfs/vfs_lookup.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_xattr.c
bsd/vm/vm_unix.c
config/BSDKernel.exports
config/IOKit.exports
config/MasterVersion
config/System6.0.exports
iokit/IOKit/pwr_mgt/IOPM.h
iokit/IOKit/pwr_mgt/RootDomain.h
iokit/Kernel/IOBufferMemoryDescriptor.cpp
iokit/Kernel/IODMACommand.cpp
iokit/Kernel/IODeviceTreeSupport.cpp
iokit/Kernel/IOHibernateIO.cpp
iokit/Kernel/IOKitKernelInternal.h
iokit/Kernel/IOMemoryDescriptor.cpp
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IOPlatformExpert.cpp
iokit/bsddev/IOKitBSDInit.cpp
iokit/conf/MASTER
iokit/conf/files
kgmacros
libsyscall/Makefile
libsyscall/Makefile.xbs
libsyscall/create-syscalls.pl
libsyscall/custom/SYS.h
libsyscall/mach/Makefile.inc
makedefs/MakeInc.def
osfmk/conf/MASTER.i386
osfmk/conf/MASTER.ppc
osfmk/i386/AT386/model_dep.c
osfmk/i386/acpi.c
osfmk/i386/hpet.c
osfmk/i386/machine_check.c
osfmk/i386/misc_protos.h
osfmk/i386/mp_desc.c
osfmk/i386/pmap.c
osfmk/i386/rtclock.c
osfmk/i386/startup64.c
osfmk/i386/thread.h
osfmk/i386/tsc.c
osfmk/kern/etimer.h
osfmk/kern/locks.c
osfmk/kern/mach_clock.c
osfmk/kern/priority.c
osfmk/kern/sched_prim.c
osfmk/kern/syscall_subr.c
osfmk/kern/thread.h
osfmk/kern/thread_act.c
osfmk/mach/i386/thread_status.h
osfmk/mach/machine.h
osfmk/vm/bsd_vm.c
osfmk/vm/vm_fault.c
osfmk/vm/vm_map.c
osfmk/vm/vm_object.c
osfmk/vm/vm_page.h
osfmk/vm/vm_pageout.c
osfmk/vm/vm_purgeable.c
osfmk/vm/vm_purgeable_internal.h
osfmk/vm/vm_resident.c
osfmk/vm/vm_shared_region.c
security/conf/MASTER
security/conf/MASTER.i386
security/conf/MASTER.ppc
security/conf/Makefile.template
security/conf/files
security/mac_audit.c
security/mac_base.c
tools/tests/xnu_quick_test/tests.c

diff --git a/README b/README
index 9ab5b012d8d964afedfa405ce815f02804ac9cb5..76ea08c3829cb1ebca801ab44a5282f7e852df2e 100644 (file)
--- a/README
+++ b/README
@@ -15,31 +15,27 @@ A. How to build XNU:
 
   By default, architecture defaults to the build machine 
   architecture, and the kernel configuration is set to build for DEVELOPMENT.
-  The machine configuration defaults to MX31ADS for arm and nothing for i386 and ppc.
+  The machine configuration defaults to S5L8900XRB for arm and default for i386 and ppc.
   
   This will also create a bootable image, mach_kernel,  and a kernel binary 
   with symbols, mach_kernel.sys.
-
-  Here are the valid arm machine configs:
-       LN2410SBC MX31ADS INTEGRATORCP S5I3000SMDK S5L8900XFPGA S5L8900XRB
-       OLOCREEK
        
   Examples:
-       /* make a debug kernel for MX31 arm board */
-       make TARGET_CONFIGS="debug arm MX31ADS"
+       /* make a debug kernel for H1 arm board */
+       make TARGET_CONFIGS="debug arm s5l8900xrb"
        
-    $(OBJROOT)/DEBUG_ARM_MX31ADS/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
-    $(OBJROOT)/DEBUG_ARM_MX31ADS/mach_kernel: bootable image
+    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
+    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/mach_kernel: bootable image
 
-       /* make debug and development kernels for MX31 arm board */
-       make TARGET_CONFIGS="debug arm MX31ADS  development arm MX31ADS"
+       /* make debug and development kernels for H1 arm board */
+       make TARGET_CONFIGS="debug arm s5l8900xrb  development arm s5l8900xrb"
        
-    $(OBJROOT)/DEBUG_ARM_MX31ADS/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
-    $(OBJROOT)/DEBUG_ARM_MX31ADS/mach_kernel: bootable image
+    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
+    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/mach_kernel: bootable image
     $(OBJROOT)/DEVELOPMENT_ARM/osfmk/DEVELOPMENT/osfmk.o: pre-linked object for osfmk component
     $(OBJROOT)/DEVELOPMENT_ARM/mach_kernel: bootable image
 
-       /* this is all you need to do to build MX31ADS arm with DEVELOPMENT kernel configuration  */
+       /* this is all you need to do to build H1 arm with DEVELOPMENT kernel configuration  */
        make TARGET_CONFIGS="default arm default"
        
        or the following is equivalent
diff --git a/bsd/conf/MASTER.i386 b/bsd/conf/MASTER.i386
index 24125e3ce6c5d4522eea6e2dc2cc075787c2dab7..a4504b8a8ad55874f21a96f61f7abcf2da72fb20 100644 (file)
@@ -55,7 +55,7 @@
 #
 #  EMBEDDED_BASE =     [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot ]
 #  EMBEDDED_FILESYS =  [ devfs hfs journaling fdesc fifo ]
-#  EMBEDDED_NET =      [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert config_mbuf_noexpand dummynet ipfirewall ipfw2 zlib ifnet_input_chk ]
+#  EMBEDDED_NET =      [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter config_mbuf_noexpand ]
 #  EMBEDDED =          [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ]
 #  DEVELOPMENT =       [ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver netmibs development mach_assert config_dtrace ]
 #
@@ -79,7 +79,8 @@ config                mach_kernel     swap generic                    # <mach>
 options                EVENT                                           # <event>
 
 #
-# Note: MAC options must be set in both bsd/conf and security/conf MASTER files
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
 #
 options                CONFIG_MACF                                     # Mandatory Access Control Framework
 options                CONFIG_MACF_SOCKET_SUBSET       # MAC socket subest (no labels)
diff --git a/bsd/conf/MASTER.ppc b/bsd/conf/MASTER.ppc
index 4e1513cad173060534edd3afa65c735dde2b83e5..9f4a08d6d25bd7feb923383a51d2dcf5febffd72 100644 (file)
@@ -69,7 +69,8 @@ options               UXPR            # user-level XPR package        # <uxpr>
 config         mach_kernel     swap generic                    # <mach>
 
 #
-# Note: MAC options must be set in both bsd/conf and security/conf MASTER files
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
 #
 options                CONFIG_MACF                     # Mandatory Access Control Framework
 options                CONFIG_MACF_SOCKET_SUBSET       # MAC socket subest (no labels)
diff --git a/bsd/conf/files b/bsd/conf/files
index 502307c920728c71c0694d91df66646add471b26..4f927bcbac2a21541d801de980fbee6b99669162 100644 (file)
@@ -67,7 +67,6 @@ OPTIONS/vndevice                      optional vndevice
 OPTIONS/audit                          optional audit
 OPTIONS/config_fse                     optional config_fse
 OPTIONS/sockets                                optional sockets
-OPTIONS/kpidirect                      optional kpidirect
 OPTIONS/development                    optional development
 OPTIONS/sysv_sem                       optional sysv_sem
 OPTIONS/sysv_msg                       optional sysv_msg
diff --git a/bsd/dev/dtrace/dtrace_glue.c b/bsd/dev/dtrace/dtrace_glue.c
index 035150aa799f0a0fd5511737813161d8a06cbd5d..1ef883569ca730ec83c5b8634272a28e6380f015 100644 (file)
@@ -1218,7 +1218,16 @@ dtrace_copyinstr(user_addr_t src, uintptr_t dst, size_t len)
        size_t actual;
        
        if (dtrace_copycheck( src, dst, len )) {
-               if (copyinstr((const user_addr_t)src, (char *)dst, (vm_size_t)len, &actual)) {
+               /*  copyin as many as 'len' bytes. */
+               int error = copyinstr((const user_addr_t)src, (char *)dst, (vm_size_t)len, &actual);
+
+               /*
+                * ENAMETOOLONG is returned when 'len' bytes have been copied in but the NUL terminator was
+                * not encountered. That does not require raising CPU_DTRACE_BADADDR, and we press on.
+                * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
+                * to the caller.
+                */
+               if (error && error != ENAMETOOLONG) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = src;
                }
@@ -1244,6 +1253,13 @@ dtrace_copyoutstr(uintptr_t src, user_addr_t dst, size_t len)
        size_t actual;
 
        if (dtrace_copycheck( dst, src, len )) {
+
+               /*
+                * ENAMETOOLONG is returned when 'len' bytes have been copied out but the NUL terminator was
+                * not encountered. We raise CPU_DTRACE_BADADDR in that case.
+                * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
+                * to the caller.
+                */
                if (copyoutstr((const void *)src, dst, (size_t)len, &actual)) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = dst;
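
The dtrace_copyinstr() change above distinguishes ENAMETOOLONG -- 'len' bytes copied without finding a NUL -- from a genuine fault, and only the latter raises CPU_DTRACE_BADADDR (the copyout path, per its new comment, still treats it as an error). A minimal userland sketch of that policy; bounded_copystr() is an invented stand-in for the kernel's copyinstr():

#include <errno.h>
#include <stdio.h>

static int bounded_copystr(const char *src, char *dst, size_t len, size_t *done)
{
        size_t i;

        for (i = 0; i < len; i++) {
                dst[i] = src[i];
                if (src[i] == '\0') {
                        *done = i + 1;
                        return 0;       /* NUL included in the copy */
                }
        }
        *done = len;
        return ENAMETOOLONG;            /* buffer full, no NUL appended */
}

int main(void)
{
        char buf[4];
        size_t actual;
        int error = bounded_copystr("toolong", buf, sizeof(buf), &actual);

        /* Mirror the fix: only a real fault would set CPU_DTRACE_BADADDR;
         * ENAMETOOLONG means "len bytes copied, terminator is ours to add". */
        if (error && error != ENAMETOOLONG)
                fprintf(stderr, "bad address\n");
        else
                printf("copied %zu bytes (error=%d)\n", actual, error);
        return 0;
}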
diff --git a/bsd/dev/dtrace/lockstat.c b/bsd/dev/dtrace/lockstat.c
index f466c873e5e149e1eaa78a10bf53dc947c34daf4..3c5602be9d45bdb86be9857d67baad26bcddcd4a 100644 (file)
@@ -77,7 +77,7 @@ typedef struct lockstat_probe {
 
 lockstat_probe_t lockstat_probes[] =
 {
-#ifndef        __PPC__
+#ifdef __i386__
        /* Not implemented yet on PPC... */
        { LS_LCK_MTX_LOCK,      LSA_ACQUIRE,    LS_LCK_MTX_LOCK_ACQUIRE, DTRACE_IDNONE },
        { LS_LCK_MTX_LOCK,      LSA_SPIN,       LS_LCK_MTX_LOCK_SPIN, DTRACE_IDNONE },
diff --git a/bsd/dev/memdev.c b/bsd/dev/memdev.c
index 307ad77d9949cead937863ce7f5835fe25c7e0c4..f957be33c52a71e9f8109403b51827479d6fb488 100644 (file)
@@ -172,6 +172,7 @@ int mdevCMajor = -1;
 static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p, int is_char);
 dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys);
 dev_t mdevlookup(int devid);
+void mdevremoveall(void);
 
 static int mdevclose(__unused dev_t dev, __unused int flags, 
                                          __unused int devtype, __unused struct proc *p) {
@@ -609,3 +610,24 @@ dev_t mdevlookup(int devid) {
        if(!(mdev[devid].mdFlags & mdInited)) return -1;        /* This one hasn't been defined */
        return mdev[devid].mdBDev;                                                      /* Return the device number */
 }
+
+void mdevremoveall(void) {
+
+       int i;
+
+       for(i = 0; i < 16; i++) {
+               if(!(mdev[i].mdFlags & mdInited)) continue;     /* Ignore unused mdevs */
+
+               devfs_remove(mdev[i].mdbdevb);                  /* Remove the block device */
+               devfs_remove(mdev[i].mdcdevb);                  /* Remove the character device */
+
+               mdev[i].mdBase = 0;                             /* Clear the mdev's storage */
+               mdev[i].mdSize = 0;
+               mdev[i].mdSecsize = 0;
+               mdev[i].mdFlags = 0;
+               mdev[i].mdBDev = 0;
+               mdev[i].mdCDev = 0;
+               mdev[i].mdbdevb = 0;
+               mdev[i].mdcdevb = 0;
+       }
+}
diff --git a/bsd/dev/random/randomdev.c b/bsd/dev/random/randomdev.c
index 4a7741e2a78c551c47ebda5d9517d05b8feeed08..9208ff6b69e6cb449fc8c34be2b29e310b2e6c1d 100644 (file)
@@ -99,7 +99,7 @@ typedef BlockWord Block[kBSize];
 
 void add_blocks(Block a, Block b, BlockWord carry);
 void fips_initialize(void);
-void random_block(Block b);
+void random_block(Block b, int addOptional);
 u_int32_t CalculateCRC(u_int8_t* buffer, size_t length);
 
 /*
@@ -194,18 +194,22 @@ u_int32_t CalculateCRC(u_int8_t* buffer, size_t length)
  * get a random block of data per fips 186-2
  */
 void
-random_block(Block b)
+random_block(Block b, int addOptional)
 {
        int repeatCount = 0;
        do
        {
                // do one iteration
-               Block xSeed;
-               prngOutput (gPrngRef, (BYTE*) &xSeed, sizeof (xSeed));
                
-               // add the seed to the previous value of g_xkey
-               add_blocks (g_xkey, xSeed, 0);
-
+               if (addOptional)
+               {
+                       Block xSeed;
+                       prngOutput (gPrngRef, (BYTE*) &xSeed, sizeof (xSeed));
+                       
+                       // add the seed to the previous value of g_xkey
+                       add_blocks (g_xkey, xSeed, 0);
+               }
+               
                // compute "G"
                SHA1Update (&g_sha1_ctx, (const u_int8_t *) &g_xkey, sizeof (g_xkey));
                
@@ -309,11 +313,13 @@ PreliminarySetup(void)
        fips_initialize ();
 }
 
+const Block kKnownAnswer = {0x92b404e5, 0x56588ced, 0x6c1acd4e, 0xbf053f68, 0x9f73a93};
+
 void
 fips_initialize(void)
 {
-       /* Read the initial value of g_xkey from yarrow */
-       prngOutput (gPrngRef, (BYTE*) &g_xkey, sizeof (g_xkey));
+       /* So that we can do the self test, set the seed to zero */
+       memset(&g_xkey, 0, sizeof(g_xkey));
        
        /* initialize our SHA1 generator */
        SHA1Init (&g_sha1_ctx);
@@ -321,7 +327,20 @@ fips_initialize(void)
        /* other initializations */
        memset (zeros, 0, sizeof (zeros));
        g_bytes_used = 0;
-       random_block(g_random_data);
+       random_block(g_random_data, FALSE);
+       
+       // check here to see if we got the initial data we were expecting
+       int i;
+       for (i = 0; i < kBSize; ++i)
+       {
+               if (kKnownAnswer[i] != g_random_data[i])
+               {
+                       panic("FIPS random self test failed");
+               }
+       }
+       
+       // now do the random block again to make sure that userland doesn't get predicatable data
+       random_block(g_random_data, TRUE);
 }
 
 /*
@@ -490,7 +509,7 @@ random_read(__unused dev_t dev, struct uio *uio, __unused int ioflag)
                int bytes_available = kBSizeInBytes - g_bytes_used;
         if (bytes_available == 0)
                {
-                       random_block(g_random_data);
+                       random_block(g_random_data, TRUE);
                        g_bytes_used = 0;
                        bytes_available = kBSizeInBytes;
                }
@@ -533,7 +552,7 @@ read_random(void* buffer, u_int numbytes)
         int bytes_to_read = min(bytes_remaining, kBSizeInBytes - g_bytes_used);
         if (bytes_to_read == 0)
                {
-                       random_block(g_random_data);
+                       random_block(g_random_data, TRUE);
                        g_bytes_used = 0;
                        bytes_to_read = min(bytes_remaining, kBSizeInBytes);
                }
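
fips_initialize() above now starts from an all-zero g_xkey, generates one block with the optional seed step disabled, and compares it against kKnownAnswer before reseeding, so a broken generator panics instead of shipping bad entropy. A toy sketch of that known-answer-test pattern; toy_generate() and expected[] are invented stand-ins, not the kernel's SHA-1-based FIPS 186-2 generator or its real constant:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BLOCK_WORDS 5

static uint32_t key[BLOCK_WORDS];       /* generator state, like g_xkey */

/* Toy generator: a deterministic function of the current key. */
static void toy_generate(uint32_t out[], int add_entropy)
{
        int i;

        if (add_entropy)                /* the "addOptional" step */
                for (i = 0; i < BLOCK_WORDS; i++)
                        key[i] += (uint32_t)rand();
        for (i = 0; i < BLOCK_WORDS; i++)
                out[i] = key[i] = key[i] * 1664525u + 1013904223u;
}

/* Known answer: toy_generate() run once from an all-zero key. */
static const uint32_t expected[BLOCK_WORDS] = {
        0x3c6ef35f, 0x3c6ef35f, 0x3c6ef35f, 0x3c6ef35f, 0x3c6ef35f
};

int main(void)
{
        uint32_t block[BLOCK_WORDS];

        memset(key, 0, sizeof(key));    /* fixed seed, repeatable output */
        toy_generate(block, 0);         /* entropy step disabled for the test */

        if (memcmp(block, expected, sizeof(block)) != 0) {
                fprintf(stderr, "random self test failed\n"); /* kernel: panic() */
                abort();
        }
        /* Passed: regenerate with entropy so no consumer ever sees the
         * predictable known-answer block. */
        toy_generate(block, 1);
        printf("first word after reseed: 0x%08x\n", block[0]);
        return 0;
}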
diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c
index 0a4953cbd2f6909b687c182af14d26b1ca35faf8..b52a0cd22f30dc3c74cfa9c21ad30b28fa23b541 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -226,6 +226,11 @@ cat_convertattr(
        }
 }
 
+/*
+ * Convert a raw catalog key and record into an in-core catalog descriptor.
+ *
+ * Note: The caller is responsible for releasing the catalog descriptor.
+ */
 __private_extern__
 int
 cat_convertkey(
@@ -286,6 +291,9 @@ cat_releasedesc(struct cat_desc *descp)
 
 /*
  * cat_lookup - lookup a catalog node using a cnode decriptor
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
  */
 __private_extern__
 int
@@ -394,6 +402,10 @@ exit:
  * cat_findname - obtain a descriptor from cnid
  *
  * Only a thread lookup is performed.
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
+
  */
 __private_extern__
 int
@@ -464,6 +476,9 @@ exit:
 
 /*
  * cat_idlookup - lookup a catalog node using a cnode id
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
  */
 __private_extern__
 int
@@ -765,6 +780,9 @@ exit:
  *
  * NOTE: both the catalog file and attribute file locks must
  *       be held before calling this function.
+ *
+ * The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
  */
 __private_extern__
 int
@@ -937,6 +955,9 @@ exit:
  *     3. BTDeleteRecord(from_cnode);
  *     4. BTDeleteRecord(from_thread);
  *     5. BTInsertRecord(to_thread);
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied out_cdp is non-null).
  */
 __private_extern__
 int 
@@ -1690,6 +1711,7 @@ cat_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid)
                if (retval) {
                        hfs_systemfile_unlock(hfsmp, lockflags);
                        hfs_end_transaction(hfsmp);
+                       cat_releasedesc(&desc);
                        break;
                }
 
@@ -1697,6 +1719,7 @@ cat_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid)
                hfs_end_transaction(hfsmp);
 
                cnid = desc.cd_parentcnid;
+               cat_releasedesc(&desc);
        }
 
        return retval;
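
The comments added above all codify one ownership rule: catalog lookups hand back a descriptor whose storage the caller must release with cat_releasedesc(), including once per iteration when the descriptor is reused in a loop (the cat_set_childlinkbit() leak fix). A simplified userland sketch of the convention; the types and functions are toy stand-ins for the kernel structures:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct cat_desc {
        char    *cd_nameptr;            /* heap copy owned by the descriptor */
        unsigned cd_parentcnid;
};

/* Fills *desc with an allocated name; on success the caller owns it. */
static int cat_lookup_name(unsigned cnid, struct cat_desc *desc)
{
        desc->cd_nameptr = strdup("example");
        desc->cd_parentcnid = cnid / 2; /* toy parent relationship */
        return (desc->cd_nameptr == NULL);
}

static void cat_releasedesc(struct cat_desc *desc)
{
        free(desc->cd_nameptr);
        desc->cd_nameptr = NULL;
}

int main(void)
{
        struct cat_desc desc;
        unsigned cnid = 64;

        /* Walk toward the root, releasing the descriptor on every path
         * out of the loop -- the cat_set_childlinkbit() fix above. */
        while (cnid > 1) {
                if (cat_lookup_name(cnid, &desc) != 0)
                        break;          /* nothing was allocated on failure */
                printf("cnid %u -> parent %u\n", cnid, desc.cd_parentcnid);
                cnid = desc.cd_parentcnid;
                cat_releasedesc(&desc); /* release before reuse or return */
        }
        return 0;
}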
diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c
index 1f434da3dc3047abbf3a0dd44d43c360048431dc..c09c058c66cb525e18eb1a882ba198d59165c90e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -105,6 +105,14 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
 
        (void) hfs_lock(cp, HFS_FORCE_LOCK);
 
+       /*
+        * Recycle named streams quickly so that the data fork vnode can
+        * go inactive in a timely manner (so that it can be zero filled
+        * or truncated if needed).
+        */
+       if (vnode_isnamedstream(vp))
+               recycle = 1;
+
        /*
         * We should lock cnode before checking the flags in the 
         * condition below and should unlock the cnode before calling 
@@ -219,9 +227,11 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
 
                lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
 
-               if (cp->c_blocks > 0)
-                       printf("hfs_inactive: attempting to delete a non-empty file!");
-
+               if (cp->c_blocks > 0) {
+                       printf("hfs_inactive: deleting non-empty%sfile %d, "
+                              "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
+                              (int)cp->c_fileid, (int)cp->c_blocks);
+               }
 
                //
                // release the name pointer in the descriptor so that
@@ -270,8 +280,15 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
                        hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
        }
 
+       /*
+        * A file may have had delayed allocations, in which case hfs_update
+        * would not have updated the catalog record (cat_update).  We need
+        * to do that now, before we lose our fork data.  We also need to
+        * force the update, or hfs_update will again skip the cat_update.
+        */
        if ((cp->c_flag & C_MODIFIED) ||
            cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
+               cp->c_flag |= C_FORCEUPDATE;
                hfs_update(vp, 0);
        }
 out:
@@ -388,6 +405,35 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
        (void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
        cp = VTOC(vp);
 
+       /*
+        * Check if a deleted resource fork vnode missed a
+        * VNOP_INACTIVE call and requires truncation.
+        */
+       if (VNODE_IS_RSRC(vp) &&
+           (cp->c_flag & C_DELETED) &&
+           (VTOF(vp)->ff_blocks != 0)) {
+               hfs_unlock(cp);
+               ubc_setsize(vp, 0);
+
+               hfs_lock_truncate(cp, TRUE);
+               (void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+
+               (void) hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ap->a_context);
+
+               hfs_unlock_truncate(cp, TRUE);
+       }
+       /*
+        * A file may have had delayed allocations, in which case hfs_update
+        * would not have updated the catalog record (cat_update).  We need
+        * to do that now, before we lose our fork data.  We also need to
+        * force the update, or hfs_update will again skip the cat_update.
+        */
+       if ((cp->c_flag & C_MODIFIED) ||
+           cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
+               cp->c_flag |= C_FORCEUPDATE;
+               hfs_update(vp, 0);
+       }
+
        /*
         * Keep track of an inactive hot file.
         */
@@ -742,6 +788,16 @@ hfs_getnewvnode(
        if (cp->c_flag & C_HARDLINK) {
                vnode_setmultipath(vp);
        }
+       /*
+        * Tag resource fork vnodes as needing an VNOP_INACTIVE
+        * so that any deferred removes (open unlinked files)
+        * have the chance to process the resource fork.
+        */
+       if (VNODE_IS_RSRC(vp)) {
+               /* Force VL_NEEDINACTIVE on this vnode */
+               vnode_ref(vp);
+               vnode_rele(vp);
+       }
        hfs_chashwakeup(cp, H_ALLOC | H_ATTACH);
 
        /*
diff --git a/bsd/hfs/hfs_link.c b/bsd/hfs/hfs_link.c
index 65f5e9ee8567ca273d3ca7939ed79442ad613626..a2e08a098713c662ed7d0ecc9e928166f3d567d7 100644 (file)
@@ -1059,13 +1059,14 @@ __private_extern__
 void
 hfs_relorigin(struct cnode *cp, cnid_t parentcnid)
 {
-       linkorigin_t *origin = NULL;
+       linkorigin_t *origin, *prev;
        void * thread = current_thread();
 
-       TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) {
+       TAILQ_FOREACH_SAFE(origin, &cp->c_originlist, lo_link, prev) {
                if ((origin->lo_thread == thread) ||
                    (origin->lo_parentcnid == parentcnid)) {
                        TAILQ_REMOVE(&cp->c_originlist, origin, lo_link);
+                       FREE(origin, M_TEMP);
                        break;
                }
        }
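
hfs_relorigin() above needs TAILQ_FOREACH_SAFE because the loop body now removes and frees the current element; the extra loop argument caches the successor before the body runs, so the plain FOREACH's advance step never touches freed memory. A self-contained illustration, assuming a BSD-derived <sys/queue.h> that provides TAILQ_FOREACH_SAFE (as on macOS or FreeBSD):

#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct origin {
        int                     lo_id;
        TAILQ_ENTRY(origin)     lo_link;
};
TAILQ_HEAD(originhead, origin);

int main(void)
{
        struct originhead head = TAILQ_HEAD_INITIALIZER(head);
        struct origin *o, *next;
        int i;

        for (i = 0; i < 4; i++) {
                o = malloc(sizeof(*o));
                o->lo_id = i;
                TAILQ_INSERT_TAIL(&head, o, lo_link);
        }

        /* _SAFE stashes the successor in 'next' before the body runs,
         * so removing and freeing the current node is legal. */
        TAILQ_FOREACH_SAFE(o, &head, lo_link, next) {
                if (o->lo_id == 2) {
                        TAILQ_REMOVE(&head, o, lo_link);
                        free(o);        /* kernel: FREE(origin, M_TEMP) */
                }
        }

        TAILQ_FOREACH(o, &head, lo_link)
                printf("remaining: %d\n", o->lo_id);
        return 0;
}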
diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c
index d5a05045bce0619d5f1b0d943ee6118fefeb2386..c0dc7253d7da460c8887b5b21d5a1c4643eb4167 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -2398,6 +2398,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock)
                } else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
                           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
                        *vpp = NULL;
+                       cat_releasedesc(&cndesc);
                        return (ENOENT);  /* open unlinked file */
                }
        }
@@ -3313,6 +3314,12 @@ out:
                VTOC(vp)->c_blocks = fp->ff_blocks;
 
        }
+       /*
+          Regardless of whether or not the totalblocks actually increased,
+          we should reset the allocLimit field. If it changed, it will
+          get updated; if not, it will remain the same.
+       */
+       hfsmp->allocLimit = vcb->totalBlocks;
        hfs_systemfile_unlock(hfsmp, lockflags);
        hfs_end_transaction(hfsmp);
 
@@ -4026,6 +4033,7 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
        journal_fork.cf_extents[0].blockCount = newBlockCount;
        journal_fork.cf_blocks = newBlockCount;
        error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
+       cat_releasedesc(&journal_desc);  /* all done with cat descriptor */
        if (error) {
                printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
                goto free_fail;
@@ -4140,6 +4148,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
        jib_fork.cf_extents[0].blockCount = 1;
        jib_fork.cf_blocks = 1;
        error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
+       cat_releasedesc(&jib_desc);  /* all done with cat descriptor */
        if (error) {
                printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
                goto fail;
diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c
index cac1f5b755faec3ab3757d0243dd65f50bcfa4de..d8350638aa2f170dcadf3d521c8991efb13cc3cb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1728,9 +1728,10 @@ hfs_vnop_remove(ap)
 
        hfs_lock_truncate(cp, TRUE);
 
-       if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK)))
-               goto out;
-
+       if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) {
+               hfs_unlock_truncate(cp, TRUE);
+               return (error);
+       }
        error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0);
 
        //
@@ -1748,9 +1749,14 @@ hfs_vnop_remove(ap)
            recycle_rsrc = 1;
        }
 
-       hfs_unlockpair(dcp, cp);
-out:
+       /*
+        * Drop the truncate lock before unlocking the cnode
+        * (which can potentially perform a vnode_put and
+        * recycle the vnode which in turn might require the
+        * truncate lock)
+        */
        hfs_unlock_truncate(cp, TRUE);
+       hfs_unlockpair(dcp, cp);
 
        if (recycle_rsrc && vnode_getwithvid(rvp, rvid) == 0) {
                vnode_recycle(rvp);
@@ -1798,7 +1804,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
        int lockflags;
        int error = 0;
        int started_tr = 0;
-       int isbigfile = 0, hasxattrs=0, isdir=0;
+       int isbigfile = 0, defer_remove=0, isdir=0;
 
        cp = VTOC(vp);
        dcp = VTOC(dvp);
@@ -1866,11 +1872,22 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
         * (needed for hfs_truncate)
         */
        if (isdir == 0 && (cp->c_blocks - VTOF(vp)->ff_blocks)) {
-               error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE);
-               if (error)
-                       goto out;
-               /* Defer the vnode_put on rvp until the hfs_unlock(). */
-               cp->c_flag |= C_NEED_RVNODE_PUT;
+               /*
+                * We must avoid calling hfs_vgetrsrc() when we have
+                * an active resource fork vnode to avoid deadlocks
+                * when that vnode is in the VL_TERMINATE state. We
+                * can defer removing the file and its resource fork
+                * until the call to hfs_vnop_inactive() occurs.
+                */
+               if (cp->c_rsrc_vp) {
+                       defer_remove = 1;
+               } else {
+                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE);
+                       if (error)
+                               goto out;
+                       /* Defer the vnode_put on rvp until the hfs_unlock(). */
+                       cp->c_flag |= C_NEED_RVNODE_PUT;
+               }
        }
        /* Check if this file is being used. */
        if (isdir == 0) {
@@ -1887,7 +1904,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
           individual transactions in case there are too many */
        if ((hfsmp->hfs_attribute_vp != NULL) &&
            (cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) {
-           hasxattrs = 1;
+           defer_remove = 1;
        }
 
        /*
@@ -1976,10 +1993,10 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 
        /*
         * There are two cases to consider:
-        *  1. File is busy/big   ==> move/rename the file
+        *  1. File is busy/big/defer_remove ==> move/rename the file
         *  2. File is not in use ==> remove the file
         */
-       if (dataforkbusy || rsrcforkbusy || isbigfile || hasxattrs) {
+       if (dataforkbusy || rsrcforkbusy || isbigfile || defer_remove) {
                char delname[32];
                struct cat_desc to_desc;
                struct cat_desc todir_desc;
@@ -3191,6 +3208,7 @@ hfs_update(struct vnode *vp, __unused int waitfor)
        struct cat_fork *dataforkp = NULL;
        struct cat_fork *rsrcforkp = NULL;
        struct cat_fork datafork;
+       struct cat_fork rsrcfork;
        struct hfsmount *hfsmp;
        int lockflags;
        int error;
@@ -3272,6 +3290,18 @@ hfs_update(struct vnode *vp, __unused int waitfor)
                dataforkp = &datafork;
        }
 
+       /*
+        * For resource forks with delayed allocations, make sure
+        * the block count and file size match the number of blocks
+        * actually allocated to the file on disk.
+        */
+       if (rsrcforkp && (cp->c_rsrcfork->ff_unallocblocks != 0)) {
+               bcopy(rsrcforkp, &rsrcfork, sizeof(rsrcfork));
+               rsrcfork.cf_blocks = (cp->c_rsrcfork->ff_blocks - cp->c_rsrcfork->ff_unallocblocks);
+               rsrcfork.cf_size   = rsrcfork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
+               rsrcforkp = &rsrcfork;
+       }
+
        /*
         * Lock the Catalog b-tree file.
         */
@@ -3585,6 +3615,7 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int
        int error;
        int vid;
 
+restart:
        /* Attempt to use exising vnode */
        if ((rvp = cp->c_rsrc_vp)) {
                vid = vnode_vid(rvp);
@@ -3607,15 +3638,22 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int
 
                error = vnode_getwithvid(rvp, vid);
 
-               if (can_drop_lock)
+               if (can_drop_lock) {
                        (void) hfs_lock(cp, HFS_FORCE_LOCK);
-
+                       /*
+                        * When our lock was relinquished, the resource fork
+                        * could have been recycled.  Check for this and try
+                        * again.
+                        */
+                       if (error == ENOENT)
+                               goto restart;
+               }
                if (error) {
                        const char * name = (const char *)VTOC(vp)->c_desc.cd_nameptr;
 
                        if (name)
-                               printf("hfs_vgetrsrc: couldn't get"
-                                       " resource fork for %s\n", name);
+                               printf("hfs_vgetrsrc: couldn't get resource"
+                                      " fork for %s, err %d\n", name, error);
                        return (error);
                }
        } else {
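
hfs_vgetrsrc() above gains a restart path: when the cnode lock is dropped around vnode_getwithvid(), ENOENT on return means the cached resource-fork vnode was recycled in the window, so the lookup is retried rather than failed. A userland sketch of that drop/revalidate/retry shape; the generation counter is an invented stand-in for the vnode id check:

#include <stdio.h>

static int generation = 7;      /* bumped when the "vnode" is recycled */
static int tries;

/* Stands in for vnode_getwithvid(): fails if the object was recycled. */
static int get_with_vid(int vid)
{
        return (vid == generation) ? 0 : -1;    /* -1 plays ENOENT */
}

static void lookup_resource_fork(void)
{
        int vid;
restart:
        tries++;
        vid = generation;       /* snapshot while the lock is held */
        /* ...drop the cnode lock; another thread may recycle here... */
        if (tries == 1)
                generation++;   /* simulate a recycle in the window */
        /* ...retake the cnode lock... */
        if (get_with_vid(vid) != 0)
                goto restart;   /* stale: look the vnode up again */
        printf("succeeded on try %d\n", tries);
}

int main(void)
{
        lookup_resource_fork();
        return 0;
}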
diff --git a/bsd/hfs/hfs_xattr.c b/bsd/hfs/hfs_xattr.c
index 37dca768bd6a6a8c3905882e7f0bdaf7ff312fe0..d025ae1cf5d6fe97ff17088e03504db0071fac5a 100644 (file)
@@ -1504,6 +1504,8 @@ hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid)
 #endif
                hfs_systemfile_unlock(hfsmp, lockflags);
                hfs_end_transaction(hfsmp);
+               if (result)
+                       break;
        }
 exit:
        FREE(iterator, M_TEMP);
diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c
index 2ea3d6377226591794b60cfaf3087f4332696e42..2a04d688caec3fcb294839c60271d9a6ebafd3b8 100644 (file)
@@ -569,6 +569,7 @@ bsd_init(void)
        /*
         * Initialize the calendar.
         */
+       bsd_init_kprintf("calling IOKitInitializeTime\n");
        IOKitInitializeTime();
 
        if (turn_on_log_leaks && !new_nkdbufs)
@@ -1031,7 +1032,9 @@ parse_bsd_args(void)
        if (PE_parse_boot_arg("nbuf", &max_nbuf_headers)) {
                customnbuf = 1;
        }
+#if !defined(SECURE_KERNEL)
        PE_parse_boot_arg("kmem", &setup_kmem);
+#endif
        PE_parse_boot_arg("trace", &new_nkdbufs);
 
        if (PE_parse_boot_arg("msgbuf", &msgbuf)) {
diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c
index ded4a1dcf2c1aa4e532f2102101be9602a681bf7..6b2702d7b63e0b9475ccd52f5b2d6b79ba6c394d 100644 (file)
@@ -1548,6 +1548,7 @@ int
 posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval)
 {
        proc_t p = ap;          /* quiet bogus GCC vfork() warning */
+       user_addr_t pid = uap->pid;
        register_t ival[2];             /* dummy retval for vfork() */
        struct image_params image_params, *imgp;
        struct vnode_attr va;
@@ -1809,8 +1810,8 @@ bad:
                 *
                 * If the parent wants the pid, copy it out
                 */
-               if (uap->pid != USER_ADDR_NULL)
-                       (void)suword(uap->pid, p->p_pid);
+               if (pid != USER_ADDR_NULL)
+                       (void)suword(pid, p->p_pid);
                retval[0] = error;
                /*
                 * Override inherited code signing flags with the
diff --git a/bsd/kern/kern_exit.c b/bsd/kern/kern_exit.c
index 7bc8b1d74c19a74a3baff3416e175cd25c734d39..27f98defbaf6af68a9d6e0c667af0f4ba0ab235c 100644 (file)
@@ -246,8 +246,7 @@ exit1(proc_t p, int rv, int *retval)
                }
                sig_lock_to_exit(p);
        }
-#if !CONFIG_EMBEDDED /* BER_XXX */
-       if (p->p_pid == 1) {
+       if (p == initproc) {
                proc_unlock(p);
                printf("pid 1 exited (signal %d, exit %d)",
                    WTERMSIG(rv), WEXITSTATUS(rv));
@@ -257,7 +256,6 @@ exit1(proc_t p, int rv, int *retval)
                                                                "launchd"),
                                                        init_task_failure_data);
        }
-#endif
 
        p->p_lflag |= P_LEXIT;
        p->p_xstat = rv;
diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c
index 029fddc8ed35aa02a36835f52d7d611b5660b241..27f0e09066f220aa3a25dbdeabf1317a372f7899 100644 (file)
@@ -2415,23 +2415,26 @@ static int
 sysctl_nx
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
+#ifdef SECURE_KERNEL
+       return ENOTSUP;
+#endif
        int new_value, changed;
        int error;
 
        error = sysctl_io_number(req, nx_enabled, sizeof(nx_enabled), &new_value, &changed);
-    if (error)
-        return error;
+       if (error)
+               return error;
 
-    if (changed) {
+       if (changed) {
 #ifdef __i386__
                /*
                 * Only allow setting if NX is supported on the chip
                 */
                if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD))
-            return ENOTSUP;
+                       return ENOTSUP;
 #endif
-        nx_enabled = new_value;
-    }
+               nx_enabled = new_value;
+       }
        return(error);
 }
 
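sysctl_nx() above compiles to an ENOTSUP stub when SECURE_KERNEL is defined. The same compile-time gate in miniature, expressed with an #else so the writable path compiles out entirely (build with -DSECURE_KERNEL to get the stub; names are illustrative):

#include <errno.h>
#include <stdio.h>

static int nx_enabled = 1;

static int sysctl_nx_handler(int new_value)
{
#ifdef SECURE_KERNEL
        (void)new_value;
        return ENOTSUP;         /* the knob simply does not exist */
#else
        nx_enabled = new_value; /* writable on development builds */
        return 0;
#endif
}

int main(void)
{
        int error = sysctl_nx_handler(0);

        printf("set -> %d, nx_enabled = %d\n", error, nx_enabled);
        return 0;
}
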
diff --git a/bsd/kern/kpi_socketfilter.c b/bsd/kern/kpi_socketfilter.c
index 377bb2e849ad36f8fa30346408bad99e56a02ef6..cefe3047326b8b6be6f5173cf816845d714c0312 100644 (file)
@@ -298,10 +298,19 @@ sflt_detach_private(
        if (!unregistering) {
                if ((entry->sfe_flags & SFEF_UNREGISTERING) != 0) {
                        /*
-                        * Another thread is unregistering the filter, we need to
-                        * avoid detaching the filter here so the socket won't go
-                        * away.
+                        * Another thread is unregistering the filter, we
+                        * need to avoid detaching the filter here so the
+                        * socket won't go away.  Bump up the socket's
+                        * usecount so that it won't be freed until after
+                        * the filter unregistration has been completed;
+                        * at this point the caller has already held the
+                        * socket's lock, so we can directly modify the
+                        * usecount.
                         */
+                       if (!(entry->sfe_flags & SFEF_DETACHXREF)) {
+                               entry->sfe_socket->so_usecount++;
+                               entry->sfe_flags |= SFEF_DETACHXREF;
+                       }
                        lck_mtx_unlock(sock_filter_lock);
                        return;
                }
@@ -322,9 +331,14 @@ sflt_detach_private(
        else {
                /*
                 * Clear the removing flag. We will perform the detach here or
-                * request a delayed deatch.
+                * request a delayed detach.  Since we do an extra ref release
+                * below, bump up the usecount if we haven't done so.
                 */
                entry->sfe_flags &= ~SFEF_UNREGISTERING;
+               if (!(entry->sfe_flags & SFEF_DETACHXREF)) {
+                       entry->sfe_socket->so_usecount++;
+                       entry->sfe_flags |= SFEF_DETACHXREF;
+               }
        }
 
        if (entry->sfe_socket->so_filteruse != 0) {
@@ -510,10 +524,22 @@ sflt_unregister(
                filter->sf_flags |= SFF_DETACHING;
        
                for (next_entry = entry_head; next_entry;
-                        next_entry = next_entry->sfe_next_onfilter) {
-                       socket_lock(next_entry->sfe_socket, 1);
+                   next_entry = next_entry->sfe_next_onfilter) {
+                       /*
+                        * Mark this as "unregistering"; upon dropping the
+                        * lock, another thread may win the race and attempt
+                        * to detach a socket from it (e.g. as part of close)
+                        * before we get a chance to detach.  Setting this
+                        * flag practically tells the other thread to go away.
+                        * If the other thread wins, this causes an extra
+                        * reference hold on the socket so that it won't be
+                        * deallocated until after we finish with the detach
+                        * for it below.  If we win the race, the extra
+                        * reference hold is also taken to compensate for the
+                        * extra reference release when detach is called
+                        * with a "1" for its second parameter.
+                        */
                        next_entry->sfe_flags |= SFEF_UNREGISTERING;
-                       socket_unlock(next_entry->sfe_socket, 0);       /* Radar 4201550: prevents the socket from being deleted while being unregistered */
                }
        }
        
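Both kpi_socketfilter.c hunks bump so_usecount at most once, using the new SFEF_DETACHXREF flag to record that the extra detach-time reference has already been taken, since either the unregister path or the detach path may get there first. The idempotent-hold pattern in isolation, with simplified stand-ins for the kernel structures:

#include <stdio.h>

#define SFEF_DETACHXREF 0x4     /* extra reference held for detach */

struct sock { int so_usecount; };
struct filter_entry {
        struct sock *sfe_socket;
        unsigned     sfe_flags;
};

/* Called with the socket lock held, possibly from more than one path. */
static void hold_for_detach(struct filter_entry *e)
{
        if (!(e->sfe_flags & SFEF_DETACHXREF)) {
                e->sfe_socket->so_usecount++;   /* keep the socket alive */
                e->sfe_flags |= SFEF_DETACHXREF;
        }
}

int main(void)
{
        struct sock so = { 1 };
        struct filter_entry e = { &so, 0 };

        hold_for_detach(&e);    /* first caller bumps the count */
        hold_for_detach(&e);    /* second caller is a no-op */
        printf("usecount = %d\n", so.so_usecount);      /* prints 2 */
        return 0;
}
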
diff --git a/bsd/kern/pthread_synch.c b/bsd/kern/pthread_synch.c
index 980c1ad892c9ff2c1d243347d3a116ff84a7bc74..9ccbc9a9e50a61d1829d32214f69d58aff844835 100644 (file)
@@ -159,7 +159,7 @@ void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), voi
 #define PTHREAD_START_SETSCHED 0x02000000
 #define PTHREAD_START_DETACHED 0x04000000
 #define PTHREAD_START_POLICY_BITSHIFT 16
-#define PTHREAD_START_POLICY_MASK 0xffff
+#define PTHREAD_START_POLICY_MASK 0xff
 #define PTHREAD_START_IMPORTANCE_MASK 0xffff
 
 #define SCHED_OTHER      POLICY_TIMESHARE
@@ -958,7 +958,8 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
                        extinfo.timeshare = 0;
                thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
 
-               precedinfo.importance = importance;
+#define BASEPRI_DEFAULT 31
+               precedinfo.importance = (importance - BASEPRI_DEFAULT);
                thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
        }
 
diff --git a/bsd/kern/sys_generic.c b/bsd/kern/sys_generic.c
index 0fe948aaed433cfcc7f547d929b912e7e3e31d09..509468087c76eb31fcbfd922eb3c29a9e1d3c256 100644 (file)
@@ -224,7 +224,7 @@ pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *re
        int error;
 
        if ( (error = preparefileread(p, &fp, fd, 1)) )
-               return (error);
+               goto out;
 
        error = dofileread(vfs_context_current(), fp, uap->buf, uap->nbyte,
                        uap->offset, FOF_OFFSET, retval);
@@ -234,7 +234,8 @@ pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *re
        if (!error)
            KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
              uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
-       
+
+out:
        return (error);
 }
 
diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c
index e178c29b478e8cb7b341bcaecdb0ac23df4b11a7..7b259ec9f657e36a049d8383732bf7f5118088c7 100644 (file)
@@ -837,8 +837,12 @@ soclose_wait_locked(struct socket *so)
                mutex_held = so->so_proto->pr_domain->dom_mtx;
        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
 
-       /* Double check here and return if there's no outstanding upcall */
-       if (!(so->so_flags & SOF_UPCALLINUSE))
+       /*
+        * Double check here and return if there's no outstanding upcall;
+        * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
+        */
+       if (!(so->so_flags & SOF_UPCALLINUSE) ||
+           !(so->so_flags & SOF_UPCALLCLOSEWAIT))
                return;
 
        so->so_flags |= SOF_CLOSEWAIT;
@@ -3195,6 +3199,19 @@ sosetopt(struct socket *so, struct sockopt *sopt)
 #endif /* MAC_SOCKET */
                        break;
 
+#ifdef __APPLE_API_PRIVATE
+               case SO_UPCALLCLOSEWAIT:
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval));
+                       if (error)
+                               goto bad;
+                       if (optval)
+                               so->so_flags |= SOF_UPCALLCLOSEWAIT;
+                       else
+                               so->so_flags &= ~SOF_UPCALLCLOSEWAIT;
+                       break;
+#endif
+
                default:
                        error = ENOPROTOOPT;
                        break;
@@ -3463,6 +3480,12 @@ integer:
 #endif /* MAC_SOCKET */
                        break;
 
+#ifdef __APPLE_API_PRIVATE
+               case SO_UPCALLCLOSEWAIT:
+                       optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
+                       goto integer;
+#endif
+
                default:
                        error = ENOPROTOOPT;
                        break;
diff --git a/bsd/kern/uipc_socket2.c b/bsd/kern/uipc_socket2.c
index 379b9afd616bcdabe1e2ddd5905b78c8226cfec6..41a606ca39fa35818172d8411a845b334cbd7452 100644 (file)
@@ -843,6 +843,7 @@ sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
                sb->sb_mb = m0;
        }
        sb->sb_lastrecord = m0;
+       sb->sb_mbtail = m0;
 
        m = m0->m_next;
        m0->m_next = 0;
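
The one-line sbappendrecord() fix above keeps the cached tail pointer in sync when a record lands at the front of an empty buffer; later appends trust that cache. A toy queue reduced to the two pointers involved (not the real mbuf layout):

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct rec { struct rec *next; int id; };
struct buf { struct rec *head, *tail; };        /* sb_mb / sb_mbtail */

static void append_record(struct buf *sb, struct rec *m0)
{
        if (sb->head == NULL)
                sb->head = m0;          /* buffer was empty: new head */
        else
                sb->tail->next = m0;
        sb->tail = m0;                  /* the line the fix adds */
}

int main(void)
{
        struct buf sb = { NULL, NULL };
        struct rec a = { NULL, 1 }, b = { NULL, 2 };
        struct rec *m;

        append_record(&sb, &a);
        append_record(&sb, &b);         /* without the tail update, this
                                           would hang off a stale tail */
        for (m = sb.head; m != NULL; m = m->next)
                printf("record %d\n", m->id);
        assert(sb.tail == &b);
        return 0;
}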
diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c
index 1126e7955d59b249dfa1ffe7bc9142ac5edcd244..7e9cafa358223b23584c9c65562a642a216aff8e 100644 (file)
@@ -137,9 +137,9 @@ static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int,
 static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
     register_t *);
 static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
-    size_t);
+    size_t, boolean_t);
 static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
-    user_addr_t, size_t);
+    user_addr_t, size_t, boolean_t);
 #if SENDFILE
 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
     boolean_t);
@@ -251,9 +251,9 @@ bind(__unused proc_t p, struct bind_args *uap, __unused register_t *retval)
                goto out;
        }
        if (uap->namelen > sizeof (ss)) {
-               error = getsockaddr(so, &sa, uap->name, uap->namelen);
+               error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
        } else {
-               error = getsockaddr_s(so, &ss, uap->name, uap->namelen);
+               error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
                if (error == 0) {
                        sa = (struct sockaddr *)&ss;
                        want_free = FALSE;
@@ -595,6 +595,7 @@ connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused
        boolean_t want_free = TRUE;
        int error;
        int fd = uap->s;
+       boolean_t dgram;
 
        AUDIT_ARG(fd, uap->s);
        error = file_socket(fd, &so);
@@ -605,11 +606,17 @@ connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused
                goto out;
        }
 
+       /*
+        * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
+        * if this is a datagram socket; translate for other types.
+        */
+       dgram = (so->so_type == SOCK_DGRAM);
+
        /* Get socket address now before we obtain socket lock */
        if (uap->namelen > sizeof (ss)) {
-               error = getsockaddr(so, &sa, uap->name, uap->namelen);
+               error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
        } else {
-               error = getsockaddr_s(so, &ss, uap->name, uap->namelen);
+               error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
                if (error == 0) {
                        sa = (struct sockaddr *)&ss;
                        want_free = FALSE;
@@ -827,10 +834,10 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
        if (mp->msg_name != USER_ADDR_NULL) {
                if (mp->msg_namelen > sizeof (ss)) {
                        error = getsockaddr(so, &to, mp->msg_name,
-                           mp->msg_namelen);
+                           mp->msg_namelen, TRUE);
                } else {
                        error = getsockaddr_s(so, &ss, mp->msg_name,
-                           mp->msg_namelen);
+                           mp->msg_namelen, TRUE);
                        if (error == 0) {
                                to = (struct sockaddr *)&ss;
                                want_free = FALSE;
@@ -1840,7 +1847,7 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
  */
 static int
 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
-    size_t len)
+    size_t len, boolean_t translate_unspec)
 {
        struct sockaddr *sa;
        int error;
@@ -1865,7 +1872,7 @@ getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
                 * sockets we leave it unchanged and let the lower layer
                 * handle it.
                 */
-               if (sa->sa_family == AF_UNSPEC &&
+               if (translate_unspec && sa->sa_family == AF_UNSPEC &&
                    INP_CHECK_SOCKAF(so, AF_INET) &&
                    len == sizeof (struct sockaddr_in))
                        sa->sa_family = AF_INET;
@@ -1878,7 +1885,7 @@ getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
 
 static int
 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
-    user_addr_t uaddr, size_t len)
+    user_addr_t uaddr, size_t len, boolean_t translate_unspec)
 {
        int error;
 
@@ -1902,7 +1909,7 @@ getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
                 * sockets we leave it unchanged and let the lower layer
                 * handle it.
                 */
-               if (ss->ss_family == AF_UNSPEC &&
+               if (translate_unspec && ss->ss_family == AF_UNSPEC &&
                    INP_CHECK_SOCKAF(so, AF_INET) &&
                    len == sizeof (struct sockaddr_in))
                        ss->ss_family = AF_INET;
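
The new boolean threaded through getsockaddr()/getsockaddr_s() controls the legacy AF_UNSPEC-to-AF_INET rewrite: connect(2) on a datagram socket must pass AF_UNSPEC through (it means disconnect there), while bind(2) and sendit() keep translating. A userland sketch of the policy; check_sockaddr() is a stand-in:

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>

static void
check_sockaddr(struct sockaddr *sa, size_t len, int translate_unspec)
{
        if (translate_unspec && sa->sa_family == AF_UNSPEC &&
            len == sizeof(struct sockaddr_in))
                sa->sa_family = AF_INET;        /* legacy 4.3BSD rewrite */
}

int main(void)
{
        struct sockaddr_in sin;
        int dgram = 1;                  /* pretend so_type == SOCK_DGRAM */

        memset(&sin, 0, sizeof(sin));
        sin.sin_family = AF_UNSPEC;

        /* connect() on a datagram socket: keep AF_UNSPEC (disconnect). */
        check_sockaddr((struct sockaddr *)&sin, sizeof(sin), !dgram);
        printf("dgram connect: family %d\n", sin.sin_family);

        /* bind()/sendit() still ask for the translation. */
        check_sockaddr((struct sockaddr *)&sin, sizeof(sin), 1);
        printf("bind/send:     family %d\n", sin.sin_family);
        return 0;
}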
diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c
index 47690269a4562fcb87f9ad58f831080cd12d50c9..e3b16f486462417c600feacbe27bf90bab241be2 100644 (file)
@@ -310,6 +310,11 @@ dlil_write_end(void)
 static int
 proto_hash_value(u_long protocol_family)
 {
+       /*
+        * dlil_proto_unplumb_all() depends on the mapping between
+        * the hash bucket index and the protocol family defined
+        * here; future changes must be applied there as well.
+        */
        switch(protocol_family) {
                case PF_INET:
                        return 0;
@@ -2852,3 +2857,24 @@ dlil_if_release(
                ifnet_lock_done(ifp);
     
 }
+
+__private_extern__ void
+dlil_proto_unplumb_all(struct ifnet *ifp)
+{
+       /*
+        * if_proto_hash[0-3] are for PF_INET, PF_INET6, PF_APPLETALK
+        * and PF_VLAN, where each bucket contains exactly one entry;
+        * PF_VLAN does not need an explicit unplumb.
+        *
+        * if_proto_hash[4] is for other protocols; we expect anything
+        * in this bucket to respond to the DETACHING event (which would
+        * have happened by now) and do the unplumb then.
+        */
+       (void) proto_unplumb(PF_INET, ifp);
+#if INET6
+       (void) proto_unplumb(PF_INET6, ifp);
+#endif /* INET6 */
+#if NETAT
+       (void) proto_unplumb(PF_APPLETALK, ifp);
+#endif /* NETAT */
+}
diff --git a/bsd/net/dlil.h b/bsd/net/dlil.h
index 3f19f7108871ea6ec1b5926f2883da9d695a0455..6e3872b799f5700eff4745d30a87418d8f3020a1 100644 (file)
@@ -161,6 +161,7 @@ int dlil_attach_filter(ifnet_t ifp, const struct iff_filter *if_filter,
                                           interface_filter_t *filter_ref);
 void dlil_detach_filter(interface_filter_t filter);
 int dlil_detach_protocol(ifnet_t ifp, u_long protocol);
+extern void dlil_proto_unplumb_all(ifnet_t);
 
 #endif /* BSD_KERNEL_PRIVATE */
 
diff --git a/bsd/net/if.c b/bsd/net/if.c
index 04b3cadf6087de50ae88a6fc63a0d8008520a6e4..499b4790c617e349e9d1f6471fdc7378c35fc832 100644 (file)
@@ -2048,13 +2048,14 @@ if_down_all(void)
        u_int32_t       count;
        u_int32_t       i;
 
-       if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp, &count) != 0) {
+       if (ifnet_list_get_all(IFNET_FAMILY_ANY, &ifp, &count) == 0) {
                for (i = 0; i < count; i++) {
                        if_down(ifp[i]);
+                       dlil_proto_unplumb_all(ifp[i]);
                }
                ifnet_list_free(ifp);
        }
-       
+
        return 0;
 }
 
diff --git a/bsd/net/kext_net.h b/bsd/net/kext_net.h
index b7b98dd00923a8eef384a94aaa1e1c5a5d1b8e1c..6215515a3d64ca695798f3afdea3dd38983c0241 100644 (file)
@@ -48,8 +48,9 @@
 
 struct socket_filter;
 
-#define        SFEF_DETACHUSEZERO      0x1     // Detach when use reaches zero
-#define        SFEF_UNREGISTERING      0x2     // Remove due to unregister
+#define        SFEF_DETACHUSEZERO      0x1     /* Detach when use reaches zero */
+#define        SFEF_UNREGISTERING      0x2     /* Remove due to unregister */
+#define        SFEF_DETACHXREF         0x4     /* Extra reference held for detach */
 
 struct socket_filter_entry {
        struct socket_filter_entry      *sfe_next_onsocket;
diff --git a/bsd/net/kpi_interface.c b/bsd/net/kpi_interface.c
index 1878cde4618715121c256234054a0c566fa5467d..d9dfca3f3eca4e48e740005575d3252a06dab467 100644 (file)
@@ -56,6 +56,9 @@
 extern struct dlil_threading_info *dlil_lo_thread_ptr;
 extern int dlil_multithreaded_input;
 
+static errno_t
+ifnet_list_get_common(ifnet_family_t, boolean_t, ifnet_t **, u_int32_t *);
+
 /*
        Temporary work around until we have real reference counting
        
@@ -1084,42 +1087,55 @@ ifnet_find_by_name(
 }
 
 errno_t
-ifnet_list_get(
-       ifnet_family_t family,
-       ifnet_t **list,
-       u_int32_t *count)
+ifnet_list_get(ifnet_family_t family, ifnet_t **list, u_int32_t *count)
+{
+       return (ifnet_list_get_common(family, FALSE, list, count));
+}
+
+__private_extern__ errno_t
+ifnet_list_get_all(ifnet_family_t family, ifnet_t **list, u_int32_t *count)
+{
+       return (ifnet_list_get_common(family, TRUE, list, count));
+}
+
+static errno_t
+ifnet_list_get_common(ifnet_family_t family, boolean_t get_all, ifnet_t **list,
+    u_int32_t *count)
 {
        struct ifnet *ifp;
        u_int32_t cmax = 0;
        *count = 0;
        errno_t result = 0;
-       
-       if (list == NULL || count == NULL) return EINVAL;
-       
+
+       if (list == NULL || count == NULL)
+               return (EINVAL);
+
        ifnet_head_lock_shared();
-       TAILQ_FOREACH(ifp, &ifnet, if_link)
-       {
-               if (ifp->if_eflags & IFEF_DETACHING) continue;
-               if (family == 0 || ifp->if_family == family)
+       TAILQ_FOREACH(ifp, &ifnet, if_link) {
+               if ((ifp->if_eflags & IFEF_DETACHING) && !get_all)
+                       continue;
+               if (family == IFNET_FAMILY_ANY || ifp->if_family == family)
                        cmax++;
        }
-       
+
        if (cmax == 0)
                result = ENXIO;
-       
+
        if (result == 0) {
-               MALLOC(*list, ifnet_t*, sizeof(ifnet_t) * (cmax + 1), M_TEMP, M_NOWAIT);
+               MALLOC(*list, ifnet_t*, sizeof(ifnet_t) * (cmax + 1),
+                   M_TEMP, M_NOWAIT);
                if (*list == NULL)
                        result = ENOMEM;
        }
 
        if (result == 0) {
-               TAILQ_FOREACH(ifp, &ifnet, if_link)
-               {
-                       if (ifp->if_eflags & IFEF_DETACHING) continue;
-                       if (*count + 1 > cmax) break;
-                       if (family == 0 || ((ifnet_family_t)ifp->if_family) == family)
-                       {
+               TAILQ_FOREACH(ifp, &ifnet, if_link) {
+                       if ((ifp->if_eflags & IFEF_DETACHING) && !get_all)
+                               continue;
+                       if (*count + 1 > cmax)
+                               break;
+                       if (family == IFNET_FAMILY_ANY ||
+                           ((ifnet_family_t)ifp->if_family) == family) {
                                (*list)[*count] = (ifnet_t)ifp;
                                ifnet_reference((*list)[*count]);
                                (*count)++;
@@ -1128,23 +1144,22 @@ ifnet_list_get(
                (*list)[*count] = NULL;
        }
        ifnet_head_done();
-       
-       return 0;
+
+       return (result);
 }
 
 void
-ifnet_list_free(
-       ifnet_t *interfaces)
+ifnet_list_free(ifnet_t *interfaces)
 {
        int i;
-       
-       if (interfaces == NULL) return;
-       
-       for (i = 0; interfaces[i]; i++)
-       {
+
+       if (interfaces == NULL)
+               return;
+
+       for (i = 0; interfaces[i]; i++) {
                ifnet_release(interfaces[i]);
        }
-       
+
        FREE(interfaces, M_TEMP);
 }
 
diff --git a/bsd/net/kpi_interface.h b/bsd/net/kpi_interface.h
index 8a0cd2b7c5151479ac8391762d4b6d59f40e485e..dd3101b4ad3481e94c962ea9798379742309f6da 100644 (file)
@@ -1505,6 +1505,25 @@ errno_t ifnet_find_by_name(const char *ifname, ifnet_t *interface);
  */
 errno_t ifnet_list_get(ifnet_family_t family, ifnet_t **interfaces, u_int32_t *count);
 
+#ifdef KERNEL_PRIVATE
+/*!
+       @function ifnet_list_get_all
+       @discussion Get a list of attached interfaces. List will be set to
+               point to an array allocated by ifnet_list_get. The interfaces
+               are refcounted and the counts will be incremented before the
+               function returns. The list of interfaces must be freed using
+               ifnet_list_free.  This is similar to ifnet_list_get, except
+               that it includes interfaces that are detaching.
+       @param family The interface family (i.e. IFNET_FAMILY_ETHERNET). To
+               find interfaces of all families, use IFNET_FAMILY_ANY.
+       @param interfaces A pointer to an array of interface references.
+       @param count A pointer that will be filled in with the number of
+               matching interfaces in the array.
+       @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_list_get_all(ifnet_family_t family, ifnet_t **interfaces, u_int32_t *count);
+#endif /* KERNEL_PRIVATE */
+
 /*!
        @function ifnet_list_free
        @discussion Free a list of interfaces returned by ifnet_list_get.
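
A sketch of how a kernel-side caller might use the KERNEL_PRIVATE KPI documented above, mirroring the if_down_all() change in bsd/net/if.c; ifnet_family() and ifnet_unit() are existing KPIs, and a kext-style build environment is assumed:

#include <sys/systm.h>          /* printf */
#include <net/kpi_interface.h>

static void
log_all_interfaces(void)
{
        ifnet_t *list;
        u_int32_t count, i;

        /* Unlike ifnet_list_get(), this also returns interfaces that
         * are in the middle of detaching. */
        if (ifnet_list_get_all(IFNET_FAMILY_ANY, &list, &count) != 0)
                return;         /* e.g. ENXIO: nothing matched */

        for (i = 0; i < count; i++)
                printf("ifnet %u: family %u unit %u\n", i,
                    ifnet_family(list[i]), ifnet_unit(list[i]));

        ifnet_list_free(list);  /* releases the references the KPI took */
}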
diff --git a/bsd/net/kpi_protocol.c b/bsd/net/kpi_protocol.c
index 8b3614a49a365da5fea3585bc190266a31381e3f..9b63ec84032abc2051c5d2963f8ca28eb7a3a0bb 100644 (file)
@@ -266,6 +266,7 @@ proto_input_run(void)
                                        }
                                }
                                if (locked) {
+                                       locked = 0;
                                        lck_mtx_unlock(entry->domain->dom_mtx);
                                }       
                }
diff --git a/bsd/net/route.c b/bsd/net/route.c
index ffd62033f8be663e111509e365154bbe9d6f8509..e00ce3eaa10976ab53cd3922a2438c7cfd8c9ec8 100644 (file)
@@ -382,7 +382,7 @@ rtfree_locked(struct rtentry *rt)
         * close routine typically issues RTM_DELETE which clears the RTF_UP
         * flag on the entry so that the code below reclaims the storage.
         */
-       if (rnh->rnh_close && rt->rt_refcnt == 0)
+       if (rnh && rnh->rnh_close && rt->rt_refcnt == 0)
                rnh->rnh_close((struct radix_node *)rt, rnh);
 
        /*
diff --git a/bsd/netinet/igmp.c b/bsd/netinet/igmp.c
index 961549d608bbb7e942c937bfd4f73d45d75c5079..1889c7125d3608d3091061c33734a908ab8f45f2 100644 (file)
@@ -110,7 +110,7 @@ static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
 #endif
 
 static struct router_info *
-               find_rti(struct ifnet *ifp);
+               find_rti(struct ifnet *ifp, int wait);
 
 static struct igmpstat igmpstat;
 
@@ -155,7 +155,7 @@ igmp_init(void)
 
 static struct router_info *
 find_rti(
-       struct ifnet *ifp)
+       struct ifnet *ifp, int wait)
 {
        struct router_info *rti = Head;
        
@@ -173,7 +173,7 @@ find_rti(
                rti = rti->rti_next;
        }
        
-       MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, M_NOWAIT);
+       MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, wait);
        if (rti != NULL)
        {
                rti->rti_ifp = ifp;
@@ -243,7 +243,7 @@ igmp_input(
        timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
        if (timer == 0)
                timer = 1;
-       rti = find_rti(ifp);
+       rti = find_rti(ifp, M_NOWAIT);
        if (rti == NULL) {
                m_freem(m);
                return;
@@ -398,7 +398,7 @@ igmp_joingroup(struct in_multi *inm)
                inm->inm_timer = 0;
                inm->inm_state = IGMP_OTHERMEMBER;
        } else {
-               inm->inm_rti = find_rti(inm->inm_ifp);
+               inm->inm_rti = find_rti(inm->inm_ifp, M_WAITOK);
                if (inm->inm_rti == NULL) return ENOMEM;
                igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
                inm->inm_timer = IGMP_RANDOM_DELAY(
@@ -438,7 +438,7 @@ igmp_fasttimo(void)
        while (inm != NULL) {
                if (inm->inm_timer == 0) {
                        /* do nothing */
-               } else if (--inm->inm_timer == 0) {
+               } else if ((--inm->inm_timer == 0) && (inm->inm_rti != NULL)) {
                        igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
                        inm->inm_state = IGMP_IREPORTEDLAST;
                } else {
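
The new wait argument simply lets each caller pick the allocation discipline its context allows; a minimal sketch of the two disciplines as the hunk above uses them (M_IGMP and the types come from this file):

	struct router_info *rti;

	/* igmp_input() runs in the packet input path and must not sleep */
	MALLOC(rti, struct router_info *, sizeof (*rti), M_IGMP, M_NOWAIT);
	if (rti == NULL)
		m_freem(m);	/* allocation failed: drop the packet */

	/* igmp_joingroup() runs in process context and may sleep */
	MALLOC(rti, struct router_info *, sizeof (*rti), M_IGMP, M_WAITOK);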
diff --git a/bsd/netinet/in.h b/bsd/netinet/in.h
index 8ed004c8244c780c6aba7ab139556f122b6089b0..7f23a9e6a6168eb1020c76fa158aa36f6cbd5a32 100644 (file)
--- a/bsd/netinet/in.h
+++ b/bsd/netinet/in.h
@@ -440,7 +440,7 @@ struct ip_opts {
 
 #define        IP_TRAFFIC_MGT_BACKGROUND       65   /* int*; get background IO flags; set background IO */
 
-#if CONFIG_FORCE_OUT_IFP
+#ifdef PRIVATE
 /* This is a hack, this is only a hack. */
 #define        IP_FORCE_OUT_IFP        69      /* char ifname[] - send traffic on this interface */
 #endif
diff --git a/bsd/netinet/ip_fw2.h b/bsd/netinet/ip_fw2.h
index cd1514ffd7769ef76599b2ed2f526bad8b64fd76..1e36b65a9f70a4006a7527641036faecd9fe5015 100644 (file)
--- a/bsd/netinet/ip_fw2.h
+++ b/bsd/netinet/ip_fw2.h
@@ -432,6 +432,7 @@ struct _ipfw_dyn_rule {
  * Main firewall chains definitions and global var's definitions.
  */
 #ifdef KERNEL
+#if IPFIREWALL
 
 #define        IP_FW_PORT_DYNT_FLAG    0x10000
 #define        IP_FW_PORT_TEE_FLAG     0x20000
@@ -457,6 +458,7 @@ struct ip_fw_args {
        u_int16_t       divert_rule;    /* divert cookie                */
        u_int32_t       retval;
 };
+//struct ip_fw_args;
 
 /*
  * Function definitions.
@@ -476,6 +478,7 @@ extern ip_fw_ctl_t *ip_fw_ctl_ptr;
 extern int fw_one_pass;
 extern int fw_enable;
 #define        IPFW_LOADED     (ip_fw_chk_ptr != NULL)
+#endif /* IPFIREWALL */
 #endif /* KERNEL */
 
 #endif /* !__LP64__ */
diff --git a/bsd/netinet/ip_input.c b/bsd/netinet/ip_input.c
index 225164fd6bbb60558abce9a3630f9e7c67ef76b9..8743d9178d9f31d484c35d3ae619d1c513c5c406 100644 (file)
--- a/bsd/netinet/ip_input.c
+++ b/bsd/netinet/ip_input.c
@@ -258,6 +258,7 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
 
 
 /* Firewall hooks */
+#if IPFIREWALL
 ip_fw_chk_t *ip_fw_chk_ptr;
 int fw_enable = 1;
 int fw_bypass = 1;
@@ -268,6 +269,7 @@ ip_dn_io_t *ip_dn_io_ptr;
 #endif
 
 int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **) = NULL;
+#endif /* IPFIREWALL */
 
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "link local");
 
@@ -531,7 +533,9 @@ ip_input(struct mbuf *m)
        u_short sum;
        struct in_addr pkt_dst;
        u_int32_t div_info = 0;         /* packet divert/tee info */
+#if IPFIREWALL
        struct ip_fw_args args;
+#endif
        ipfilter_t inject_filter_ref = 0;
        struct m_tag    *tag;
        struct route    ipforward_rt;
@@ -557,6 +561,7 @@ ip_input(struct mbuf *m)
        }
 #endif /* DUMMYNET */
 
+#if IPDIVERT
        if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
                struct divert_tag       *div_tag;
                
@@ -565,6 +570,8 @@ ip_input(struct mbuf *m)
 
                m_tag_delete(m, tag);
        }
+#endif
+
        if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
                struct ip_fwd_tag       *ipfwd_tag;
                
@@ -815,7 +822,11 @@ pass:
         * to be sent and the original packet to be freed).
         */
        ip_nhops = 0;           /* for source routed packets */
+#if IPFIREWALL
        if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop, &ipforward_rt)) {
+#else
+       if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, NULL, &ipforward_rt)) {
+#endif
                return;
        }
 
@@ -842,8 +853,12 @@ pass:
         * Cache the destination address of the packet; this may be
         * changed by use of 'ipfw fwd'.
         */
+#if IPFIREWALL
        pkt_dst = args.next_hop == NULL ?
            ip->ip_dst : args.next_hop->sin_addr;
+#else
+       pkt_dst = ip->ip_dst;
+#endif
 
        /*
         * Enable a consistency check between the destination address
@@ -860,8 +875,12 @@ pass:
         * the packets are received.
         */
        checkif = ip_checkinterface && (ipforwarding == 0) && 
-           ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
-           (args.next_hop == NULL);
+           ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0)
+#if IPFIREWALL
+           && (args.next_hop == NULL);
+#else
+               ;
+#endif
 
        lck_mtx_lock(rt_mtx);
        TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
@@ -989,7 +1008,11 @@ pass:
                OSAddAtomic(1, (SInt32*)&ipstat.ips_cantforward);
                m_freem(m);
        } else {
+#if IPFIREWALL
                ip_forward(m, 0, args.next_hop, &ipforward_rt);
+#else
+               ip_forward(m, 0, NULL, &ipforward_rt);
+#endif
                if (ipforward_rt.ro_rt != NULL) {
                        rtfree(ipforward_rt.ro_rt);
                        ipforward_rt.ro_rt = NULL;
@@ -1184,6 +1207,7 @@ found:
         */
        OSAddAtomic(1, (SInt32*)&ipstat.ips_delivered);
        {
+#if IPFIREWALL
                if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
                        /* TCP needs IPFORWARD info if available */
                        struct m_tag *fwd_tag;
@@ -1212,6 +1236,9 @@ found:
                
                        ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
                }
+#else
+               ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
+#endif
                
                return;
        }
diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c
index c065797e73441f6bc823ffbbb070fb48abb4a16e..db39fe174d3ca7277baad8bd55a0b98a89bb8c62 100644 (file)
--- a/bsd/netinet/ip_output.c
+++ b/bsd/netinet/ip_output.c
@@ -249,7 +249,9 @@ ip_output_list(
 #if IPFIREWALL_FORWARD
        int fwd_rewrite_src = 0;
 #endif
+#if IPFIREWALL
        struct ip_fw_args args;
+#endif
        int didfilter = 0;
        ipfilter_t inject_filter_ref = 0;
        struct m_tag    *tag;
@@ -261,8 +263,8 @@ ip_output_list(
        KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
 
        packetlist = m0;
-       args.next_hop = NULL;
 #if IPFIREWALL
+       args.next_hop = NULL;
        args.eh = NULL;
        args.rule = NULL;
        args.divert_rule = 0;                   /* divert cookie */
@@ -297,7 +299,6 @@ ip_output_list(
                m_tag_delete(m0, tag);
        }
 #endif /* IPDIVERT */
-#endif /* IPFIREWALL */
 
        if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
                struct ip_fwd_tag       *ipfwd_tag;
@@ -307,6 +308,7 @@ ip_output_list(
                
                m_tag_delete(m0, tag);
        }
+#endif /* IPFIREWALL */
 
        m = m0;
        
@@ -356,7 +358,11 @@ loopit:
                hlen = len;
        }
        ip = mtod(m, struct ip *);
+#if IPFIREWALL
        pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
+#else
+       pkt_dst = ip->ip_dst;
+#endif
 
        /*
         * Fill in IP header.
diff --git a/bsd/netinet/raw_ip.c b/bsd/netinet/raw_ip.c
index df763aac54358bdc804ad3c13c30289d95b88e5a..e30687513e2f8ecd23b1ba5dc1f47024e94a8385 100644 (file)
--- a/bsd/netinet/raw_ip.c
+++ b/bsd/netinet/raw_ip.c
@@ -125,10 +125,12 @@ struct    inpcbhead ripcb;
 struct inpcbinfo ripcbinfo;
 
 /* control hooks for ipfw and dummynet */
+#if IPFIREWALL
 ip_fw_ctl_t *ip_fw_ctl_ptr;
 #if DUMMYNET
 ip_dn_ctl_t *ip_dn_ctl_ptr;
 #endif /* DUMMYNET */
+#endif /* IPFIREWALL */
 
 /*
  * Nominal space allocated to a raw ip socket.
diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c
index 39a5fc252cce1ceebf3cc90487a04efadf3d48f8..302ab9431bb26343bbcb76f8af1466c45529c2ef 100644 (file)
--- a/bsd/netinet/tcp_input.c
+++ b/bsd/netinet/tcp_input.c
@@ -1462,13 +1462,6 @@ findpcb:
                                 * Grow the congestion window, if the
                                 * connection is cwnd bound.
                                 */
-                               if (tp->snd_cwnd < tp->snd_wnd) {
-                                       tp->t_bytes_acked += acked;
-                                       if (tp->t_bytes_acked > tp->snd_cwnd) {
-                                               tp->t_bytes_acked -= tp->snd_cwnd;
-                                               tp->snd_cwnd += tp->t_maxseg;
-                                       }
-                               }
                                sbdrop(&so->so_snd, acked);
                                if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
                                    SEQ_LEQ(th->th_ack, tp->snd_recover))
@@ -1794,7 +1787,6 @@ findpcb:
                                tp->ecn_flags &= ~TE_SENDIPECT;
                        }
                        
-                       soisconnected(so);
 #if CONFIG_MACF_NET && CONFIG_MACF_SOCKET
                        /* XXXMAC: recursive lock: SOCK_LOCK(so); */
                        mac_socketpeer_label_associate_mbuf(m, so);
@@ -1835,6 +1827,10 @@ findpcb:
                                tp->t_state = TCPS_ESTABLISHED;
                                tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
                        }
+                       /* soisconnected may lead to socket_unlock in case of upcalls,
+                        * make sure this is done when everything is set up.
+                        */
+                       soisconnected(so);
                } else {
                /*
                 *  Received initial SYN in SYN-SENT[*] state => simul-
@@ -2223,7 +2219,6 @@ trimthenstep6:
        case TCPS_SYN_RECEIVED:
 
                tcpstat.tcps_connects++;
-               soisconnected(so);
 
                /* Do window scaling? */
                if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
@@ -2252,8 +2247,14 @@ trimthenstep6:
                        (void) tcp_reass(tp, (struct tcphdr *)0, &tlen,
                            (struct mbuf *)0);
                tp->snd_wl1 = th->th_seq - 1;
+
                /* FALLTHROUGH */
 
+               /* soisconnected may lead to socket_unlock in case of upcalls,
+                * make sure this is done when everything is set up.
+                */
+               soisconnected(so);
+
        /*
         * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
         * ACKs.  If the ack is in the range
@@ -2542,30 +2543,45 @@ process_ACK:
                        register u_int cw = tp->snd_cwnd;
                        register u_int incr = tp->t_maxseg;
 
-                       if (cw >= tp->snd_ssthresh) {
-                               tp->t_bytes_acked += acked;
-                               if (tp->t_bytes_acked >= cw) {
+                       if ((acked > incr) && tcp_do_rfc3465) {
+                               if (cw >= tp->snd_ssthresh) {
+                                       tp->t_bytes_acked += acked;
+                                       if (tp->t_bytes_acked >= cw) {
                                        /* Time to increase the window. */
-                                       tp->t_bytes_acked -= cw;
-                               } else {
+                                               tp->t_bytes_acked -= cw;
+                                       } else {
                                        /* No need to increase yet. */
-                                       incr = 0;
+                                               incr = 0;
+                                       }
+                               } else {
+                                       /*
+                                        * If the user explicitly enables RFC3465
+                                        * use 2*SMSS for the "L" param.  Otherwise
+                                        * use the more conservative 1*SMSS.
+                                        *
+                                        * (See RFC 3465 2.3 Choosing the Limit)
+                                        */
+                                       u_int abc_lim;
+
+                                       abc_lim = (tcp_do_rfc3465 == 0) ?
+                                           incr : incr * 2;
+                                       incr = lmin(acked, abc_lim);
                                }
-                       } else {
+                       }
+                       else {
                                /*
-                                * If the user explicitly enables RFC3465
-                                * use 2*SMSS for the "L" param.  Otherwise
-                                * use the more conservative 1*SMSS.
-                                *
-                                * (See RFC 3465 2.3 Choosing the Limit)
+                                * If the window gives us less than ssthresh packets
+                                * in flight, open exponentially (segsz per packet).
+                                * Otherwise open linearly: segsz per window
+                                * (segsz^2 / cwnd per packet).
                                 */
-                               u_int abc_lim;
-
-                               abc_lim = (tcp_do_rfc3465 == 0) ?
-                                   incr : incr * 2;
-                               incr = min(acked, abc_lim);
+               
+                                       if (cw >= tp->snd_ssthresh) {
+                                               incr = incr * incr / cw;
+                                       }
                        }
 
+
                        tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
                }
                if (acked > so->so_snd.sb_cc) {
@@ -2577,7 +2593,6 @@ process_ACK:
                        tp->snd_wnd -= acked;
                        ourfinisacked = 0;
                }
-               sowwakeup(so);
                /* detect una wraparound */
                if ((tcp_do_newreno || tp->sack_enable) &&
                    !IN_FASTRECOVERY(tp) &&
@@ -2595,6 +2610,12 @@ process_ACK:
                }
                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
                        tp->snd_nxt = tp->snd_una;
+                       
+               /*
+                * sowwakeup must happen after snd_una, et al. are updated so that
+                * the sequence numbers are in sync with so_snd
+                */
+               sowwakeup(so);
 
                switch (tp->t_state) {
 
@@ -2613,9 +2634,9 @@ process_ACK:
                                 * we'll hang forever.
                                 */
                                if (so->so_state & SS_CANTRCVMORE) {
-                                       soisdisconnected(so);
                                        tp->t_timer[TCPT_2MSL] = tcp_maxidle;
                                        add_to_time_wait(tp);
+                                       soisdisconnected(so);
                                }
                                tp->t_state = TCPS_FIN_WAIT_2;
                                goto drop;
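
Distilled, the rewritten congestion-avoidance block above picks the window increment as follows. This is a hypothetical helper for illustration only, not code from the commit (cwnd_increment and its parameters are invented names):

	static u_int
	cwnd_increment(u_int cw, u_int ssthresh, u_int acked, u_int maxseg,
	    int do_rfc3465, u_int *bytes_acked)
	{
		u_int incr = maxseg;

		if ((acked > maxseg) && do_rfc3465) {
			if (cw >= ssthresh) {
				/* ABC: count bytes; grow by one SMSS per cwnd acked */
				*bytes_acked += acked;
				if (*bytes_acked >= cw)
					*bytes_acked -= cw;
				else
					incr = 0;
			} else {
				/* slow start: limit L = 2*SMSS (RFC 3465 2.3) */
				u_int abc_lim = 2 * maxseg;
				incr = (acked < abc_lim) ? acked : abc_lim;
			}
		} else if (cw >= ssthresh) {
			/* classic congestion avoidance: segsz^2 / cwnd per ACK */
			incr = maxseg * maxseg / cw;
		}
		return (incr);
	}

(In the committed code the slow-start limit is written as a conditional on tcp_do_rfc3465, but within that branch the flag is already known nonzero, so it reduces to 2*SMSS.)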
diff --git a/bsd/netinet/tcp_output.c b/bsd/netinet/tcp_output.c
index 250d4a2d66ef6c9582fef571065d6718a3cdf53c..db82d4d90a5517ce13db2c728c088b6d7619ea79 100644 (file)
--- a/bsd/netinet/tcp_output.c
+++ b/bsd/netinet/tcp_output.c
@@ -167,8 +167,10 @@ extern int ipsec_bypass;
 
 extern int slowlink_wsize;     /* window correction for slow links */
 extern u_long  route_generation;
+#if IPFIREWALL
 extern int fw_enable;          /* firewall check for packet chaining */
 extern int fw_bypass;          /* firewall check: disable packet chaining if there is rules */
+#endif /* IPFIREWALL */
 
 extern vm_size_t       so_cache_zone_element_size;
 
@@ -677,10 +679,19 @@ after_sack_rexmit:
                long adv = lmin(recwin, (long)TCP_MAXWIN << tp->rcv_scale) -
                        (tp->rcv_adv - tp->rcv_nxt);
 
-               if (adv >= (long) (2 * tp->t_maxseg))
-                       goto send;
-               if (2 * adv >= (long) so->so_rcv.sb_hiwat)
-                       goto send;
+               if (adv >= (long) (2 * tp->t_maxseg)) {
+                       
+                       /* 
+                        * Update only if the resulting scaled value of the window changed, or
+                        * if there is a change in the sequence since the last ack.
+                        * This avoids what would appear to be duplicate ACKs (see rdar://5640997)
+                        */
+
+                       if ((tp->last_ack_sent != tp->rcv_nxt) || (((recwin + adv) >> tp->rcv_scale) > recwin)) 
+                               goto send;
+               }
+               if (2 * adv >= (long) so->so_rcv.sb_hiwat) 
+                               goto send;
        }
 
        /*
@@ -1239,6 +1250,8 @@ send:
                tp->sackhint.sack_bytes_rexmit += len;
        }
        th->th_ack = htonl(tp->rcv_nxt);
+       tp->last_ack_sent = tp->rcv_nxt;
+
        if (optlen) {
                bcopy(opt, th + 1, optlen);
                th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
@@ -1623,6 +1636,11 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
        boolean_t chain;
        boolean_t unlocked = FALSE;
 
+       /* Make sure ACK/DELACK conditions are cleared before
+        * we unlock the socket.
+        */
+
+       tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
        /*
         * If allowed, unlock TCP socket while in IP 
         * but only if the connection is established and
@@ -1642,11 +1660,15 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
         * - there is a non default rule set for the firewall
         */
 
-       chain = tcp_packet_chaining > 1 &&
+       chain = tcp_packet_chaining > 1
 #if IPSEC
-               ipsec_bypass &&
+               && ipsec_bypass
+#endif
+#if IPFIREWALL
+               && (fw_enable == 0 || fw_bypass)
 #endif
-               (fw_enable == 0 || fw_bypass);
+               ; // I'm important, not extraneous
+
 
        while (pkt != NULL) {
                struct mbuf *npkt = pkt->m_nextpkt;
diff --git a/bsd/netinet/tcp_timer.c b/bsd/netinet/tcp_timer.c
index 739e2816bcb1473715ed093075847c90d53d851a..833caaf4cdbe179d937067052c5735ba4ea42c15 100644 (file)
--- a/bsd/netinet/tcp_timer.c
+++ b/bsd/netinet/tcp_timer.c
@@ -358,7 +358,7 @@ static int bg_cnt = 0;
 void
 tcp_slowtimo()
 {
-       struct inpcb *inp;
+       struct inpcb *inp, *nxt;
        struct tcpcb *tp;
        struct socket *so;
        int i;
@@ -537,12 +537,12 @@ twunlock:
        }
 
 
-       LIST_FOREACH(inp, &tcb, inp_list) {
+       LIST_FOREACH_SAFE(inp, &tcb, inp_list, nxt) {
                tcp_garbage_collect(inp, 0);
        }
 
        /* Now cleanup the time wait ones */
-       LIST_FOREACH(inp, &time_wait_slots[cur_tw_slot], inp_list) {
+       LIST_FOREACH_SAFE(inp, &time_wait_slots[cur_tw_slot], inp_list, nxt) {
                tcp_garbage_collect(inp, 1);
        }
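
LIST_FOREACH_SAFE (from sys/queue.h) captures the successor before each body runs, so tcp_garbage_collect() may unlink and free the current inpcb without derailing the walk; the plain LIST_FOREACH it replaces would have read a freed element's link field. Roughly, the safe form expands to:

	for (inp = LIST_FIRST(&tcb);
	    inp && ((nxt = LIST_NEXT(inp, inp_list)), 1);
	    inp = nxt) {
		tcp_garbage_collect(inp, 0);	/* may free 'inp' */
	}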
 
diff --git a/bsd/netinet6/nd6.c b/bsd/netinet6/nd6.c
index 1bee938d448a6a40ff83e8c6d0c11ee361b32b8f..f0c838d588a09a52a7009331ab8531f83b23ad96 100644 (file)
--- a/bsd/netinet6/nd6.c
+++ b/bsd/netinet6/nd6.c
@@ -104,7 +104,7 @@ int nd6_debug = 0;
 static int nd6_inuse, nd6_allocated;
 
 struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6, NULL, NULL, 0, 0, 0, 0, 0 };
-size_t nd_ifinfo_indexlim = 8;
+size_t nd_ifinfo_indexlim = 32; /* increased for 5589193 */
 struct nd_ifinfo *nd_ifinfo = NULL;
 struct nd_drhead nd_defrouter;
 struct nd_prhead nd_prefix = { 0 };
@@ -166,7 +166,13 @@ nd6_ifattach(
                bzero(q, n);
                if (nd_ifinfo) {
                        bcopy((caddr_t)nd_ifinfo, q, n/2);
+                       /* Radar 5589193:
+                        * SU fix purposely leaks the old nd_ifinfo array
+                        * if we grow the array to more than 32 interfaces
+                        * Fix for future release is to use proper locking.
+
                        FREE((caddr_t)nd_ifinfo, M_IP6NDP);
+                       */
                }
                nd_ifinfo = (struct nd_ifinfo *)q;
        }
diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c
index fdb3ae1c5a9b2fb9319618206376008bc6adb138..48694c9ee71ff7ab503a8f56463455473d0dcd4f 100644 (file)
--- a/bsd/nfs/nfs_socket.c
+++ b/bsd/nfs/nfs_socket.c
@@ -318,6 +318,8 @@ nfs_connect(struct nfsmount *nmp)
                lck_mtx_unlock(&nmp->nm_lock);
                goto bad;
        }
+       /* just playin' it safe */
+       sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
 
        if (!(nmp->nm_flag & NFSMNT_INT))
                sock_nointerrupt(so, 1);
diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c
index 1a4f5bb0e926033dcb5373dcc80dbb338966646a..9d4d4f3097ebb1f24898e19d587dd55c61f0d3af 100644 (file)
--- a/bsd/nfs/nfs_syscalls.c
+++ b/bsd/nfs/nfs_syscalls.c
@@ -781,6 +781,8 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
        so->so_upcall = nfsrv_rcv;
        so->so_rcv.sb_flags |= SB_UPCALL;
        socket_unlock(so, 1);
+       /* just playin' it safe */
+       sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
 
        /* mark that the socket is not in the nfsrv_sockwg list */
        slp->ns_wgq.tqe_next = SLPNOLIST;
diff --git a/bsd/nfs/nfsm_subs.h b/bsd/nfs/nfsm_subs.h
index 3d6d36b6599fc154a09062aaa0ac7492a55864aa..2daeebc9cd80ab68afb7d413b94273db61da23b1 100644 (file)
--- a/bsd/nfs/nfsm_subs.h
+++ b/bsd/nfs/nfsm_subs.h
@@ -513,11 +513,13 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
 /* get a pointer to the next consecutive bytes in an mbuf chain */
 #define nfsm_chain_get_opaque_pointer(E, NMC, LEN, PTR) \
        do { \
+               uint32_t rndlen; \
                if (E) break; \
-               if ((NMC)->nmc_left >= (uint32_t)(LEN)) { \
+               rndlen = nfsm_rndup(LEN); \
+               if ((NMC)->nmc_left >= rndlen) { \
                        (PTR) = (void*)(NMC)->nmc_ptr; \
-                       (NMC)->nmc_left -= nfsm_rndup(LEN); \
-                       (NMC)->nmc_ptr += nfsm_rndup(LEN); \
+                       (NMC)->nmc_left -= rndlen; \
+                       (NMC)->nmc_ptr += rndlen; \
                } else { \
                        (E) = nfsm_chain_get_opaque_pointer_f((NMC), (LEN), (u_char**)&(PTR)); \
                } \
@@ -526,11 +528,13 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
 /* copy the next consecutive bytes of opaque data from an mbuf chain */
 #define nfsm_chain_get_opaque(E, NMC, LEN, PTR) \
        do { \
+               uint32_t rndlen; \
                if (E) break; \
-               if ((NMC)->nmc_left >= (LEN)) { \
+               rndlen = nfsm_rndup(LEN); \
+               if ((NMC)->nmc_left >= rndlen) { \
                        u_char *__tmpptr = (u_char*)(NMC)->nmc_ptr; \
-                       (NMC)->nmc_left -= nfsm_rndup(LEN); \
-                       (NMC)->nmc_ptr += nfsm_rndup(LEN); \
+                       (NMC)->nmc_left -= rndlen; \
+                       (NMC)->nmc_ptr += rndlen; \
                        bcopy(__tmpptr, (PTR), (LEN)); \
                } else { \
                        (E) = nfsm_chain_get_opaque_f((NMC), (LEN), (u_char*)(PTR)); \
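
Both macros now round LEN up once into rndlen before testing and advancing nmc_left/nmc_ptr. NFS opaque data is XDR-padded to 4-byte boundaries; assuming nfsm_rndup's conventional definition, the rounding looks like:

	#define nfsm_rndup(a)	(((a) + 3) & (~0x3))	/* assumed definition */

	/* e.g. nfsm_rndup(5) == 8 and nfsm_rndup(8) == 8, so the fix above
	 * makes the "enough bytes left?" test use the same rounded length
	 * that the pointer arithmetic consumes. */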
diff --git a/bsd/sys/aio.h b/bsd/sys/aio.h
index 93858337300703760fc51a5a094bfa39eb3fd79b..bb0a7d7c52aa6f1d9b75b4769fd26b2f8ab7e2e6 100644 (file)
--- a/bsd/sys/aio.h
+++ b/bsd/sys/aio.h
@@ -75,7 +75,7 @@ struct aiocb {
 
 struct user_aiocb {
        int             aio_fildes;             /* File descriptor */
-       off_t           aio_offset;             /* File offset */
+       off_t           aio_offset __attribute((aligned(8))); /* File offset */
        user_addr_t     aio_buf __attribute((aligned(8)));              /* Location of buffer */
        user_size_t     aio_nbytes;             /* Length of transfer */
        int             aio_reqprio;    /* Request priority offset */
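
Forcing 8-byte alignment on aio_offset keeps the 32-bit kernel's view of user_aiocb laid out the way a 64-bit process lays out its aiocb. A toy illustration of the effect (hypothetical struct, not from the commit):

	struct example {
		int	fd;					/* offset 0 */
		off_t	offset __attribute__((aligned(8)));	/* offset 8, even compiled 32-bit */
	};
	/* without the attribute, a 32-bit compile would place 'offset' at
	 * offset 4, disagreeing with the 64-bit user-space layout */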
diff --git a/bsd/sys/dtrace_glue.h b/bsd/sys/dtrace_glue.h
index 754a272dc4e4c3a66db87f02e0ee2fe458fe2a51..b6f9c2cd2ac81688e3ec59c857f7988f167afe6f 100644 (file)
--- a/bsd/sys/dtrace_glue.h
+++ b/bsd/sys/dtrace_glue.h
@@ -35,6 +35,7 @@
 #include <kern/lock.h>
 #include <kern/locks.h>
 #include <kern/thread_call.h>
+#include <kern/thread.h>
 #include <machine/machine_routines.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
diff --git a/bsd/sys/errno.h b/bsd/sys/errno.h
index a11877c5228526fc6a0c5777064aa1d9d6fcf822..67aff41a164a6745d582ed167c6ffc993e57333d 100644 (file)
--- a/bsd/sys/errno.h
+++ b/bsd/sys/errno.h
@@ -257,5 +257,6 @@ __END_DECLS
 /* pseudo-errors returned inside kernel to modify return to process */
 #define        ERESTART        (-1)            /* restart syscall */
 #define        EJUSTRETURN     (-2)            /* don't modify regs, just return */
+#define ERECYCLE    (-5)               /* restart lookup under heavy vnode pressure/recycling */
 #endif
 #endif /* _SYS_ERRNO_H_ */
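
ERECYCLE never escapes to user space: as the bsd/vfs/vfs_lookup.c hunks later in this diff show, namei() catches it and re-drives the whole lookup. A compressed, hypothetical sketch of that retry shape:

	u_long saved_flags = ndp->ni_cnd.cn_flags;
	vnode_t saved_dvp = ndp->ni_dvp;

retry:
	error = lookup(ndp);
	if (error == ERECYCLE) {
		/* a vnode was recycled mid-lookup; restore state and re-drive */
		ndp->ni_cnd.cn_flags = saved_flags;
		ndp->ni_dvp = saved_dvp;
		goto retry;
	}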
diff --git a/bsd/sys/namei.h b/bsd/sys/namei.h
index c0a8368bad839c52424ac453ed86ca99cd6b7259..50706beec20ec5cec01b306fe427ccf5644482d2 100644 (file)
--- a/bsd/sys/namei.h
+++ b/bsd/sys/namei.h
@@ -220,7 +220,7 @@ int relookup(struct vnode *dvp, struct vnode **vpp,
  */
 void    cache_purgevfs(mount_t mp);
 int            cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
-                         vfs_context_t context, int *trailing_slash, int *dp_authorized);
+                         vfs_context_t context, int *trailing_slash, int *dp_authorized, vnode_t last_dp);
 
 void           vnode_cache_authorized_action(vnode_t vp, vfs_context_t context, kauth_action_t action);
 void           vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action);
diff --git a/bsd/sys/reboot.h b/bsd/sys/reboot.h
index 83d48fbc4b66565ea542a3f929b4ee1fe7cff3cc..67b55c7863448a635688c59aece57721ac6f88ef 100644 (file)
--- a/bsd/sys/reboot.h
+++ b/bsd/sys/reboot.h
@@ -88,6 +88,7 @@
 #define RB_UNIPROC     0x80    /* don't start slaves */
 #define RB_SAFEBOOT    0x100   /* booting safe */
 #define RB_UPSDELAY 0x200   /* Delays restart by 5 minutes */
+#define RB_QUICK       0x400   /* quick and ungraceful reboot with file system caches flushed */
 #define RB_PANIC       0       /* reboot due to panic */
 #define RB_BOOT                1       /* reboot due to boot() */
 
diff --git a/bsd/sys/socket.h b/bsd/sys/socket.h
index 6d7d934658915e6c91da4257cc1b976794dd4f42..4048673b59c7906b41192556af71506f3472ac67 100644 (file)
--- a/bsd/sys/socket.h
+++ b/bsd/sys/socket.h
@@ -193,6 +193,7 @@ struct iovec {
 #define SO_REUSESHAREUID       0x1025          /* APPLE: Allow reuse of port/socket by different userids */
 #ifdef __APPLE_API_PRIVATE
 #define SO_NOTIFYCONFLICT      0x1026  /* APPLE: send notification if there is a bind on a port which is already in use */
+#define        SO_UPCALLCLOSEWAIT      0x1027  /* APPLE: block on close until an upcall returns */
 #endif
 #define SO_LINGER_SEC  0x1080          /* linger on close if data present (in seconds) */
 #define SO_RESTRICTIONS        0x1081  /* APPLE: deny inbound/outbound/both/flag set */
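
The NFS hunks earlier in this diff set the new option on kernel-managed sockets; a minimal sketch of that use, mirroring the bsd/nfs/nfs_socket.c change above:

	int on = 1;

	/* hold close() until any in-flight socket upcall has returned */
	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof (on));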
diff --git a/bsd/sys/socketvar.h b/bsd/sys/socketvar.h
index 988ec8d82a53b5d8dbcdc1d02535ad05089fab74..9f55d37a6d2adafb2ce7565dae292b260bcffb3b 100644 (file)
--- a/bsd/sys/socketvar.h
+++ b/bsd/sys/socketvar.h
@@ -229,6 +229,7 @@ struct socket {
 #ifdef __APPLE_API_PRIVATE
 #define SOF_NOTIFYCONFLICT 0x400       /* notify that a bind was done on a port already in use */
 #endif
+#define        SOF_UPCALLCLOSEWAIT 0x800 /* block on close until an upcall returns  */
        int     so_usecount;    /* refcounting of socket use */;
        int     so_retaincnt;
        u_int32_t so_filteruse; /* usecount for the socket filters */
diff --git a/bsd/vfs/vfs_cache.c b/bsd/vfs/vfs_cache.c
index ed6ea3203bbe1d81ad5e404abf171121b79e841d..c4e93ab8e4742db543f2d0f298c94792b4c2b958 100644 (file)
--- a/bsd/vfs/vfs_cache.c
+++ b/bsd/vfs/vfs_cache.c
@@ -827,10 +827,12 @@ boolean_t vnode_cache_is_stale(vnode_t vp)
 
 /*
  * Returns:    0                       Success
- *             ENOENT                  No such file or directory
+ *             ERECYCLE                vnode was recycled from underneath us.  Force lookup to be re-driven from namei.
+ *                                             This errno value should not be seen by anyone outside of the kernel.
  */
 int 
-cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, vfs_context_t ctx, int *trailing_slash, int *dp_authorized)
+cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, 
+               vfs_context_t ctx, int *trailing_slash, int *dp_authorized, vnode_t last_dp)
 {
        char            *cp;            /* pointer into pathname argument */
        int             vid;
@@ -840,11 +842,9 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
        kauth_cred_t    ucred;
        boolean_t       ttl_enabled = FALSE;
        struct timeval  tv;
-        mount_t                mp;
+    mount_t            mp;
        unsigned int    hash;
-#if CONFIG_MACF
-       int             error;
-#endif
+       int             error = 0;
 
        ucred = vfs_context_ucred(ctx);
        *trailing_slash = 0;
@@ -945,7 +945,7 @@ skiprsrcfork:
                        error = mac_vnode_check_lookup(ctx, dp, cnp);
                        if (error) {
                                name_cache_unlock();
-                               return (error);
+                               goto errorout;
                        }
                }
 #endif /* MAC */
@@ -1052,35 +1052,41 @@ skiprsrcfork:
                dp = NULLVP;
        } else {
 need_dp:
-               /*
+               /*
                 * return the last directory we looked at
-                * with an io reference held
+                * with an io reference held. If it was the one passed
+                * in as a result of the last iteration of VNOP_LOOKUP,
+                * it should already hold an io ref. No need to increase ref.
                 */
-               if (dp == ndp->ni_usedvp) {
-                       /*
-                        * if this vnode matches the one passed in via USEDVP
-                        * than this context already holds an io_count... just
-                        * use vnode_get to get an extra ref for lookup to play
-                        * with... can't use the getwithvid variant here because
-                        * it will block behind a vnode_drain which would result
-                        * in a deadlock (since we already own an io_count that the
-                        * vnode_drain is waiting on)... vnode_get grabs the io_count
-                        * immediately w/o waiting... it always succeeds
-                        */
-                       vnode_get(dp);
-               } else if ( (vnode_getwithvid(dp, vid)) ) {
-                       /*
-                        * failure indicates the vnode
-                        * changed identity or is being
-                        * TERMINATED... in either case
-                        * punt this lookup.
-                        * 
-                        * don't necessarily return ENOENT, though, because
-                        * we really want to go back to disk and make sure it's
-                        * there or not if someone else is changing this
-                        * vnode.
-                        */
-                       return (ERESTART);
+               if (last_dp != dp){
+                       
+                       if (dp == ndp->ni_usedvp) {
+                               /*
+                                * if this vnode matches the one passed in via USEDVP
+                                * then this context already holds an io_count... just
+                                * use vnode_get to get an extra ref for lookup to play
+                                * with... can't use the getwithvid variant here because
+                                * it will block behind a vnode_drain which would result
+                                * in a deadlock (since we already own an io_count that the
+                                * vnode_drain is waiting on)... vnode_get grabs the io_count
+                                * immediately w/o waiting... it always succeeds
+                                */
+                               vnode_get(dp);
+                       } else if ( (vnode_getwithvid(dp, vid)) ) {
+                               /*
+                                * failure indicates the vnode
+                                * changed identity or is being
+                                * TERMINATED... in either case
+                                * punt this lookup.
+                                * 
+                                * don't necessarily return ENOENT, though, because
+                                * we really want to go back to disk and make sure it's
+                                * there or not if someone else is changing this
+                                * vnode.
+                                */
+                               error = ERECYCLE;
+                               goto errorout;
+                       }
                }
        }
        if (vp != NULLVP) {
@@ -1104,7 +1110,22 @@ need_dp:
        ndp->ni_dvp = dp;
        ndp->ni_vp  = vp;
 
-       return (0);
+errorout:
+       /* 
+        * If we came into cache_lookup_path after an iteration of the lookup loop that
+        * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with an io ref
+        * on it.  It is now the job of cache_lookup_path to drop the ref on this vnode 
+        * when it is no longer needed.  If we get to this point, and last_dp is not NULL
+        * and it is ALSO not the dvp we want to return to the caller of this function, it MUST be
+        * the case that we got to a subsequent path component and this previous vnode is 
+        * no longer needed.  We can then drop the io ref on it.
+        */
+       if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)){
+               vnode_put(last_dp);
+       }
+       
+       // error was initialized to 0; it is still 0 if no error case occurred.
+       return error;
 }
 
 
diff --git a/bsd/vfs/vfs_journal.c b/bsd/vfs/vfs_journal.c
index 8f7145d19d5826bd3d40b1bc8e843f651d88f7f6..df54d26b1db4456b1b5e751ec7428c2f1c91329d 100644 (file)
--- a/bsd/vfs/vfs_journal.c
+++ b/bsd/vfs/vfs_journal.c
@@ -2843,8 +2843,8 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
                blhdr->checksum = 0;
                blhdr->checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE);
 
-               if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, tr->blhdr->num_blocks * sizeof(struct buf *))) {
-                   panic("can't allocate %lu bytes for bparray\n", tr->blhdr->num_blocks * sizeof(struct buf *));
+               if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, blhdr->num_blocks * sizeof(struct buf *))) {
+                   panic("can't allocate %lu bytes for bparray\n", blhdr->num_blocks * sizeof(struct buf *));
                }
 
                // calculate individual block checksums
@@ -2867,7 +2867,7 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
                    blhdr->binfo[i].b.bp = bparray[i];
                }
 
-               kmem_free(kernel_map, (vm_offset_t)bparray, tr->blhdr->num_blocks * sizeof(struct buf *));
+               kmem_free(kernel_map, (vm_offset_t)bparray, blhdr->num_blocks * sizeof(struct buf *));
 
                if (ret != amt) {
                        printf("jnl: %s: end_transaction: only wrote %d of %d bytes to the journal!\n",
diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c
index aaabf7bc100137097945deeffab293f60705de16..bb8c5dd2bec6624db0d7cd283560a261a9f42b42 100644 (file)
--- a/bsd/vfs/vfs_lookup.c
+++ b/bsd/vfs/vfs_lookup.c
@@ -141,7 +141,9 @@ static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, v
  *             lookup:EROFS
  *             lookup:EACCES
  *             lookup:EPERM
- *             lookup:???
+ *             lookup:ERECYCLE  vnode was recycled from underneath us in lookup.
+ *                                              This means we should re-drive lookup from this point.
+ *             lookup:???
  *             VNOP_READLINK:???
  */
 int
@@ -150,6 +152,9 @@ namei(struct nameidata *ndp)
        struct filedesc *fdp;   /* pointer to file descriptor state */
        char *cp;               /* pointer into pathname argument */
        struct vnode *dp;       /* the directory we are searching */
+       struct vnode *usedvp = ndp->ni_dvp;  /* store pointer to vp in case we must loop due to
+                                                                                       heavy vnode pressure */
+       u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
        uio_t auio;
        int error;
        struct componentname *cnp = &ndp->ni_cnd;
@@ -170,6 +175,8 @@ namei(struct nameidata *ndp)
 #endif
        fdp = p->p_fd;
 
+vnode_recycled:
+
        /*
         * Get a buffer for the name to be translated, and copy the
         * name into the buffer.
@@ -413,6 +420,14 @@ retry_copy:
        }
        cnp->cn_pnbuf = NULL;
        ndp->ni_vp = NULLVP;
+       if (error == ERECYCLE){
+               /* vnode was recycled underneath us. re-drive lookup to start at 
+                  the beginning again, since recycling invalidated the last lookup */
+               ndp->ni_cnd.cn_flags = cnpflags;
+               ndp->ni_dvp = usedvp;
+               goto vnode_recycled;
+       }
+
 
        return (error);
 }
@@ -462,7 +477,7 @@ retry_copy:
  *             ENOTDIR                 Not a directory
  *             EROFS                   Read-only file system [CREATE]
  *             EISDIR                  Is a directory [CREATE]
- *             cache_lookup_path:ENOENT
+ *             cache_lookup_path:ERECYCLE  (vnode was recycled from underneath us, redrive lookup again)
  *             vnode_authorize:EROFS
  *             vnode_authorize:EACCES
  *             vnode_authorize:EPERM
@@ -495,6 +510,7 @@ lookup(struct nameidata *ndp)
        int current_mount_generation = 0;
        int vbusyflags = 0;
        int nc_generation = 0;
+       vnode_t last_dp = NULLVP;
 
        /*
         * Setup: break out flag bits into variables.
@@ -526,7 +542,7 @@ lookup(struct nameidata *ndp)
 dirloop: 
        ndp->ni_vp = NULLVP;
 
-       if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized)) ) {
+       if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized, last_dp)) ) {
                dp = NULLVP;
                goto bad;
        }
@@ -865,7 +881,12 @@ nextname:
                if (*cp == '\0')
                        goto emptyname;
 
-               vnode_put(dp);
+               /*
+                * cache_lookup_path is now responsible for dropping io ref on dp
+                * when it is called again in the dirloop.  This ensures we hold
+                * a ref on dp until we complete the next round of lookup.
+                */
+               last_dp = dp;
                goto dirloop;
        }
                                  
diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c
index be1ba3291a06ae7516bf95c53d6c888458854bc0..535603224f504b7e13817bc507a199f781f0b75e 100644 (file)
--- a/bsd/vfs/vfs_subr.c
+++ b/bsd/vfs/vfs_subr.c
@@ -2785,19 +2785,23 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp)
 }
 
 static struct klist fs_klist;
+lck_grp_t *fs_klist_lck_grp;
+lck_mtx_t *fs_klist_lock;
 
 void
 vfs_event_init(void)
 {
-
        klist_init(&fs_klist);
+       fs_klist_lck_grp = lck_grp_alloc_init("fs_klist", NULL);
+       fs_klist_lock = lck_mtx_alloc_init(fs_klist_lck_grp, NULL);
 }
 
 void
 vfs_event_signal(__unused fsid_t *fsid, u_int32_t event, __unused intptr_t data)
 {
-
+       lck_mtx_lock(fs_klist_lock);
        KNOTE(&fs_klist, event);
+       lck_mtx_unlock(fs_klist_lock);
 }
 
 /*
@@ -3124,16 +3128,19 @@ static int
 filt_fsattach(struct knote *kn)
 {
 
+       lck_mtx_lock(fs_klist_lock);
        kn->kn_flags |= EV_CLEAR;
        KNOTE_ATTACH(&fs_klist, kn);
+       lck_mtx_unlock(fs_klist_lock);
        return (0);
 }
 
 static void
 filt_fsdetach(struct knote *kn)
 {
-
+       lck_mtx_lock(fs_klist_lock);
        KNOTE_DETACH(&fs_klist, kn);
+       lck_mtx_unlock(fs_klist_lock);
 }
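
The pattern above serializes every reader and writer of fs_klist behind one mutex; all of the calls appear in this hunk:

	lck_grp_t *grp = lck_grp_alloc_init("fs_klist", NULL);	/* once, at init */
	lck_mtx_t *mtx = lck_mtx_alloc_init(grp, NULL);

	lck_mtx_lock(mtx);
	KNOTE(&fs_klist, event);	/* likewise KNOTE_ATTACH / KNOTE_DETACH */
	lck_mtx_unlock(mtx);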
 
 static int
@@ -3794,11 +3801,18 @@ vnode_reclaim_internal(struct vnode * vp, int locked, int reuse, int flags)
                vgone(vp, flags);               /* clean and reclaim the vnode */
 
        /*
-        * give the vnode a new identity so
-        * that vnode_getwithvid will fail
-        * on any stale cache accesses
+        * give the vnode a new identity so that vnode_getwithvid will fail
+        * on any stale cache accesses...
+        * grab the list_lock so that if we're in "new_vnode"
+        * behind the list_lock trying to steal this vnode, the v_id is stable...
+        * once new_vnode drops the list_lock, it will block trying to take
+        * the vnode lock until we release it... at that point it will evaluate
+        * whether the v_id has changed
         */
+       vnode_list_lock();
        vp->v_id++;
+       vnode_list_unlock();
+
        if (isfifo) {
                struct fifoinfo * fip;
 
diff --git a/bsd/vfs/vfs_xattr.c b/bsd/vfs/vfs_xattr.c
index e9ef928505d0de12024f89716b0c06131c32cc49..30e62d8ce8c2a9f989d0a7f92a3cd92b1b82434a 100644 (file)
--- a/bsd/vfs/vfs_xattr.c
+++ b/bsd/vfs/vfs_xattr.c
@@ -3041,7 +3041,7 @@ lock_xattrfile(vnode_t xvp, short locktype, vfs_context_t context)
        lf.l_len = 0;
        lf.l_type = locktype; /* F_WRLCK or F_RDLCK */
        /* Note: id is just a kernel address that's not a proc */
-       error = VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_SETLK, &lf, F_FLOCK, context);
+       error = VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_SETLK, &lf, F_FLOCK|F_WAIT, context);
        return (error == ENOTSUP ? 0 : error);
 }
 
diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c
index 56c2201dca6e94c18bce4fd5560f3a471e2a4238..54e6d30c6f1ea5c7c5358da6a38d1f661a94550d 100644 (file)
--- a/bsd/vm/vm_unix.c
+++ b/bsd/vm/vm_unix.c
  * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
  */
 
+#ifndef SECURE_KERNEL
 extern int allow_stack_exec, allow_data_exec;
 
 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
+#endif /* !SECURE_KERNEL */
 
 #if CONFIG_NO_PRINTF_STRINGS
 void
@@ -819,7 +821,7 @@ shared_region_map_np(
        memory_object_size_t            file_size;
        user_addr_t                     user_mappings;
        struct shared_file_mapping_np   *mappings;
-#define SFM_MAX_STACK  4
+#define SFM_MAX_STACK  8
        struct shared_file_mapping_np   stack_mappings[SFM_MAX_STACK];
        unsigned int                    mappings_count;
        vm_size_t                       mappings_size;
diff --git a/config/BSDKernel.exports b/config/BSDKernel.exports
index 678fe7d7a653daa0998de489a07c52b7a35806d8..745ad66f22f6471f1f17c156668cf63ae193893d 100644 (file)
--- a/config/BSDKernel.exports
+++ b/config/BSDKernel.exports
@@ -597,6 +597,7 @@ _ubc_isinuse
 _ubc_msync
 _ubc_offtoblk
 _ubc_page_op
+_ubc_pages_resident
 _ubc_range_op
 _ubc_setcred
 _ubc_setsize
diff --git a/config/IOKit.exports b/config/IOKit.exports
index fd0f8de5983005dfc6dc52c40330c1692ec3ab49..f14615395d67a43c0fe03c04b6b46ece26979427 100644 (file)
--- a/config/IOKit.exports
+++ b/config/IOKit.exports
@@ -119,7 +119,6 @@ __Z17IODTMapInterruptsP15IORegistryEntry
 __Z17IODeviceTreeAllocPv
 __Z17IOServiceOrderingPK15OSMetaClassBaseS1_Pv
 __Z18IODTCompareNubNamePK15IORegistryEntryP8OSStringPS3_
-__Z19IODTMapOneInterruptP15IORegistryEntryPmPP6OSDataPPK8OSSymbol
 __Z19printDictionaryKeysP12OSDictionaryPc
 __Z19tellAppWithResponseP8OSObjectPv
 __Z20IODTMakeNVDescriptorP15IORegistryEntryP17IONVRAMDescriptor
@@ -127,7 +126,6 @@ __Z20IODTMatchNubWithKeysP15IORegistryEntryPKc
 __Z21IODTResolveAddressingP15IORegistryEntryPKcP14IODeviceMemory
 __Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_
 __Z22tellClientWithResponseP8OSObjectPv
-__Z23IODTFindInterruptParentP15IORegistryEntry
 __Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc
 __Z24broadcast_aggressivenessP8OSObjectPvS1_S1_S1_
 __Z26serializedAllowPowerChangeP8OSObjectPvS1_S1_S1_
@@ -561,6 +559,8 @@ __ZN14IOPMrootDomain23requestPowerDomainStateEmP17IOPowerConnectionm
 __ZN14IOPMrootDomain23setQuickSpinDownTimeoutEv
 __ZN14IOPMrootDomain24displayWranglerPublishedEPvS0_P9IOService
 __ZN14IOPMrootDomain24receivePowerNotificationEm
+__ZN14IOPMrootDomain24systemPowerEventOccurredEPK8OSSymbolP8OSObject
+__ZN14IOPMrootDomain24systemPowerEventOccurredEPK8OSSymbolj
 __ZN14IOPMrootDomain25announcePowerSourceChangeEv
 __ZN14IOPMrootDomain26handleSleepTimerExpirationEv
 __ZN14IOPMrootDomain26restoreUserSpinDownTimeoutEv
diff --git a/config/MasterVersion b/config/MasterVersion
index dfaa71992b1320d8fb4bbb1300cb22ebbda05869..6f5ee0f98e7b00589edec043211905663dd66dd3 100644 (file)
--- a/config/MasterVersion
+++ b/config/MasterVersion
@@ -1,4 +1,4 @@
@@ -1,4 +1,4 @@
-9.1.0
+9.2.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
diff --git a/config/System6.0.exports b/config/System6.0.exports
index 71c1ae0fd2e51a76394b2b501b33c69366646b9b..efaa7c6053caa82caeda15e425e997f7c4059e21 100644 (file)
--- a/config/System6.0.exports
+++ b/config/System6.0.exports
@@ -186,7 +186,6 @@ __Z17IODTMapInterruptsP15IORegistryEntry
 __Z17IODeviceTreeAllocPv
 __Z17IOServiceOrderingPK15OSMetaClassBaseS1_Pv
 __Z18IODTCompareNubNamePK15IORegistryEntryP8OSStringPS3_
-__Z19IODTMapOneInterruptP15IORegistryEntryPmPP6OSDataPPK8OSSymbol
 __Z19printDictionaryKeysP12OSDictionaryPc
 __Z19tellAppWithResponseP8OSObjectPv
 __Z20IODTMakeNVDescriptorP15IORegistryEntryP17IONVRAMDescriptor
@@ -194,7 +193,6 @@ __Z20IODTMatchNubWithKeysP15IORegistryEntryPKc
 __Z21IODTResolveAddressingP15IORegistryEntryPKcP14IODeviceMemory
 __Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_
 __Z22tellClientWithResponseP8OSObjectPv
-__Z23IODTFindInterruptParentP15IORegistryEntry
 __Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc
 __Z24broadcast_aggressivenessP8OSObjectPvS1_S1_S1_
 __Z26serializedAllowPowerChangeP8OSObjectPvS1_S1_S1_
diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h
index 362c5ed31de110a9d2b6dd9fbc980f618ee614ec..3e447c36cda531417732007e2f4e0926f21a8244 100644 (file)
--- a/iokit/IOKit/pwr_mgt/IOPM.h
+++ b/iokit/IOKit/pwr_mgt/IOPM.h
@@ -252,13 +252,24 @@ enum {
     kInflowForciblyEnabledBit = (1 << 0)
 };
 
+/* kIOPMMessageInternalBatteryFullyDischarged
+ * The battery has drained completely to its "Fully Discharged" state. 
+ */
 #define kIOPMMessageInternalBatteryFullyDischarged  \
                 iokit_family_msg(sub_iokit_powermanagement, 0x120)
 
+/* kIOPMMessageSystemPowerEventOccurred
+ * Some major system thermal property has changed, and interested clients may
+ * modify their behavior.
+ */
+#define kIOPMMessageSystemPowerEventOccurred  \
+                iokit_family_msg(sub_iokit_powermanagement, 0x130)
+
 
 /*******************************************************************************
  *
  * Power commands issued to root domain
+ * Use with IOPMrootDomain::receivePowerNotification()
  *
  * These commands are issued from system drivers only:
  *      ApplePMU, AppleSMU, IOGraphics, AppleACPIFamily
@@ -278,6 +289,7 @@ enum {
   kIOPMClamshellOpened          = (1<<10)  // clamshell was opened
 };
 
+
 /*******************************************************************************
  *
  * Power Management Return Codes
@@ -378,6 +390,76 @@ enum {
 #define kIOPMPSPostDishargeWaitSecondsKey      "PostDischargeWaitSeconds"
 
 
+/* CPU Power Management status keys
+ * Pass as arguments to IOPMrootDomain::systemPowerEventOccurred
+ * Or as arguments to IOPMSystemPowerEventOccurred()
+ * Or to decode the dictionary obtained from IOPMCopyCPUPowerStatus()
+ * These keys reflect restrictions placed on the CPU by the system
+ * to bring the CPU's power consumption within allowable thermal and 
+ * power constraints.
+ */
+
+
+/* kIOPMGraphicsPowerLimitsKey
+ *   The key representing the dictionary of graphics power limits.
+ *   The dictionary contains the other kIOPMGraphicsPowerLimit keys & their
+ *   associated values (e.g. the performance limit).
+ */
+#define kIOPMGraphicsPowerLimitsKey                     "Graphics_Power_Limits"
+
+/* kIOPMGraphicsPowerLimitPerformanceKey
+ *   The key representing the percent of overall performance made available
+ *   by the graphics chip as a percentage (integer 0 - 100).
+ */
+#define kIOPMGraphicsPowerLimitPerformanceKey           "Graphics_Power_Performance"
+
+
+
+/* kIOPMCPUPowerLimitsKey
+ *   The key representing the dictionary of CPU Power Limits.
+ *   The dictionary contains the other kIOPMCPUPower keys & their associated
+ *   values (e.g. Speed limit, Processor Count, and Schedule limits).
+ */
+#define kIOPMCPUPowerLimitsKey                          "CPU_Power_Limits"
+
+/* kIOPMCPUPowerLimitProcessorSpeedKey defines the speed & voltage limits placed 
+ *   on the CPU.
+ *   Represented as a percentage (0-100) of maximum CPU speed.
+ */
+#define kIOPMCPUPowerLimitProcessorSpeedKey             "CPU_Speed_Limit"
+
+/* kIOPMCPUPowerLimitProcessorCountKey reflects how many, if any, CPUs have been
+ *   taken offline. Represented as an integer number of CPUs (0 - Max CPUs).
+ */
+#define kIOPMCPUPowerLimitProcessorCountKey             "CPU_Available_CPUs"
+
+/* kIOPMCPUPowerLimitSchedulerTimeKey represents the percentage (0-100) of CPU time 
+ *   available: 100% under normal operation. The OS may limit this time to a
+ *   percentage less than 100%.
+ */
+#define kIOPMCPUPowerLimitSchedulerTimeKey              "CPU_Scheduler_Limit"
+
+
+/* Thermal Level Warning Key
+ * Indicates the thermal constraints placed on the system. This value may 
+ * cause clients to take action to consume fewer system resources.
+ * The value associated with this warning is defined by the platform.
+ */
+#define kIOPMThermalLevelWarningKey                     "Thermal_Level_Warning"
+
+/* Thermal Warning Level values
+ *      kIOPMThermalWarningLevelNormal - under normal operating conditions
+ *      kIOPMThermalWarningLevelDanger - thermal pressure may cause system slowdown
+ *      kIOPMThermalWarningLevelCrisis - thermal conditions may cause imminent shutdown
+ *
+ * The platform may define additional thermal levels if necessary.
+ */
+enum {
+  kIOPMThermalWarningLevelNormal    = 0,
+  kIOPMThermalWarningLevelDanger    = 5,
+  kIOPMThermalWarningLevelCrisis    = 10
+};
+
 
 // PM Settings Controller setting types
 // Settings types used primarily with:
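
For user space, the comments above point at IOPMCopyCPUPowerStatus() to read these keys back. A hedged C sketch, assuming the IOPMLib API of this era and that the thermal warning key appears in that dictionary:

	#include <IOKit/pwr_mgt/IOPMLib.h>

	CFDictionaryRef status = NULL;
	int level = kIOPMThermalWarningLevelNormal;

	/* IOPMCopyCPUPowerStatus() copies out the CPU power status dictionary */
	if (IOPMCopyCPUPowerStatus(&status) == kIOReturnSuccess && status) {
		CFNumberRef num = (CFNumberRef)CFDictionaryGetValue(status,
		    CFSTR(kIOPMThermalLevelWarningKey));
		if (num)
			CFNumberGetValue(num, kCFNumberIntType, &level);
		CFRelease(status);
	}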
diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h
index 2605169c399554bee0a0ebe7a932b2ac00eda6e8..c528b8c3eb51ef47dd84e671d43997d72b1967bd 100644 (file)
--- a/iokit/IOKit/pwr_mgt/RootDomain.h
+++ b/iokit/IOKit/pwr_mgt/RootDomain.h
@@ -41,12 +41,15 @@ enum {
     kPCICantSleep                      = 0x00000004
 };
 
+
+
 /* 
  *IOPMrootDomain registry property keys
  */
 #define kRootDomainSupportedFeatures        "Supported Features"
 #define kRootDomainSleepReasonKey           "Last Sleep Reason"
 #define kRootDomainSleepOptionsKey          "Last Sleep Options"
+#define kIOPMRootDomainPowerStatusKey       "Power Status"
 
 /*
  * Possible sleep reasons found under kRootDomainSleepReasonsKey
@@ -115,6 +118,22 @@ public:
     virtual IOReturn setProperties ( OSObject * );
     IOReturn shutdownSystem ( void );
     IOReturn restartSystem ( void );
+
+/*! @function systemPowerEventOccurred
+    @abstract Other drivers may inform IOPMrootDomain of system PM events
+    @discussion systemPowerEventOccurred is a richer alternative to receivePowerNotification()
+        Only Apple-owned kexts should have reason to call systemPowerEventOccurred.
+    @param event An OSSymbol describing the type of power event.
+    @param value A 32-bit integer value associated with the event.
+    @param shouldUpdate indicates whether the root domain should send a notification
+        to interested parties. Pass false if you're calling systemPowerEventOccurred
+        several times in succession, and pass true only on the last invocation.
+    @result kIOReturnSuccess on success */
+    IOReturn systemPowerEventOccurred(const OSSymbol *event, 
+                                    uint32_t intValue);
+    IOReturn systemPowerEventOccurred(const OSSymbol *event, 
+                                    OSObject *value);
+    
     virtual IOReturn receivePowerNotification (UInt32 msg);
     virtual void setSleepSupported( IOOptionBits flags );
     virtual IOOptionBits getSleepSupported();
diff --git a/iokit/Kernel/IOBufferMemoryDescriptor.cpp b/iokit/Kernel/IOBufferMemoryDescriptor.cpp
index efe64454d498cb6b59fcf4045b50a9b2bea3dda6..5fbfc6715ce16a1f8bb984f31e48bac41f2e7e6d 100644 (file)
--- a/iokit/Kernel/IOBufferMemoryDescriptor.cpp
+++ b/iokit/Kernel/IOBufferMemoryDescriptor.cpp
@@ -500,7 +500,7 @@ void IOBufferMemoryDescriptor::free()
     IOOptionBits     options   = _options;
     vm_size_t        size      = _capacity;
     void *           buffer    = _buffer;
-    IOVirtualAddress source    = _ranges.v64->address;
+    mach_vm_address_t source   = (_ranges.v) ? _ranges.v64->address : 0;
     IOMemoryMap *    map       = 0;
     vm_offset_t      alignment = _alignment;
 
@@ -524,7 +524,7 @@ void IOBufferMemoryDescriptor::free()
     else if (buffer)
     {
        if (kIOMemoryTypePhysical64 == (flags & kIOMemoryTypeMask))
-           IOFreePhysical((mach_vm_address_t) source, size);
+           IOFreePhysical(source, size);
         else if (options & kIOMemoryPhysicallyContiguous)
             IOKernelFreeContiguous((mach_vm_address_t) buffer, size);
         else if (alignment > 1)
diff --git a/iokit/Kernel/IODMACommand.cpp b/iokit/Kernel/IODMACommand.cpp
index 9aece35ff8e194ab4e730c36daa1eb869409f06c..75d751afe8b1e2f35508bf44cf3deca8444bccbb 100644 (file)
--- a/iokit/Kernel/IODMACommand.cpp
+++ b/iokit/Kernel/IODMACommand.cpp
@@ -263,6 +263,7 @@ IODMACommand::setMemoryDescriptor(const IOMemoryDescriptor *mem, bool autoPrepar
        else
            fInternalState->fCheckAddressing = (fNumAddressBits && (highPage >= (1UL << (fNumAddressBits - PAGE_SHIFT))));
 
+       fInternalState->fNewMD = true;
        mem->retain();
        fMemory = mem;
 
@@ -857,10 +858,11 @@ IODMACommand::genIOVMSegments(InternalSegmentFunction outSegFunc,
     if (offset >= memLength)
        return kIOReturnOverrun;
 
-    if ((offset == internalState->fPreparedOffset) || (offset != state->fOffset)) {
+    if ((offset == internalState->fPreparedOffset) || (offset != state->fOffset) || internalState->fNewMD) {
        state->fOffset                 = 0;
        state->fIOVMAddr               = 0;
        internalState->fNextRemapIndex = 0;
+       internalState->fNewMD          = false;
        state->fMapped                 = (IS_MAPPED(fMappingOptions) && fMapper);
        mdOp                           = kIOMDFirstSegment;
     };
diff --git a/iokit/Kernel/IODeviceTreeSupport.cpp b/iokit/Kernel/IODeviceTreeSupport.cpp
index 4b6a1fdf2ffcce859b225c3dfd200399f52ca840..381022c56a5abe9478b6df56f4919f2946530c74 100644 (file)
--- a/iokit/Kernel/IODeviceTreeSupport.cpp
+++ b/iokit/Kernel/IODeviceTreeSupport.cpp
@@ -435,15 +435,21 @@ static bool GetUInt32( IORegistryEntry * regEntry, const OSSymbol * name,
         return( false );
 }
 
-IORegistryEntry * IODTFindInterruptParent( IORegistryEntry * regEntry )
+static IORegistryEntry * IODTFindInterruptParent( IORegistryEntry * regEntry, IOItemCount index )
 {
     IORegistryEntry *  parent;
     UInt32             phandle;
+    OSData         *   data;
+    unsigned int       len;
 
-    if( GetUInt32( regEntry, gIODTInterruptParentKey, &phandle))
-        parent = FindPHandle( phandle );
+    if( (data = OSDynamicCast( OSData, regEntry->getProperty( gIODTInterruptParentKey )))
+      && (sizeof(UInt32) <= (len = data->getLength()))) {
+       if (((index + 1) * sizeof(UInt32)) > len)
+           index = 0;
+       phandle = ((UInt32 *) data->getBytesNoCopy())[index];
+       parent = FindPHandle( phandle );
+    }
-    else if( 0 == regEntry->getProperty( "interrupt-controller"))
+    else if( 0 == regEntry->getProperty( "interrupt-controller"))
         parent = regEntry->getParentEntry( gIODTPlane);
     else
         parent = 0;
@@ -481,8 +487,8 @@ static void IODTGetICellCounts( IORegistryEntry * regEntry,
         *aCellCount = 0;
 }
 
-UInt32 IODTMapOneInterrupt( IORegistryEntry * regEntry, UInt32 * intSpec,
-                               OSData ** spec, const OSSymbol ** controller )
+static UInt32 IODTMapOneInterrupt( IORegistryEntry * regEntry, UInt32 * intSpec, UInt32 index,
+                                   OSData ** spec, const OSSymbol ** controller )
 {
     IORegistryEntry *parent = 0;
     OSData                     *data;
@@ -494,7 +500,7 @@ UInt32 IODTMapOneInterrupt( IORegistryEntry * regEntry, UInt32 * intSpec,
     UInt32                     i, original_icells;
     bool                       cmp, ok = false;
 
-    parent = IODTFindInterruptParent( regEntry );    
+    parent = IODTFindInterruptParent( regEntry, index );    
     IODTGetICellCounts( parent, &icells, &acells );
     addrCmp = 0;
     if( acells) {
@@ -640,11 +646,12 @@ static bool IODTMapInterruptsSharing( IORegistryEntry * regEntry, OSDictionary *
     OSData *           local2;
     UInt32 *           localBits;
     UInt32 *           localEnd;
+    IOItemCount                index;
     OSData *           map;
     OSObject *         oneMap;
     OSArray *          mapped;
     OSArray *          controllerInts;
-    const OSSymbol *   controller;
+    const OSSymbol *   controller = 0;
     OSArray *          controllers;
     UInt32             skip = 1;
     bool               ok, nw;
@@ -666,6 +673,7 @@ static bool IODTMapInterruptsSharing( IORegistryEntry * regEntry, OSDictionary *
 
     localBits = (UInt32 *) local->getBytesNoCopy();
     localEnd = localBits + (local->getLength() / sizeof(UInt32));
+    index = 0;
     mapped = OSArray::withCapacity( 1 );
     controllers = OSArray::withCapacity( 1 );
 
@@ -673,7 +681,7 @@ static bool IODTMapInterruptsSharing( IORegistryEntry * regEntry, OSDictionary *
 
     if( ok) do {
         if( nw) {
-            skip = IODTMapOneInterrupt( regEntry, localBits, &map, &controller );
+            skip = IODTMapOneInterrupt( regEntry, localBits, index, &map, &controller );
             if( 0 == skip) {
                 IOLog("%s: error mapping interrupt[%d]\n",
                         regEntry->getName(), mapped->getCount());
@@ -686,6 +694,7 @@ static bool IODTMapInterruptsSharing( IORegistryEntry * regEntry, OSDictionary *
             controller->retain();
         }
 
+       index++;
         localBits += skip;
         mapped->setObject( map );
         controllers->setObject( controller );
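
With this change a node's interrupt-parent property may carry one phandle per
interrupt specifier, and IODTMapInterruptsSharing() passes a running index down
so each specifier resolves against its own parent, falling back to entry 0 when
the property is shorter. The bounds rule in isolation, sketched with a plain
byte buffer standing in for the OSData (names here are illustrative, not the
IOKit API):

    #include <stdint.h>
    #include <stddef.h>

    // Sketch: select the index'th 32-bit phandle from an interrupt-parent
    // property, with the same fall-back-to-0 bounds rule the diff adds.
    static uint32_t
    phandle_for_index(const void *prop, size_t len, uint32_t index)
    {
        const uint32_t *phandles = (const uint32_t *)prop;

        if (len < sizeof(uint32_t))
            return 0;                            // too short: no parent
        if (((index + 1) * sizeof(uint32_t)) > len)
            index = 0;                           // out of range: first entry
        return phandles[index];
    }
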
index 030368a72f658f89d554758773eb209f515db31a..ae66cb9b8a91f1deb1f1ca66c46fdd48ad61896d 100644 (file)
@@ -1598,7 +1598,10 @@ IOHibernateSystemWake(void)
                const OSSymbol * sym = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey);
 
                if (sym) {
-                       gIOOptionsEntry->removeProperty(sym);
+                       if (gIOOptionsEntry->getProperty(sym)) {
+                               gIOOptionsEntry->removeProperty(sym);
+                               gIOOptionsEntry->sync();
+                       }
                        sym->release();
                }
        }
index afa64a600ba7903f76ad52d3342a74a4223b61f8..a21ff00312759dee3e02947fa2844dde2dbe2dfa 100644 (file)
@@ -113,7 +113,7 @@ struct IODMACommandInternal
     UInt8  fCopyContig;
     UInt8  fPrepared;
     UInt8  fDoubleBuffer;
-    UInt8  __pad[1];
+    UInt8  fNewMD;
 
     ppnum_t  fCopyPageAlloc;
     ppnum_t  fCopyPageCount;
index 3c0b8f7e16a09f3bcff06237835240a55efa45da..43321aac12c2367d21165fa7f50bbd9a230fa6fd 100644 (file)
@@ -1017,6 +1017,8 @@ void IOGeneralMemoryDescriptor::free()
            IODelete(_ranges.v64, IOAddressRange, _rangesCount);
        else
            IODelete(_ranges.v, IOVirtualRange, _rangesCount);
+
+       _ranges.v = NULL;
     }
 
     if (reserved && reserved->devicePager)
index 9af5919f41bcdf5ce860109357c90039d9a91eb2..81568ee1e1d5dddbeef7d9361ffd9cd385db595c 100644 (file)
@@ -1076,6 +1076,9 @@ void IOPMrootDomain::powerChangeDone ( unsigned long previousState )
             // re-enable this timer for next sleep
             idleSleepPending = false;
             gSleepOrShutdownPending = 0;
+
+            // Invalidate prior activity tickles to allow wake from doze.
+            if (wrangler) wrangler->changePowerStateTo(0);
             break;
             
        case RESTART_STATE:
@@ -1653,12 +1656,87 @@ void IOPMrootDomain::informCPUStateChange(
 #endif __i386__
 }
 
+//******************************************************************************
+// systemPowerEventOccurred
+//
+// The power controller is notifying us of a hardware-related power management
+// event that we must handle. 
+//
+// systemPowerEventOccurred covers the same functionality as
+// receivePowerNotification; it simply provides a richer API for conveying
+// more information.
+//******************************************************************************
+IOReturn IOPMrootDomain::systemPowerEventOccurred(
+    const OSSymbol *event,
+    uint32_t intValue)
+{
+    IOReturn        attempt = kIOReturnSuccess;
+    OSNumber        *newNumber = NULL;
+
+    if (!event) 
+        return kIOReturnBadArgument;
+        
+    newNumber = OSNumber::withNumber(intValue, 8*sizeof(intValue));
+    if (!newNumber)
+        return kIOReturnInternalError;
+
+    attempt = systemPowerEventOccurred(event, (OSObject *)newNumber);
+
+    newNumber->release();
+
+    return attempt;
+}
+
+IOReturn IOPMrootDomain::systemPowerEventOccurred(
+    const OSSymbol *event,
+    OSObject *value)
+{
+    OSDictionary *thermalsDict = NULL;
+    bool shouldUpdate = true;
+    
+    if (!event || !value) 
+        return kIOReturnBadArgument;
+
+    // LOCK
+    // We reuse the featuresDict lock because it already exists and guards
+    // the very infrequently used publish/remove feature mechanism, so there's
+    // zero risk of contending on that lock.
+    if (featuresDictLock) IOLockLock(featuresDictLock);
+
+    thermalsDict = (OSDictionary *)getProperty(kIOPMRootDomainPowerStatusKey);
+                   
+    if (thermalsDict && OSDynamicCast(OSDictionary, thermalsDict)) {
+        thermalsDict = OSDictionary::withDictionary(thermalsDict);                        
+    } else {
+        thermalsDict = OSDictionary::withCapacity(1);
+    }
+
+    if (!thermalsDict) {
+        shouldUpdate = false;
+        goto exit;
+    }
+
+    thermalsDict->setObject (event, value);
+
+    setProperty (kIOPMRootDomainPowerStatusKey, thermalsDict);
+
+    thermalsDict->release();
+
+exit:
+    // UNLOCK
+    if (featuresDictLock) IOLockUnlock(featuresDictLock);
+
+    if (shouldUpdate)
+        messageClients (kIOPMMessageSystemPowerEventOccurred, (void *)NULL);
+
+    return kIOReturnSuccess;
+}
+
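
A hypothetical client would report a hardware power event through the new entry
point roughly as follows; the event key and the reporting function are made up
for illustration, while systemPowerEventOccurred() and its OSNumber-wrapping
behavior are as added above:

    // Sketch: a platform driver pushing a power/thermal event into the
    // root domain.  The integer overload wraps 'level' in an OSNumber,
    // files it in the kIOPMRootDomainPowerStatusKey dictionary, and
    // messages clients with kIOPMMessageSystemPowerEventOccurred.
    static void
    reportPowerEvent(IOPMrootDomain *root, uint32_t level)
    {
        const OSSymbol *event =
            OSSymbol::withCString("HypotheticalThermalLevel");  // made-up key
        if (!event)
            return;

        (void) root->systemPowerEventOccurred(event, level);
        event->release();
    }
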
 
 //******************************************************************************
 // receivePowerNotification
 //
 // The power controller is notifying us of a hardware-related power management
-// event that we must handle. This is a result of an 'environment' interrupt from
+// event that we must handle. This may be a result of an 'environment' interrupt from
 // the power mgt micro.
 //******************************************************************************
 
index 03d349e4f8513594a0583e043d665d3d18f548c4..1b53461ecd5b7cf14fb2f7522f1236d0a8be6a86 100644 (file)
@@ -46,6 +46,7 @@
 #include <IOKit/system.h>
 
 #include <libkern/c++/OSContainers.h>
+#include <libkern/crypto/sha1.h>
 
 extern "C" {
 #include <machine/machine_routines.h>
@@ -858,29 +859,57 @@ void PESetGMTTimeOfDay(long secs)
 void IOPlatformExpert::registerNVRAMController(IONVRAMController * caller)
 {
     OSData *          data;
-    IORegistryEntry * nvram;
-    OSString *        string;
+    IORegistryEntry * entry;
+    OSString *        string = 0;
+    char              uuid[ 36 + 1 ];
 
-    nvram = IORegistryEntry::fromPath( "/options", gIODTPlane );
-    if ( nvram )
+    entry = IORegistryEntry::fromPath( "/efi/platform", gIODTPlane );
+    if ( entry )
     {
-        data = OSDynamicCast( OSData, nvram->getProperty( "platform-uuid" ) );
-        if ( data && data->getLength( ) == sizeof( uuid_t ) )
+        data = OSDynamicCast( OSData, entry->getProperty( "system-id" ) );
+        if ( data && data->getLength( ) == 16 )
         {
-            char uuid[ 36 + 1 ];
-            uuid_unparse( ( UInt8 * ) data->getBytesNoCopy( ), uuid );
+            SHA1_CTX     context;
+            uint8_t      digest[ SHA_DIGEST_LENGTH ];
+            const uuid_t space = { 0x2A, 0x06, 0x19, 0x90, 0xD3, 0x8D, 0x44, 0x40, 0xA1, 0x39, 0xC4, 0x97, 0x70, 0x37, 0x65, 0xAC };
 
+            SHA1Init( &context );
+            SHA1Update( &context, space, sizeof( space ) );
+            SHA1Update( &context, data->getBytesNoCopy( ), data->getLength( ) );
+            SHA1Final( digest, &context );
+
+            digest[ 6 ] = ( digest[ 6 ] & 0x0F ) | 0x50;
+            digest[ 8 ] = ( digest[ 8 ] & 0x3F ) | 0x80;
+
+            uuid_unparse( digest, uuid );
             string = OSString::withCString( uuid );
-            if ( string )
-            {
-                getProvider( )->setProperty( kIOPlatformUUIDKey, string );
-                publishResource( kIOPlatformUUIDKey, string );
+        }
 
-                string->release( );
+        entry->release( );
+    }
+
+    if ( string == 0 )
+    {
+        entry = IORegistryEntry::fromPath( "/options", gIODTPlane );
+        if ( entry )
+        {
+            data = OSDynamicCast( OSData, entry->getProperty( "platform-uuid" ) );
+            if ( data && data->getLength( ) == sizeof( uuid_t ) )
+            {
+                uuid_unparse( ( uint8_t * ) data->getBytesNoCopy( ), uuid );
+                string = OSString::withCString( uuid );
             }
+
+            entry->release( );
         }
+    }
+
+    if ( string )
+    {
+        getProvider( )->setProperty( kIOPlatformUUIDKey, string );
+        publishResource( kIOPlatformUUIDKey, string );
 
-        nvram->release( );
+        string->release( );
     }
 
     publishResource("IONVRAM");
@@ -1281,7 +1310,7 @@ IOReturn IOPlatformExpertDevice::setProperties( OSObject * properties )
     object = dictionary->getObject( kIOPlatformUUIDKey );
     if ( object )
     {
-        IORegistryEntry * nvram;
+        IORegistryEntry * entry;
         OSString *        string;
         uuid_t            uuid;
 
@@ -1294,11 +1323,11 @@ IOReturn IOPlatformExpertDevice::setProperties( OSObject * properties )
         status = uuid_parse( string->getCStringNoCopy( ), uuid );
         if ( status != 0 ) return kIOReturnBadArgument;
 
-        nvram = IORegistryEntry::fromPath( "/options", gIODTPlane );
-        if ( nvram )
+        entry = IORegistryEntry::fromPath( "/options", gIODTPlane );
+        if ( entry )
         {
-            nvram->setProperty( "platform-uuid", uuid, sizeof( uuid_t ) );
-            nvram->release( );
+            entry->setProperty( "platform-uuid", uuid, sizeof( uuid_t ) );
+            entry->release( );
         }
 
         setProperty( kIOPlatformUUIDKey, string );
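
What the hashing above implements is the RFC 4122 name-based (version 5) UUID:
SHA-1 over a fixed namespace UUID concatenated with the name, then the version
nibble forced to 5 and the variant bits to 10. A standalone sketch using the
same libkern SHA-1 calls the diff pulls in (the memcpy and headers are for the
sketch; in-kernel code would differ slightly):

    #include <string.h>
    #include <uuid/uuid.h>
    #include <libkern/crypto/sha1.h>

    // Sketch: derive a version-5 UUID from a namespace UUID and a name,
    // as registerNVRAMController() now does for the EFI "system-id".
    static void
    uuid_v5(const uuid_t space, const void *name, size_t len, uuid_t out)
    {
        SHA1_CTX context;
        uint8_t  digest[SHA_DIGEST_LENGTH];      // 20 bytes; first 16 used

        SHA1Init(&context);
        SHA1Update(&context, space, sizeof(uuid_t));
        SHA1Update(&context, name, len);
        SHA1Final(digest, &context);

        digest[6] = (digest[6] & 0x0F) | 0x50;   // version 5 (SHA-1, name-based)
        digest[8] = (digest[8] & 0x3F) | 0x80;   // RFC 4122 variant

        memcpy(out, digest, sizeof(uuid_t));
    }

Since the result is a pure function of the namespace and the system-id, the
published kIOPlatformUUIDKey stays stable across boots, with the /options
"platform-uuid" property kept as the fallback path.
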
index 7621a257f032f718745f1ea39427ba2a29220f67..895f27b986fe2c1107991ca9540746ea680e6991 100644 (file)
@@ -43,6 +43,7 @@ extern "C" {
 
 extern dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys);
 extern dev_t mdevlookup(int devid);
+extern void mdevremoveall(void);
 
 kern_return_t
 IOKitBSDInit( void )
@@ -776,14 +777,19 @@ iofrootx:
 void IOSecureBSDRoot(const char * rootName)
 {
 #if CONFIG_EMBEDDED
+    IOReturn         result;
     IOPlatformExpert *pe;
-    const OSSymbol *functionName = OSSymbol::withCStringNoCopy("SecureRootName");
+    const OSSymbol   *functionName = OSSymbol::withCStringNoCopy("SecureRootName");
     
     while ((pe = IOService::getPlatform()) == 0) IOSleep(1 * 1000);
     
-    pe->callPlatformFunction(functionName, false, (void *)rootName, (void *)0, (void *)0, (void *)0);
+    // Returns kIOReturnNotPrivileged if the root device is not secure.
+    // Returns kIOReturnUnsupported if "SecureRootName" is not implemented.
+    result = pe->callPlatformFunction(functionName, false, (void *)rootName, (void *)0, (void *)0, (void *)0);
     
     functionName->release();
+    
+    if (result == kIOReturnNotPrivileged) mdevremoveall();
 #endif
 }
 
index bf820b20ea9894b34be4af50c5a7b5b6c2cf6f8f..ae3f0e88be400d4b0814dfa9954af1816e53c76b 100644 (file)
@@ -63,7 +63,6 @@ options               IOKITCPP        # C++ implementation            # <iokitcpp>
 options                KDEBUG          # kernel tracing                # <kdebug>
 options                NETWORKING      # kernel networking             # <networking>
 options                CRYPTO          # want crypto code              # <crypto>
-options                KPIDIRECT       # direct access                 # <kpidirect>
 options     CONFIG_DTRACE # enable dtrace       # <config_dtrace>
 
 #makeoptions   LIBDRIVER = "libDriver_kern.o"                  # <libdriver>
index dea0585dcaf72fd2d61ecd323a4e5172d208266e..3b87d080aace5a15884892ef9cec913a4204c4c1 100644 (file)
@@ -3,7 +3,6 @@
 OPTIONS/iokitcpp                                       optional iokitcpp
 OPTIONS/kdebug                                         optional kdebug
 OPTIONS/networking                                     optional networking
-OPTIONS/kpidirect                                      optional kpidirect
 OPTIONS/hibernation                                    optional hibernation
 OPTIONS/crypto                                         optional crypto
 OPTIONS/config_dtrace                          optional config_dtrace
index f0f7e3df4b260f92914557b18fdd333e90469f31..8da2cc9b22cce7300b5e202799a6ab2dc39cc99f 100644 (file)
--- a/kgmacros
+++ b/kgmacros
@@ -5383,3 +5383,169 @@ document showMCAstate
 Syntax: showMCAstate
 | Print machine-check register state after MC exception.
 end
+
+define _pt_step
+    #
+    # Step to lower-level page table and print attributes
+    #   $kgm_pt_paddr: current page table entry physical address
+    #   $kgm_pt_index: current page table entry index (0..511)
+    # returns
+    #   $kgm_pt_paddr: next level page table entry physical address
+    #                  or null if invalid
+    # For $kgm_pt_verbose = 0: print nothing
+    #                       1: print basic information
+    #                       2: print basic information and hex table dump
+    # The trickery with kdp_src_high32 is required for accesses above 4GB.
+    #
+    set $kgm_entryp = $kgm_pt_paddr + 8*$kgm_pt_index
+    set kdp_src_high32 = $kgm_pt_paddr >> 32
+    set kdp_trans_off = 1
+    set $entry =  *(pt_entry_t *)($kgm_entryp & 0x0ffffffffULL)
+    if $kgm_pt_verbose == 2
+        x/512g ($kgm_pt_paddr & 0x0ffffffffULL)
+    end
+    set kdp_trans_off = 0
+    set kdp_src_high32 = 0
+    set $kgm_paddr_mask = ~((0xffffULL<<48) | 0xfffULL)
+    if $kgm_pt_verbose == 0
+        if $entry & (0x1 << 0)
+            set $kgm_pt_paddr = $entry & $kgm_paddr_mask
+        else
+            set $kgm_pt_paddr = 0
+        end
+    else
+        printf "0x%016llx:\n\t0x%016llx\n\t", $kgm_entryp, $entry
+        if $entry & (0x1 << 0)
+            printf "valid"     
+            set $kgm_pt_paddr = $entry & $kgm_paddr_mask
+        else
+            printf "invalid"
+            set $kgm_pt_paddr = 0
+        end
+        if $entry & (0x1 << 1)
+            printf " writeable" 
+        else
+            printf " read-only" 
+        end
+        if $entry & (0x1 << 2)
+            printf " user" 
+        else
+            printf " supervisor" 
+        end
+        if $entry & (0x1 << 3)
+            printf " PWT" 
+        end
+        if $entry & (0x1 << 4)
+            printf " PCD" 
+        end
+        if $entry & (0x1 << 5)
+            printf " accessed" 
+        end
+        if $entry & (0x1 << 6)
+            printf " dirty" 
+        end
+        if $entry & (0x1 << 7)
+            printf " PAT" 
+        end
+        if $entry & (0x1 << 8)
+            printf " global" 
+        end
+        if $entry & (0x3 << 9)
+            printf " avail:0x%x", ($entry >> 9) & 0x3
+        end
+        if $entry & (0x1 << 63)
+            printf " noexec" 
+        end
+        printf "\n"
+    end
+end
+
+define _pmap_walk
+    set $kgm_pmap = (pmap_t) $arg0
+    set $kgm_vaddr = $arg1
+    set $kgm_pt_paddr = $kgm_pmap->pm_cr3
+    if $kgm_pt_paddr && cpu_64bit
+        set $kgm_pt_index = ($kgm_vaddr >> 39) & 0x1ffULL
+        if $kgm_pt_verbose
+            printf "pml4 (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_paddr
+        set $kgm_pt_index = ($kgm_vaddr >> 30) & 0x1ffULL
+        if $kgm_pt_verbose
+            printf "pdpt (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_paddr
+        set $kgm_pt_index = ($kgm_vaddr >> 21) & 0x1ffULL
+        if $kgm_pt_verbose
+            printf "pdt (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_paddr
+        set $kgm_pt_index = ($kgm_vaddr >> 12) & 0x1ffULL
+        if $kgm_pt_verbose
+            printf "pt (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_paddr
+        set $kgm_paddr = $kgm_pt_paddr + ($kgm_vaddr & 0xfffULL)
+        set kdp_trans_off = 1
+        set kdp_src_high32 = $kgm_paddr >> 32
+        set $kgm_value = *($kgm_paddr & 0x0ffffffffULL)
+        set kdp_trans_off = 0
+        set kdp_src_high32 = 0
+        printf "phys 0x%016llx: 0x%08x\n", $kgm_paddr, $kgm_value
+    else
+        set $kgm_paddr = 0
+        printf "(no translation)\n"
+    end
+end
+
+define pmap_walk
+    if $kgm_mtype != 7
+        printf "Not available for current architecture.\n"
+    else
+        if $argc != 2
+            printf "pmap_walk <pmap> <vaddr>\n"
+        else
+            if !$kgm_pt_verbose
+                set $kgm_pt_verbose = 1
+            else
+                if $kgm_pt_verbose != 2
+                    set $kgm_pt_verbose = 1
+                end
+            end
+            _pmap_walk $arg0 $arg1
+        end
+    end
+end
+
+document pmap_walk
+Syntax: (gdb) pmap_walk <pmap> <virtual_address>
+| Perform a page-table walk in <pmap> for <virtual_address>.
+| Set $kgm_pt_verbose=2 for full hex dump of page tables.
+end
+
+define pmap_vtop
+    if $kgm_mtype != 7
+        printf "Not available for current architecture.\n"
+    else
+        if $argc != 2
+            printf "pmap_vtop <pamp> <vaddr>\n"
+        else
+            set $kgm_pt_verbose = 0
+            _pmap_walk $arg0 $arg1
+        end
+    end
+end
+
+document pmap_vtop
+Syntax: (gdb) pmap_vtop <pmap> <virtual_address>
+| Translate <virtual_address> to a physical address using the page tables in <pmap>.
+end
+
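
The macros above perform the standard x86-64 four-level walk: each level
consumes 9 bits of the virtual address (shifts 39, 30, 21, 12), bit 0 of an
entry is the valid bit, and bits 51:12 give the next table's physical base.
The same walk in C, with a hypothetical read_phys64() accessor in place of the
kdp_src_high32/kdp_trans_off trickery (large pages are ignored, as in the
macro):

    #include <stdint.h>

    #define PT_VALID       (1ULL << 0)
    #define PT_PADDR_MASK  (~((0xFFFFULL << 48) | 0xFFFULL))  /* bits 51:12 */

    extern uint64_t read_phys64(uint64_t paddr);  /* stand-in accessor */

    /* Sketch of pmap_walk/pmap_vtop: translate vaddr via the four-level
     * tables rooted at cr3; returns 0 if any level is invalid. */
    static uint64_t
    pt_translate(uint64_t cr3, uint64_t vaddr)
    {
        uint64_t table = cr3;
        int      shift;

        for (shift = 39; shift >= 12; shift -= 9) {    /* pml4,pdpt,pdt,pt */
            uint64_t index = (vaddr >> shift) & 0x1FF; /* 9 bits per level */
            uint64_t entry = read_phys64(table + 8 * index);

            if (!(entry & PT_VALID))
                return 0;                              /* no translation */
            table = entry & PT_PADDR_MASK;
        }
        return table + (vaddr & 0xFFF);                /* add page offset */
    }
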
index 9b193ef21e43af5f2af50f69644af8c18b686ebd..ab642795b8be35c0845b16975e090bcc8bd73a05 100644 (file)
@@ -43,7 +43,7 @@ MAKEOBJDIR ?= ${OBJROOT}
 # add version string
 SRCS += libsyscall_version.c
 libsyscall_version.c:
-       /Developer/Makefiles/bin/version.pl Libsyscall > $@
+       ${NEXT_ROOT}/Developer/Makefiles/bin/version.pl Libsyscall > $@
 
 CFLAGS += -I${SYMROOT}
 .include "${.CURDIR}/Makefile.inc"
index 62721adbded3f399cbf79d32ce8f7a3def2ebfdd..4b3d8d543e91dbb7ab11160ca6a122996776a021 100644 (file)
@@ -94,9 +94,15 @@ PRIVHDRSPPC = ${PRIVHDRS}/architecture/ppc
 KERNELFRAMEWORK = ${DESTDIR}/System/Library/Frameworks/Kernel.framework
 PRIVKERNELHDRS = ${KERNELFRAMEWORK}/Versions/A/PrivateHeaders
 
+.if ${MACHINE_ARCH} == armv6
+ARCHDIR = arm
+.else
+ARCHDIR = ${MACHINE_ARCH}
+.endif
+
 installhdrs-md: gen_md_mig_defs
-       mkdir -p ${INCDIR}/mach/${MACHINE_ARCH}
-       ${INSTALL} -o 0 -c -m 444 ${MD_MIGHDRS} ${INCDIR}/mach/${MACHINE_ARCH}
+       mkdir -p ${INCDIR}/mach/${ARCHDIR}
+       ${INSTALL} -o 0 -c -m 444 ${MD_MIGHDRS} ${INCDIR}/mach/${ARCHDIR}
        mkdir -p ${PRIVHDRSPPC}
        ${INSTALL} -c -m 444 ${PRIVHDRSPPCHDRS} ${PRIVHDRSPPC}
 
index f54f344ed8d1462ebba06466ff057fa1566945ea..83bf17c1f40ea3f03014a2d8b4c9973a3dbecd49 100755 (executable)
@@ -102,7 +102,7 @@ my %TypeBytes = (
 
 ##########################################################################
 # Make a __xxx.s file: if it exists in the $CustomDir, just copy it, otherwise
-# create one.  We define the macro __SYSCALL_I386_ARG_BYTES so that SYS.h could
+# create one.  We define the macro __SYSCALL_32BIT_ARG_BYTES so that SYS.h could
 # use that to define __SYSCALL dependent on the arguments' total size.
 ##########################################################################
 sub make_s {
@@ -119,7 +119,7 @@ sub make_s {
     } else {
        my $f = IO::File->new($path, 'w');
        die "$MyName: $path: $!\n" unless defined($f);
-       print $f "#define __SYSCALL_I386_ARG_BYTES $bytes\n\n";
+       print $f "#define __SYSCALL_32BIT_ARG_BYTES $bytes\n\n";
        print $f "#include \"SYS.h\"\n\n";
        print $f "__SYSCALL($pseudo, $name, $args)\n";
        print "Creating $path\n";
index 53039d9e3e728e099a10f37a56153c65d8218a35..af9074020ffef70bbcf9a1ec935478c94f0de47d 100644 (file)
@@ -138,14 +138,14 @@ LEAF(_##name, 0)                                  ;\
        BRANCH_EXTERN(cerror)                           ;\
 2:
 
-#if defined(__SYSCALL_I386_ARG_BYTES) && ((__SYSCALL_I386_ARG_BYTES >= 4) && (__SYSCALL_I386_ARG_BYTES <= 20))
+#if defined(__SYSCALL_32BIT_ARG_BYTES) && ((__SYSCALL_32BIT_ARG_BYTES >= 4) && (__SYSCALL_32BIT_ARG_BYTES <= 20))
 #define UNIX_SYSCALL_NONAME(name, nargs)                       \
-       movl    $(SYS_##name | (__SYSCALL_I386_ARG_BYTES << I386_SYSCALL_ARG_BYTES_SHIFT)), %eax                ;\
+       movl    $(SYS_##name | (__SYSCALL_32BIT_ARG_BYTES << I386_SYSCALL_ARG_BYTES_SHIFT)), %eax               ;\
        UNIX_SYSCALL_SYSENTER                                   ;\
        jnb     2f                                              ;\
        BRANCH_EXTERN(cerror)                                   ;\
 2:
-#else /* __SYSCALL_I386_ARG_BYTES < 4 || > 20 */
+#else /* __SYSCALL_32BIT_ARG_BYTES < 4 || > 20 */
 #define UNIX_SYSCALL_NONAME(name, nargs)               \
        .globl  cerror                                  ;\
        movl    $ SYS_##name, %eax                      ;\
index 4efe509ce606f8f7855737aecbafbb827b4355d5..40048e71ee8b578ba8a7dd5515a304d4c28bef43 100644 (file)
@@ -1,6 +1,11 @@
 # machine-dependent mach sources
-.if exists(${.CURDIR}/mach/${MACHINE_ARCH}/Makefile.inc)
-.include "${.CURDIR}/mach/${MACHINE_ARCH}/Makefile.inc"
+.if ${MACHINE_ARCH} == armv6
+ARCHDIR = arm
+.else
+ARCHDIR = ${MACHINE_ARCH}
+.endif
+.if exists(${.CURDIR}/mach/${ARCHDIR}/Makefile.inc)
+.include "${.CURDIR}/mach/${ARCHDIR}/Makefile.inc"
 .endif
 
 .PATH: ${.CURDIR}/mach
index b08213bc0f27992a2fdefe4b974a32bc1df23f65..0d4989a32099eeab41c0853bcfff146663d45fa2 100644 (file)
@@ -84,47 +84,10 @@ $(error There were $(words $(KERNEL_CONFIG)) parameters passed to KERNEL_CONFIG
                Are you sure? To specify multiple configurations please use KERNEL_CONFIGS)
 endif
 
-#
-# Machine Configuration options  
-#
-# ppc supported configurations : none
-# i386 supported configurations : none
-# arm supported configurations : LN2410SBC MX31ADS INTEGRATORCP S5I3000SMDK S5L8900XFPGA S5L8900XRB OLOCREEK
-#
-ifndef SUPPORTED_MACHINE_CONFIGS
-export SUPPORTED_MACHINE_CONFIGS = LN2410SBC MX31ADS INTEGRATORCP S5I3000SMDK S5L8900XFPGA S5L8900XRB OLOCREEK DEFAULT
-endif
-
-export DEFAULT_ARM_MACHINE_CONFIG      = S5L8900XRB
-
 ifndef MACHINE_CONFIG
 export MACHINE_CONFIG  = DEFAULT
 endif
 
-ifndef MACHINE_FLAGS_LN2410SBC
-export MACHINE_FLAGS_LN2410SBC = -DARM_BOARD_CONFIG_LN2410_920T
-endif
-ifndef MACHINE_FLAGS_MX31ADS
-export MACHINE_FLAGS_MX31ADS = -DARM_BOARD_CONFIG_MX31ADS_1136JFS
-endif
-ifndef MACHINE_FLAGS_INTEGRATORCP
-export MACHINE_FLAGS_INTEGRATORCP = -DARM_BOARD_CONFIG_INTEGRATORCP_1136JFS
-endif
-ifndef MACHINE_FLAGS_S5I3000SMDK
-export MACHINE_FLAGS_S5I3000SMDK = -DARM_BOARD_CONFIG_S5I3000SMDK_1176JZFS
-endif
-ifndef MACHINE_FLAGS_S5L8900XFPGA
-export MACHINE_FLAGS_S5L8900XFPGA = -DARM_BOARD_CONFIG_S5L8900XFPGA_1136JFS
-endif
-ifndef MACHINE_FLAGS_S5L8900XRB
-export MACHINE_FLAGS_S5L8900XRB = -DARM_BOARD_CONFIG_S5L8900XRB
-endif
-ifndef MACHINE_FLAGS_OLOCREEK
-export MACHINE_FLAGS_OLOCREEK = -DARM_BOARD_CONFIG_OLOCREEK
-endif
-ifndef MACHINE_FLAGS_DEFAULT
-export MACHINE_FLAGS_DEFAULT =
-endif
 
 #
 # Target configuration options.  NOTE - target configurations will 
@@ -234,13 +197,6 @@ ARCH_FLAGS_PPC               = -arch ppc
 ARCH_FLAGS_I386                  = -arch i386
 ARCH_FLAGS_ARM           = $($(addsuffix $(MACHINE_CONFIG),ARCH_FLAGS_ARM_))
 
-ARCH_FLAGS_ARM_LN2410SBC         = -arch arm
-ARCH_FLAGS_ARM_MX31ADS           = -arch armv6
-ARCH_FLAGS_ARM_INTEGRATORCP      = -arch armv6
-ARCH_FLAGS_ARM_S5I3000SMDK       = -arch armv6
-ARCH_FLAGS_ARM_S5L8900XFPGA      = -arch armv6
-ARCH_FLAGS_ARM_S5L8900XRB        = -arch armv6
-ARCH_FLAGS_ARM_OLOCREEK                  = -arch arm
 
 #
 # Default CFLAGS
@@ -290,6 +246,12 @@ endif
 ifeq (-arch armv6,$(ARCH_FLAGS_ARM))
 CFLAGS_ARM             += -mthumb
 endif
+ifeq (-arch armv5,$(ARCH_FLAGS_ARM))
+CFLAGS_ARM             += -mthumb
+endif
+ifeq (-arch xscale,$(ARCH_FLAGS_ARM))
+CFLAGS_ARM             += -mthumb
+endif
 
 export CFLAGS_RELEASEPPC = -O2 -mcpu=750 -mmultiple
 export CFLAGS_RELEASE_TRACEPPC = -O2 -mcpu=750 -mmultiple
index 01731eae512f17973515805f2de6067eb25100d6..5eb745c8764b28d59950ff1a092d4f2c16060e13 100644 (file)
@@ -58,5 +58,9 @@ options               PAE
 options                X86_64
 options                DISPATCH_COUNTS
 
+#
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
+#
 options                CONFIG_MACF             # Mandatory Access Control Framework
 #options       CONFIG_MACF_MACH        # MACF applied to Mach services
index d655eea9eb459fb3c44af5db9343981434606870..09dfbf8ee20e6d2fc66284803313c730417ac65e 100644 (file)
@@ -58,5 +58,9 @@ options               POWERMAC
 
 options                DISPATCH_COUNTS
 
+#
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
+#
 options                CONFIG_MACF             # Mandatory Access Control Framework
 #options       CONFIG_MACF_MACH        # MACF applied to Mach services
index eb86e791ea75c70b5e8bdfcfe11e058e79e3575e..6743dc70bf83ea8bd428a79a0aee71bfca166cf0 100644 (file)
@@ -670,11 +670,13 @@ panic_io_port_read(void) {
 /* For use with the MP rendezvous mechanism
  */
 
+#if !CONFIG_EMBEDDED
 static void
 machine_halt_cpu(__unused void *arg) {
        panic_io_port_read();
        pmCPUHalt(PM_HALT_DEBUG);
 }
+#endif
 
 void
 Debugger(
index 539a82fde9ad53133d19ec6bd0bbb138c66f507c..64c21447ee624d97d1b02db5b7121f19c86a5590 100644 (file)
@@ -173,7 +173,11 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
        acpi_sleep_cpu(func, refcon);
 #endif
 
-       /* reset UART if kprintf is enabled */
+       /* Reset UART if kprintf is enabled.
+        * However, kprintf should not be used before rtc_sleep_wakeup(),
+        * for compatibility with kprintf output over FireWire.
+        */
+
        if (FALSE == disable_serial_output)
                serial_init();
 
index 52c70d903079b8131928214473641bca8a339ba7..5ae9621d52d8dbc20d304fdf936dde3921c17ccf 100644 (file)
@@ -280,24 +280,6 @@ hpet_init(void)
        DBG(" CVT: HPET to BUS = %08X.%08X\n",
            (uint32_t)(hpet2bus >> 32), (uint32_t)hpet2bus);
 
-       /* Make sure the counter is off in the HPET configuration flags */
-       uint64_t hpetcon = ((hpetReg_t *)hpetArea)->GEN_CONF;
-       hpetcon = hpetcon & ~1;
-       ((hpetReg_t *)hpetArea)->GEN_CONF = hpetcon;
-
-       /*
-        * Convert current TSC to HPET value,
-        * set it, and start it ticking.
-        */
-       uint64_t currtsc = rdtsc64();
-       uint64_t tscInHPET = tmrCvt(currtsc, tsc2hpet);
-       ((hpetReg_t *)hpetArea)->MAIN_CNT = tscInHPET;
-       hpetcon = hpetcon | 1;
-       ((hpetReg_t *)hpetArea)->GEN_CONF = hpetcon;
-       kprintf("HPET started: TSC = %08X.%08X, HPET = %08X.%08X\n", 
-               (uint32_t)(currtsc >> 32), (uint32_t)currtsc,
-               (uint32_t)(tscInHPET >> 32), (uint32_t)tscInHPET);
-
 #if MACH_KDB
        db_display_hpet((hpetReg_t *)hpetArea); /* (BRINGUP) */
 #endif
@@ -317,8 +299,13 @@ hpet_get_info(hpetInfo_t *info)
     info->hpet2tsc   = hpet2tsc;
     info->bus2hpet   = bus2hpet;
     info->hpet2bus   = hpet2bus;
-    info->rcbaArea   = rcbaArea;
-    info->rcbaAreap  = rcbaAreap;
+    /*
+     * XXX
+     * We're repurposing the rcbaArea so we can use the HPET.
+     * Eventually we'll rename this correctly.
+     */
+    info->rcbaArea   = hpetArea;
+    info->rcbaAreap  = hpetAreap;
 }
 
 
index 8abf223f975612d1ec7ccbc3a9700b81ba8dcc30..214a588b75fdd7542a038493466fd19238952a67 100644 (file)
@@ -308,11 +308,18 @@ mca_dump(void)
 {
        ia32_mcg_status_t       status;
 
-       mca_exception_taken = TRUE;
        mca_save_state();
 
-       /* Serialize in case of multiple simultaneous machine-checks */
+       /*
+        * Serialize in case of multiple simultaneous machine-checks.
+        * Only the first caller is allowed to print MCA registers.
+        */
        simple_lock(&mca_lock);
+       if (mca_exception_taken) {
+               simple_unlock(&mca_lock);
+               return;
+       }
+       mca_exception_taken = TRUE;
 
        /*
         * Report machine-check capabilities:
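
The reordering above makes concurrent machine checks safe: every CPU still
records its own register state, but only the first CPU through the lock
prints, and later callers return instead of interleaving output. A minimal
sketch of the first-caller-wins pattern, with stand-in helpers and a lock
assumed initialized elsewhere:

    extern void save_state(void);        /* stand-in: record this CPU's state */
    extern void print_report(void);      /* stand-in: slow console reporting  */
    static simple_lock_data_t report_lock;   /* simple_lock_init() elsewhere  */

    static boolean_t reported = FALSE;   /* guarded by report_lock */

    void
    dump_once(void)
    {
        save_state();                    /* cheap; every caller does this */

        simple_lock(&report_lock);
        if (reported) {
            simple_unlock(&report_lock);
            return;                      /* another CPU already reported */
        }
        reported = TRUE;

        print_report();                  /* under the lock, so output from
                                          * simultaneous MCEs can't interleave */
        simple_unlock(&report_lock);
    }
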
index 8d9036a15e97939900af2a7c87b881d613eae752..93d45455d07cf6f655febdeafb2f1b8c9ce3b59e 100644 (file)
@@ -126,7 +126,7 @@ extern void rtc_clock_stepping(
 extern void    rtc_clock_stepped(
                        uint32_t new_frequency,
                        uint32_t old_frequency);
-extern void    rtc_clock_napped(uint64_t);
+extern void    rtc_clock_napped(uint64_t, uint64_t);
 
 extern void     x86_lowmem_free(void);
 
index 12a071c2ccb87aeaac1fc854fca617951c348c83..75bbe25cf81c27614e5e01648f8d09f8e01ed301 100644 (file)
@@ -457,8 +457,11 @@ fast_syscall_init64(void)
         */
        wrmsr64(MSR_IA32_KERNEL_GS_BASE,
                UBER64((unsigned long)current_cpu_datap()));
+
+#if ONLY_SAFE_FOR_LINDA_SERIAL
        kprintf("fast_syscall_init64() KERNEL_GS_BASE=0x%016llx\n",
                rdmsr64(MSR_IA32_KERNEL_GS_BASE));
+#endif
 }
 
 /*
@@ -725,7 +728,9 @@ cpu_desc_load64(cpu_data_t *cdp)
        
        ml_load_desc64();
 
+#if ONLY_SAFE_FOR_LINDA_SERIAL
        kprintf("64-bit descriptor tables loaded\n");
+#endif
 }
 
 void
index 36dae2f3ef138edd719cd43106ceea56865a6a75..72ecf5f761a771085e94ed83c0f383deb23f8530 100644 (file)
@@ -221,7 +221,11 @@ void dump_4GB_pdpt_thread(thread_t tp);
 #define        iswired(pte)    ((pte) & INTEL_PTE_WIRED)
 
 int nx_enabled = 1;                    /* enable no-execute protection */
+#ifdef CONFIG_EMBEDDED
+int allow_data_exec  = 0;      /* no exec from data, embedded is hardcore like that */
+#else
 int allow_data_exec  = VM_ABI_32;      /* 32-bit apps may execute data by default, 64-bit apps may not */
+#endif
 int allow_stack_exec = 0;              /* No apps may execute from the stack by default */
 
 int cpu_64bit  = 0;
index bad2abbe78eea436909336b8c76c54a5f1cd1a6c..a1784f3bfcc9dc2d4116ba74807a08e921366e46 100644 (file)
@@ -107,6 +107,28 @@ extern uint64_t            _rtc_nanotime_read(
 rtc_nanotime_t rtc_nanotime_info = {0,0,0,0,1,0};
 
 
+/*
+ * tsc_to_nanoseconds:
+ *
+ * Basic routine to convert a raw 64 bit TSC value to a
+ * 64 bit nanosecond value.  The conversion is implemented
+ * based on the scale factor and an implicit 32 bit shift.
+ */
+static inline uint64_t
+_tsc_to_nanoseconds(uint64_t value)
+{
+    asm volatile("movl %%edx,%%esi     ;"
+                "mull  %%ecx           ;"
+                "movl  %%edx,%%edi     ;"
+                "movl  %%esi,%%eax     ;"
+                "mull  %%ecx           ;"
+                "addl  %%edi,%%eax     ;"      
+                "adcl  $0,%%edx         "
+                : "+A" (value) : "c" (rtc_nanotime_info.scale) : "esi", "edi");
+
+    return (value);
+}
+
 static uint32_t
 deadline_to_decrementer(
        uint64_t        deadline,
@@ -234,26 +256,31 @@ rtc_nanotime_read(void)
 /*
  * rtc_clock_napped:
  *
- * Invoked from power manangement when we have awoken from a nap (C3/C4)
- * during which the TSC lost counts.  The nanotime data is updated according
- * to the provided value which indicates the number of nanoseconds that the
- * TSC was not counting.
- *
- * The caller must guarantee non-reentrancy.
+ * Invoked from power management when we exit from a low C-State (>= C4)
+ * and the TSC has stopped counting.  The nanotime data is updated according
+ * to the provided value which represents the new value for nanotime.
  */
 void
-rtc_clock_napped(
-       uint64_t                delta)
+rtc_clock_napped(uint64_t base, uint64_t tsc_base)
 {
        rtc_nanotime_t  *rntp = &rtc_nanotime_info;
-       uint32_t        generation;
+       uint64_t        oldnsecs;
+       uint64_t        newnsecs;
+       uint64_t        tsc;
 
        assert(!ml_get_interrupts_enabled());
-       generation = rntp->generation;
-       rntp->generation = 0;
-       rntp->ns_base += delta;
-       rntp->generation = ((generation + 1) != 0) ? (generation + 1) : 1;
-       rtc_nanotime_set_commpage(rntp);
+       tsc = rdtsc64();
+       oldnsecs = rntp->ns_base + _tsc_to_nanoseconds(tsc - rntp->tsc_base);
+       newnsecs = base + _tsc_to_nanoseconds(tsc - tsc_base);
+       
+       /*
+        * Only update the base values if time using the new base values
+        * is later than the time using the old base values.
+        */
+       if (oldnsecs < newnsecs) {
+           _rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp);
+           rtc_nanotime_set_commpage(rntp);
+       }
 }
 
 void
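
For reference, the assembly above is a 64x32-bit multiply that keeps the
middle 64 bits of the 96-bit product, i.e. ns = (tsc_delta * scale) >> 32,
where scale is a 32.32 fixed-point factor. A portable sketch of the same
computation (relying on the GCC __uint128_t extension):

    #include <stdint.h>

    /* Sketch: what _tsc_to_nanoseconds() computes.  'scale' is the
     * rtc_nanotime_info.scale factor: nanoseconds per TSC tick in
     * 32.32 fixed point, so the product is shifted down by 32. */
    static inline uint64_t
    tsc_to_ns(uint64_t tsc_delta, uint32_t scale)
    {
        return (uint64_t)(((__uint128_t)tsc_delta * scale) >> 32);
    }

rtc_clock_napped() then compares the time derived from the current
(ns_base, tsc_base) pair against the time derived from the proposed pair and
adopts the new pair only if it yields a later value, so nanotime never steps
backward across a nap.
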
index 0998530b133c5ac35168fd235fc10fde88576c45..b252c496adba2e4767cfcda56f7b82d82ec9ddb5 100644 (file)
@@ -115,7 +115,9 @@ cpu_IA32e_enable(cpu_data_t *cdp)
                : "i" (CR0_PG)
                : "eax" );
        
+#if ONLY_SAFE_FOR_LINDA_SERIAL
        kprintf("cpu_IA32e_enable(%p)\n", cdp);
+#endif
 
        if ((rdmsr64(MSR_IA32_EFER) & MSR_IA32_EFER_LMA) == 0)
                panic("cpu_IA32e_enable() MSR_IA32_EFER_LMA not asserted");
index 85a82e8804aa8fdfa5726ea826e02d9fb5990b85..badd5491f21873ccf374850ee38bb1233aec18e3 100644 (file)
@@ -79,7 +79,7 @@
 #include <i386/eflags.h>
 
 /*
- *     i386_saved_state:
+ *     x86_saved_state32/64:
  *
  *     Has been exported to servers.  See: mach/i386/thread_status.h
  *
index 724bb0f9f7332c2ed8444556f335c0f49afb2f0e..19b7469a694cf38327e495de4d922f581eee6583 100644 (file)
@@ -160,13 +160,16 @@ tsc_init(void)
         * Get the TSC increment.  The TSC is incremented by this
         * on every bus tick.  Calculate the TSC conversion factors
         * to and from nano-seconds.
+        * The tsc granularity is also called the "bus ratio". If the N/2 bit
+        * is set, the true bus ratio is 0.5 more than tscGranularity - i.e.
+        * (2*tscGranularity + 1)/2.
         */
        if (cpuid_info()->cpuid_family == CPU_FAMILY_PENTIUM_M) {
                uint64_t        prfsts;
 
                prfsts = rdmsr64(IA32_PERF_STS);
                tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
-               N_by_2_bus_ratio = prfsts & bit(46);
+               N_by_2_bus_ratio = (prfsts & bit(46)) != 0;
 
        } else {
                panic("rtclock_init: unknown CPU family: 0x%X\n",
@@ -174,20 +177,20 @@ tsc_init(void)
        }
 
        if (N_by_2_bus_ratio)
-               tscFCvtt2n = busFCvtt2n * 2 / (uint64_t)tscGranularity;
+               tscFCvtt2n = busFCvtt2n * 2 / (1 + 2*tscGranularity);
        else
-               tscFCvtt2n = busFCvtt2n / (uint64_t)tscGranularity;
+               tscFCvtt2n = busFCvtt2n / tscGranularity;
 
        tscFreq = ((1 * Giga)  << 32) / tscFCvtt2n;
        tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n;
 
        kprintf(" TSC: Frequency = %6d.%04dMHz, "
-                       "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld\n",
+                       "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n",
                        (uint32_t)(tscFreq / Mega),
                        (uint32_t)(tscFreq % Mega), 
                        (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
                        (uint32_t)(tscFCvtn2t >> 32), (uint32_t)tscFCvtn2t,
-                       tscGranularity);
+                       tscGranularity, N_by_2_bus_ratio ? " (N/2)" : "");
 
        /*
         * Calculate conversion from BUS to TSC
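
Two things are fixed here. First, prfsts & bit(46) is a 64-bit value whose
only possible set bit is bit 46; assigning it straight into the narrower
N_by_2_bus_ratio flag truncates the high bits away, so the N/2 case was never
taken, and the != 0 comparison repairs that. Second, the N/2 arithmetic
itself: a granularity of 13 with N/2 set means a true bus ratio of 13.5, so
the conversion must divide by (2*13 + 1) = 27, not by 13. A compact
illustration (values illustrative):

    #include <stdint.h>

    #define bit(n)  (1ULL << (n))

    /* Sketch of the bit-46 truncation bug and its fix. */
    static uint32_t
    n_by_2_flag(uint64_t prfsts, int fixed)
    {
        if (fixed)
            return (prfsts & bit(46)) != 0;     /* 0 or 1: correct        */
        else
            return prfsts & bit(46);            /* high bits truncated: 0 */
    }
    /* n_by_2_flag(bit(46), 0) == 0 (the bug); n_by_2_flag(bit(46), 1) == 1.
     * With N/2 honored and tscGranularity = 13, the true ratio is 13.5:
     * tscFCvtt2n = busFCvtt2n * 2 / 27, not busFCvtt2n * 2 / 13. */
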
index 84674a9901df498de5daaab1cb0b15665e288b30..29eebc4a8523d2aa59374598f7a5a21d8903af57 100644 (file)
@@ -53,8 +53,6 @@ extern int setPop(uint64_t time);
 
 extern void etimer_resync_deadlines(void);
 
-extern uint32_t rtclock_tick_interval;
-
 #if 0 /* this is currently still MD */
 #pragma pack(push,4)
 struct rtclock_timer_t  {
index 064a58252f081420534e3223fc1ed42272955c20..5f9d4d80ae7ce6dcee66094c8aacdf5c27b941e6 100644 (file)
@@ -561,25 +561,20 @@ lck_mtx_lock_wait (
        priority = self->sched_pri;
        if (priority < self->priority)
                priority = self->priority;
-       if (priority > MINPRI_KERNEL)
-               priority = MINPRI_KERNEL;
-       else
        if (priority < BASEPRI_DEFAULT)
                priority = BASEPRI_DEFAULT;
 
        thread_lock(holder);
        if (mutex->lck_mtx_pri == 0)
                holder->promotions++;
-       if (holder->priority < MINPRI_KERNEL) {
-               holder->sched_mode |= TH_MODE_PROMOTED;
-               if (    mutex->lck_mtx_pri < priority   &&
+       holder->sched_mode |= TH_MODE_PROMOTED;
+       if (            mutex->lck_mtx_pri < priority   &&
                                holder->sched_pri < priority            ) {
-                       KERNEL_DEBUG_CONSTANT(
-                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
+               KERNEL_DEBUG_CONSTANT(
+                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
                                        holder->sched_pri, priority, (int)holder, (int)lck, 0);
 
-                       set_sched_pri(holder, priority);
-               }
+               set_sched_pri(holder, priority);
        }
        thread_unlock(holder);
        splx(s);
@@ -654,15 +649,13 @@ lck_mtx_lock_acquire(
 
                thread_lock(thread);
                thread->promotions++;
-               if (thread->priority < MINPRI_KERNEL) {
-                       thread->sched_mode |= TH_MODE_PROMOTED;
-                       if (thread->sched_pri < priority) {
-                               KERNEL_DEBUG_CONSTANT(
-                                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
+               thread->sched_mode |= TH_MODE_PROMOTED;
+               if (thread->sched_pri < priority) {
+                       KERNEL_DEBUG_CONSTANT(
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
                                                thread->sched_pri, priority, 0, (int)lck, 0);
 
-                               set_sched_pri(thread, priority);
-                       }
+                       set_sched_pri(thread, priority);
                }
                thread_unlock(thread);
                splx(s);
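
With the MINPRI_KERNEL ceiling gone, the waiter's donated priority in
lck_mtx_lock_wait() is simply the higher of its scheduled and base priorities,
floored at BASEPRI_DEFAULT, and the holder is promoted whenever that beats
both the mutex's recorded priority and the holder's current one. The donation
computation in isolation (BASEPRI_DEFAULT's value is taken from
osfmk/kern/sched.h):

    #define BASEPRI_DEFAULT 31   /* per osfmk/kern/sched.h */

    /* Sketch: the priority a blocked waiter donates to a mutex holder,
     * per the rewritten lck_mtx_lock_wait().  No kernel ceiling anymore. */
    static int
    donation_pri(int sched_pri, int base_pri)
    {
        int priority = sched_pri;

        if (priority < base_pri)
            priority = base_pri;         /* at least the base priority   */
        if (priority < BASEPRI_DEFAULT)
            priority = BASEPRI_DEFAULT;  /* floor; the old code also
                                          * clamped down to MINPRI_KERNEL */
        return priority;
    }
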
index 2c7b7f2549193829d853a9000ed2abbf452fda39..779855296c0bc64dcc0232e41c2563f261784c68 100644 (file)
@@ -106,9 +106,7 @@ hertz_tick(
 #endif
 {
        processor_t             processor = current_processor();
-#if !GPROF
        thread_t                thread = current_thread();
-#endif
        timer_t                 state;
 
        if (usermode) {
@@ -117,8 +115,11 @@ hertz_tick(
                state = &PROCESSOR_DATA(processor, user_state);
        }
        else {
-               TIMER_BUMP(&thread->system_timer, ticks);
-
+               /* If this thread is idling, do not charge that time as system time */
+               if ((thread->state & TH_IDLE) == 0) {
+                       TIMER_BUMP(&thread->system_timer, ticks);
+               }
+        
                if (processor->state == PROCESSOR_IDLE)
                        state = &PROCESSOR_DATA(processor, idle_state);
                else
index 4f08fd378bf2d1e017a7febbb7d0fe4cc4e60367..6564cc97c8f34f96bce28d3fa37808891f0d08db 100644 (file)
@@ -96,7 +96,7 @@ thread_quantum_expire(
        /*
         *      Check for fail-safe trip.
         */
-       if (!(thread->sched_mode & TH_MODE_TIMESHARE)) {
+       if (!(thread->sched_mode & (TH_MODE_TIMESHARE|TH_MODE_PROMOTED))) {
                uint64_t                        new_computation;
 
                new_computation = processor->quantum_end;
@@ -115,7 +115,6 @@ thread_quantum_expire(
 
                        thread->safe_release = sched_tick + sched_safe_duration;
                        thread->sched_mode |= (TH_MODE_FAILSAFE|TH_MODE_TIMESHARE);
-                       thread->sched_mode &= ~TH_MODE_PREEMPT;
                }
        }
                
index 0cbde484cd7c7f8e54240934f1229e32af6ad935..e2027c0662e0f474c01245dffbdd810277da89c6 100644 (file)
@@ -150,6 +150,7 @@ void                (*pm_tick_callout)(void)        = NULL;
 void wait_queues_init(void) __attribute__((section("__TEXT, initcode")));
 
 static void load_shift_init(void) __attribute__((section("__TEXT, initcode")));
+static void preempt_pri_init(void) __attribute__((section("__TEXT, initcode")));
 
 static thread_t        thread_select_idle(
                                        thread_t                        thread,
@@ -181,8 +182,6 @@ boolean_t   thread_runnable(
 
 #endif /*DEBUG*/
 
-
-
 /*
  *     State machine
  *
@@ -243,6 +242,7 @@ struct wait_queue wait_queues[NUMQUEUES];
        ((((int)(event) < 0)? ~(int)(event): (int)(event)) % NUMQUEUES)
 
 int8_t         sched_load_shifts[NRQS];
+int                    sched_preempt_pri[NRQBM];
 
 void
 sched_init(void)
@@ -262,6 +262,7 @@ sched_init(void)
 
        wait_queues_init();
        load_shift_init();
+       preempt_pri_init();
        simple_lock_init(&rt_lock, 0);
        run_queue_init(&rt_runq);
        sched_tick = 0;
@@ -299,9 +300,15 @@ sched_timebase_init(void)
        /* scheduler tick interval */
        clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
                                                                                                        NSEC_PER_USEC, &abstime);
-       assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
        sched_tick_interval = abstime;
 
+#if DEBUG
+       printf("Quantum: %d. Smallest quantum: %d. Min Rt/Max Rt: %d/%d."
+               " Tick: %d.\n",
+               std_quantum, min_std_quantum, min_rt_quantum, max_rt_quantum,
+               sched_tick_interval);
+#endif
+
        /*
         * Compute conversion factor from usage to
         * timesharing priorities with 5/8 ** n aging.
@@ -343,6 +350,18 @@ load_shift_init(void)
        }
 }
 
+static void
+preempt_pri_init(void)
+{
+       int             i, *p = sched_preempt_pri;
+
+       for (i = BASEPRI_FOREGROUND + 1; i < MINPRI_KERNEL; ++i)
+               setbit(i, p);
+
+       for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
+               setbit(i, p);
+}
+
 /*
  *     Thread wait timer expiration.
  */
@@ -1200,8 +1219,8 @@ thread_select(
                                                ((queue_entry_t)thread)->next->prev = q;
                                                q->next = ((queue_entry_t)thread)->next;
                                                thread->runq = PROCESSOR_NULL;
-                                               assert(thread->sched_mode & TH_MODE_PREEMPT);
                                                runq->count--; runq->urgency--;
+                                               assert(runq->urgency >= 0);
                                                if (queue_empty(q)) {
                                                        if (runq->highq != IDLEPRI)
                                                                clrbit(MAXPRI - runq->highq, runq->bitmap);
@@ -1916,8 +1935,9 @@ run_queue_dequeue(
 
        thread->runq = PROCESSOR_NULL;
        rq->count--;
-       if (thread->sched_mode & TH_MODE_PREEMPT)
-               rq->urgency--;
+       if (testbit(rq->highq, sched_preempt_pri)) {
+               rq->urgency--; assert(rq->urgency >= 0);
+       }
        if (queue_empty(queue)) {
                if (rq->highq != IDLEPRI)
                        clrbit(MAXPRI - rq->highq, rq->bitmap);
@@ -1971,7 +1991,6 @@ realtime_queue_insert(
        }
 
        thread->runq = RT_RUNQ;
-       assert(thread->sched_mode & TH_MODE_PREEMPT);
        rq->count++; rq->urgency++;
 
        simple_unlock(&rt_lock);
@@ -2060,7 +2079,7 @@ processor_enqueue(
                enqueue_head(queue, (queue_entry_t)thread);
 
        thread->runq = processor;
-       if (thread->sched_mode & TH_MODE_PREEMPT)
+       if (testbit(thread->sched_pri, sched_preempt_pri))
                rq->urgency++;
        rq->count++;
 
@@ -2106,7 +2125,7 @@ processor_setrun(
        /*
         *      Set preemption mode.
         */
-       if (thread->sched_mode & TH_MODE_PREEMPT)
+       if (testbit(thread->sched_pri, sched_preempt_pri))
                preempt = (AST_PREEMPT | AST_URGENT);
        else
        if (thread->sched_mode & TH_MODE_TIMESHARE && thread->priority < BASEPRI_BACKGROUND)
@@ -2409,8 +2428,9 @@ processor_queue_shutdown(
 
                                thread->runq = PROCESSOR_NULL;
                                rq->count--;
-                               if (thread->sched_mode & TH_MODE_PREEMPT)
-                                       rq->urgency--;
+                               if (testbit(pri, sched_preempt_pri)) {
+                                       rq->urgency--; assert(rq->urgency >= 0);
+                               }
                                if (queue_empty(queue)) {
                                        if (pri != IDLEPRI)
                                                clrbit(MAXPRI - pri, rq->bitmap);
@@ -2524,15 +2544,6 @@ set_sched_pri(
 {
        boolean_t               removed = run_queue_remove(thread);
 
-       if (    !(thread->sched_mode & TH_MODE_TIMESHARE)                               &&
-                       (priority >= BASEPRI_PREEMPT                                            ||
-                        (thread->task_priority < MINPRI_KERNEL                 &&
-                         thread->task_priority >= BASEPRI_BACKGROUND   &&
-                         priority > thread->task_priority)                                     )       )
-               thread->sched_mode |= TH_MODE_PREEMPT;
-       else
-               thread->sched_mode &= ~TH_MODE_PREEMPT;
-
        thread->sched_pri = priority;
        if (removed)
                thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
@@ -2630,9 +2641,9 @@ run_queue_remove(
                         */
                        remqueue(&rq->queues[0], (queue_entry_t)thread);
                        rq->count--;
-                       if (thread->sched_mode & TH_MODE_PREEMPT)
-                               rq->urgency--;
-                       assert(rq->urgency >= 0);
+                       if (testbit(thread->sched_pri, sched_preempt_pri)) {
+                               rq->urgency--; assert(rq->urgency >= 0);
+                       }
 
                        if (queue_empty(rq->queues + thread->sched_pri)) {
                                /* update run queue status */
@@ -2741,8 +2752,9 @@ steal_thread(
 
                                thread->runq = PROCESSOR_NULL;
                                rq->count--;
-                               if (thread->sched_mode & TH_MODE_PREEMPT)
-                                       rq->urgency--;
+                               if (testbit(pri, sched_preempt_pri)) {
+                                       rq->urgency--; assert(rq->urgency >= 0);
+                               }
                                if (queue_empty(queue)) {
                                        if (pri != IDLEPRI)
                                                clrbit(MAXPRI - pri, rq->bitmap);
@@ -2807,9 +2819,6 @@ processor_idle(
                        break;
        }
 
-       KERNEL_DEBUG_CONSTANT(
-               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, 0, 0, 0, 0);
-
        timer_switch(&PROCESSOR_DATA(processor, idle_state),
                                                                        mach_absolute_time(), &PROCESSOR_DATA(processor, system_state));
        PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);
@@ -2829,8 +2838,8 @@ processor_idle(
                processor->next_thread = THREAD_NULL;
                processor->state = PROCESSOR_RUNNING;
 
-               if (    processor->runq.highq > new_thread->sched_pri   ||
-                               rt_runq.highq >= new_thread->sched_pri                  ) {
+               if (    processor->runq.highq > new_thread->sched_pri                                   ||
+                               (rt_runq.highq > 0 && rt_runq.highq >= new_thread->sched_pri)   ) {
                        processor->deadline = UINT64_MAX;
 
                        pset_unlock(pset);
@@ -2839,11 +2848,17 @@ processor_idle(
                        thread_setrun(new_thread, SCHED_HEADQ);
                        thread_unlock(new_thread);
 
+                       KERNEL_DEBUG_CONSTANT(
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
+       
                        return (THREAD_NULL);
                }
 
                pset_unlock(pset);
 
+               KERNEL_DEBUG_CONSTANT(
+                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, (int)new_thread, 0, 0);
+
                return (new_thread);
        }
        else
@@ -2870,12 +2885,18 @@ processor_idle(
                        thread_setrun(new_thread, SCHED_HEADQ);
                        thread_unlock(new_thread);
 
+                       KERNEL_DEBUG_CONSTANT(
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
+
                        return (THREAD_NULL);
                }
        }
 
        pset_unlock(pset);
 
+       KERNEL_DEBUG_CONSTANT(
+               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
+
        return (THREAD_NULL);
 }
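
Urgency stops being a per-thread flag (TH_MODE_PREEMPT, deleted below in
thread.h) and becomes a property of the priority level, precomputed once into
the sched_preempt_pri bitmap: priorities above BASEPRI_FOREGROUND but below
MINPRI_KERNEL, plus everything from BASEPRI_PREEMPT up, cause preemption. A
sketch with stand-in bitmap helpers (the priority values are illustrative;
see osfmk/kern/sched.h for the real ones):

    #define NRQS                128  /* run-queue priority levels          */
    #define BASEPRI_FOREGROUND   47  /* illustrative; see sched.h          */
    #define MINPRI_KERNEL        80
    #define BASEPRI_PREEMPT      93
    #define MAXPRI              127

    static int sched_preempt_pri[NRQS / 32];

    /* Stand-ins for the kernel's setbit()/testbit() bitmap macros. */
    static void setbit(int n, int *map)        { map[n / 32] |= 1 << (n % 32); }
    static int  testbit(int n, const int *map) { return map[n / 32] & (1 << (n % 32)); }

    static void
    preempt_pri_init(void)
    {
        int i;

        for (i = BASEPRI_FOREGROUND + 1; i < MINPRI_KERNEL; ++i)
            setbit(i, sched_preempt_pri);       /* elevated user range  */
        for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
            setbit(i, sched_preempt_pri);       /* preemption band + RT */
    }

The enqueue/dequeue paths then derive rq->urgency from
testbit(pri, sched_preempt_pri) instead of consulting the thread, which is why
the assert(rq->urgency >= 0) checks appear wherever the count is decremented.
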
 
index f7855e11422dadc35fa4d7e1aa113d4d75c7231e..311e96c7dad5ebe338ebb03312c87574385ab9c1 100644 (file)
@@ -331,7 +331,6 @@ thread_depress_abstime(
 
                self->sched_pri = DEPRESSPRI;
                myprocessor->current_pri = self->sched_pri;
-               self->sched_mode &= ~TH_MODE_PREEMPT;
                self->sched_mode |= TH_MODE_DEPRESS;
 
                if (interval != 0) {
@@ -427,7 +426,6 @@ thread_poll_yield(
                        if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) {
                                self->sched_pri = DEPRESSPRI;
                                myprocessor->current_pri = self->sched_pri;
-                               self->sched_mode &= ~TH_MODE_PREEMPT;
                        }
                        self->computation_epoch = abstime;
                        self->computation_metered = 0;
index 8387019b8cf9db2c7baa8fe79c871b849c1b91cb..4cca246563896080ec1687ad217c8a1d44899c98 100644 (file)
@@ -185,14 +185,13 @@ struct thread {
        integer_t                       sched_mode;                     /* scheduling mode bits */
 #define TH_MODE_REALTIME               0x0001          /* time constraints supplied */
 #define TH_MODE_TIMESHARE              0x0002          /* use timesharing algorithm */
-#define TH_MODE_PREEMPT                        0x0004          /* can preempt kernel contexts */
-#define TH_MODE_FAILSAFE               0x0008          /* fail-safe has tripped */
-#define        TH_MODE_PROMOTED                0x0010          /* sched pri has been promoted */
-#define TH_MODE_ABORT                  0x0020          /* abort interruptible waits */
-#define TH_MODE_ABORTSAFELY            0x0040          /* ... but only those at safe point */
+#define TH_MODE_FAILSAFE               0x0004          /* fail-safe has tripped */
+#define        TH_MODE_PROMOTED                0x0008          /* sched pri has been promoted */
+#define TH_MODE_ABORT                  0x0010          /* abort interruptible waits */
+#define TH_MODE_ABORTSAFELY            0x0020          /* ... but only those at safe point */
 #define TH_MODE_ISABORTED              (TH_MODE_ABORT | TH_MODE_ABORTSAFELY)
-#define        TH_MODE_DEPRESS                 0x0080          /* normal depress yield */
-#define TH_MODE_POLLDEPRESS            0x0100          /* polled depress yield */
+#define        TH_MODE_DEPRESS                 0x0040          /* normal depress yield */
+#define TH_MODE_POLLDEPRESS            0x0080          /* polled depress yield */
 #define TH_MODE_ISDEPRESSED            (TH_MODE_DEPRESS | TH_MODE_POLLDEPRESS)
 
        integer_t                       sched_pri;                      /* scheduled (current) priority */
index da65103821e0c6b4bf544fdbad053dfcf4e1b03c..4fcb5f9570a6291d0bf2f301a223e5c92bbef664 100644 (file)
@@ -759,7 +759,6 @@ special_handler_continue(void)
 
                        thread->sched_pri = DEPRESSPRI;
                        myprocessor->current_pri = thread->sched_pri;
-                       thread->sched_mode &= ~TH_MODE_PREEMPT;
                }
                thread_unlock(thread);
                splx(s);
index d8c38843f2781c487bc4918acd9642765fc54ed5..173e79a8b104d6f1091aa583af3dfbaec82064f0 100644 (file)
@@ -361,7 +361,7 @@ struct x86_saved_state32_tagged {
 typedef struct x86_saved_state32_tagged x86_saved_state32_tagged_t;
 
 struct x86_sframe32 {
-/*
+       /*
         * in case we throw a fault reloading
         * segment registers on a return out of
         * the kernel... the 'slf' state is only kept
index 8a73ba9d892dc96b38b5c82188e46c1365a14aa2..e28a2c5371b7893e0f0b102e8f0487e27db3d469 100644 (file)
@@ -345,6 +345,8 @@ __END_DECLS
 #define CPU_SUBTYPE_ARM_ALL             ((cpu_subtype_t) 0)
 #define CPU_SUBTYPE_ARM_V4T             ((cpu_subtype_t) 5)
 #define CPU_SUBTYPE_ARM_V6              ((cpu_subtype_t) 6)
+#define CPU_SUBTYPE_ARM_V5TEJ           ((cpu_subtype_t) 7)
+#define CPU_SUBTYPE_ARM_XSCALE         ((cpu_subtype_t) 8)
 
 /*
  *     CPU families (sysctl hw.cpufamily)
@@ -368,6 +370,7 @@ __END_DECLS
 #define CPUFAMILY_INTEL_6_26 0x6b5a4cd2  /* Nehalem */
 #define CPUFAMILY_ARM_9      0xe73283ae
 #define CPUFAMILY_ARM_11     0x8ff620d8
+#define CPUFAMILY_ARM_XSCALE 0x53b005f5
 
 #define CPUFAMILY_INTEL_YONAH  CPUFAMILY_INTEL_6_14
 #define CPUFAMILY_INTEL_MEROM  CPUFAMILY_INTEL_6_15
index 6721d47bf8e69f4decd24afc871b676fc9eacd47..b0280482533a6bce82bf33e3e15206942bdbe663 100644 (file)
@@ -321,6 +321,8 @@ trigger_name_to_port(
 extern int     uiomove64(addr64_t, int, void *);
 #define        MAX_RUN 32
 
+unsigned long vm_cs_tainted_forces = 0;
+
 int
 memory_object_control_uiomove(
        memory_object_control_t control,
@@ -396,8 +398,18 @@ memory_object_control_uiomove(
                         */
                        assert(!dst_page->encrypted);
 
-                       if (mark_dirty)
+                       if (mark_dirty) {
                                dst_page->dirty = TRUE;
+                               if (dst_page->cs_validated) {
+                                       /*
+                                        * CODE SIGNING:
+                                        * We're modifying a code-signed
+                                        * page:  assume that it is now tainted.
+                                        */
+                                       dst_page->cs_tainted = TRUE;
+                                       vm_cs_tainted_forces++;
+                               }
+                       }
                        dst_page->busy = TRUE;
 
                        page_run[cur_run++] = dst_page;
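
The rule these VM changes implement: a code-signed page that has been
validated must be re-examined once it has ever been mapped writable.
memory_object_control_uiomove() taints eagerly at the moment it dirties the
page; vm_fault_enter() records writable mappings in the new wpmapped bit, and
vm_page_validate_cs() decides lazily, consulting pmap_is_modified() when the
dirty bit alone is inconclusive. The decision logic in compressed form
(struct fields follow the diff; the helpers are stand-ins):

    #include <stdbool.h>

    struct cs_page {            /* the page bits the diff manipulates */
        bool dirty;
        bool cs_validated;
        bool cs_tainted;
        bool wpmapped;          /* ever entered with VM_PROT_WRITE */
    };

    /* Eager path: the kernel itself writes the page (uiomove). */
    static void
    mark_dirty_cs(struct cs_page *p)
    {
        p->dirty = true;
        if (p->cs_validated)
            p->cs_tainted = true;    /* modified after validation */
    }

    /* Lazy path: must we re-run validation before mapping this page? */
    static bool
    needs_validation(const struct cs_page *p, bool pmap_modified)
    {
        if (!p->cs_validated)
            return true;             /* never validated at all */
        if (p->cs_tainted)
            return false;            /* already condemned; skip */
        return p->wpmapped && (p->dirty || pmap_modified);
    }
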
index 87ffc3ee7c6d8b78671fcc2bfe599303a8e851ce..809d71e177940e8343a2eeb30bd8f7ce466a8418 100644 (file)
@@ -149,6 +149,12 @@ extern void vm_fault_classify(vm_object_t  object,
 extern void vm_fault_classify_init(void);
 #endif
 
+
+unsigned long vm_cs_validates = 0;
+unsigned long vm_cs_revalidates = 0;
+unsigned long vm_cs_query_modified = 0;
+unsigned long vm_cs_validated_dirtied = 0;
+
 /*
  *     Routine:        vm_fault_init
  *     Purpose:
@@ -1988,19 +1994,21 @@ vm_fault_enter(vm_page_t m,
 
         cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
 
-       if (m->object->code_signed && !m->cs_validated &&
-           pmap != kernel_pmap) {
-               /*
-                * CODE SIGNING:
-                * This page comes from a VM object backed by a
-                * signed memory object and it hasn't been validated yet.
-                * We're about to enter it into a process address space,
-                * so we need to validate its signature now.
-                */
+       if (m->object->code_signed && pmap != kernel_pmap &&
+           (!m->cs_validated || m->wpmapped)) {
                vm_object_lock_assert_exclusive(m->object);
 
-               /* VM map still locked, so 1 ref will remain on VM object */
+               if (m->cs_validated && m->wpmapped) {
+                       vm_cs_revalidates++;
+               }
 
+               /*
+                * CODE SIGNING:
+                * This page comes from a VM object backed by a signed
+                * memory object.  We are about to enter it into a process
+                * address space, so we need to validate its signature.
+                */
+               /* VM map is locked, so 1 ref will remain on VM object */
                vm_page_validate_cs(m);
        }
 
@@ -2087,6 +2095,10 @@ vm_fault_enter(vm_page_t m,
                 * that's needed for an AtomicCompareAndSwap
                 */
                m->pmapped = TRUE;
+               if (prot & VM_PROT_WRITE) {
+                       vm_object_lock_assert_exclusive(m->object);
+                       m->wpmapped = TRUE;
+               }
 
                PMAP_ENTER(pmap, vaddr, m, prot, cache_attr, wired);
        }
@@ -2273,7 +2285,6 @@ RetryFault:
         */
        if (wired) {
                fault_type = prot | VM_PROT_WRITE;
-       
                /*
                 * since we're treating this fault as a 'write'
                 * we must hold the top object lock exclusively
@@ -2500,9 +2511,10 @@ RetryFault:
                        }
                        ASSERT_PAGE_DECRYPTED(m);
 
-                       if (m->object->code_signed && !m->cs_validated) {
+                       if (m->object->code_signed && map != kernel_map &&
+                           (!m->cs_validated || m->wpmapped)) {
                                /*
-                                * We will need to validate this page
+                                * We might need to validate this page
                                 * against its code signature, so we
                                 * want to hold the VM object exclusively.
                                 */
@@ -2547,8 +2559,23 @@ RetryFault:
                         *              --> must disallow write.
                         */
 
-                       if (object == cur_object && object->copy == VM_OBJECT_NULL)
+                       if (object == cur_object && object->copy == VM_OBJECT_NULL) {
+                               if ((fault_type & VM_PROT_WRITE) == 0) {
+                                       /*
+                                        * This is not a "write" fault, so we
+                                        * might not have taken the object lock
+                                        * exclusively and we might not be able
+                                        * to update the "wpmapped" bit in
+                                        * vm_fault_enter().
+                                        * Let's just grant read access to
+                                        * the page for now and we'll
+                                        * soft-fault again if we need write
+                                        * access later...
+                                        */
+                                       prot &= ~VM_PROT_WRITE;
+                               }
                                goto FastPmapEnter;
+                       }
 
                        if ((fault_type & VM_PROT_WRITE) == 0) {
 
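
The fast path above deliberately downgrades read faults: setting "wpmapped"
requires the object lock held exclusively, which a read fault may not hold,
so write permission is stripped and a later store simply soft-faults again.
A small sketch of the downgrade, with illustrative protection bits:

    enum { PROT_READ = 0x1, PROT_WRITE = 0x2 };

    /* On a non-write fault, grant read-only access; the wpmapped
     * bookkeeping is deferred to the write fault that follows if the
     * caller ever stores to the page. */
    static int fast_path_prot(int fault_type, int prot)
    {
        if ((fault_type & PROT_WRITE) == 0)
            prot &= ~PROT_WRITE;
        return prot;
    }
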
@@ -4117,13 +4144,51 @@ vm_page_validate_cs(
        boolean_t               validated, tainted;
        boolean_t               busy_page;
 
-       vm_object_lock_assert_exclusive(page->object);
-       assert(!page->cs_validated);
+       vm_object_lock_assert_held(page->object);
 
        if (!cs_validation) {
                return;
        }
 
+       if (page->cs_validated && !page->cs_tainted && page->wpmapped) {
+               vm_object_lock_assert_exclusive(page->object);
+
+               /*
+                * This page has already been validated and found to
+                * be valid.  However, it was mapped for "write" access
+                * sometime in the past, so we have to check if it was
+                * modified.  If so, it needs to be revalidated.
+                * If the page was already found to be "tainted", no
+                * need to re-validate.
+                */
+               if (!page->dirty) {
+                       vm_cs_query_modified++;
+                       page->dirty = pmap_is_modified(page->phys_page);
+               }
+               if (page->dirty) {
+                       /*
+                        * The page is dirty, so let's clear its
+                        * "validated" bit and re-validate it.
+                        */
+                       if (cs_debug) {
+                               printf("CODESIGNING: vm_page_validate_cs: "
+                                      "page %p obj %p off 0x%llx "
+                                      "was modified\n",
+                                      page, page->object, page->offset);
+                       }
+                       page->cs_validated = FALSE;
+                       vm_cs_validated_dirtied++;
+               }
+       }
+
+       if (page->cs_validated) {
+               return;
+       }
+
+       vm_object_lock_assert_exclusive(page->object);
+
+       vm_cs_validates++;
+
        object = page->object;
        assert(object->code_signed);
        offset = page->offset;
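
vm_page_validate_cs() thus changes from "validate once" to a revalidation
gate: a page that was validated but later writably mapped is checked against
the pmap's modified bit, and if it was dirtied its "validated" state is
revoked before validation is re-run. A sketch of that gate, modeling
pmap_is_modified() with a stub:

    #include <stdbool.h>

    struct page { bool dirty, cs_validated, cs_tainted, wpmapped; };

    /* stand-in for pmap_is_modified(phys_page) */
    static bool hw_modified(const struct page *p) { (void)p; return false; }

    static bool must_revalidate(struct page *p)
    {
        if (p->cs_validated && !p->cs_tainted && p->wpmapped) {
            /* previously valid, but writable at some point: consult
             * the hardware modified bit before trusting it */
            if (!p->dirty)
                p->dirty = hw_modified(p);
            if (p->dirty)
                p->cs_validated = false;   /* force a fresh check */
        }
        return !p->cs_validated;
    }
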
diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c
index f20b587c16205fa8f6b6aaf2c94ec32260ed8300..74e805b790c01cf8ae35b965e8a0c2a30f85c696 100644 (file)
--- a/osfmk/vm/vm_map.c
+++ b/osfmk/vm/vm_map.c
@@ -1749,10 +1749,13 @@ StartAgain: ;
                        }
                        for (; entry->vme_start < end;
                             entry = entry->vme_next) {
+                               /*
+                                * Check if the mapping's attributes
+                                * match the existing map entry.
+                                */
                                if (entry == vm_map_to_entry(map) ||
                                    entry->vme_start != tmp_start ||
                                    entry->is_sub_map != is_submap ||
-                                   entry->object.vm_object != object ||
                                    entry->offset != tmp_offset ||
                                    entry->needs_copy != needs_copy ||
                                    entry->protection != cur_protection ||
@@ -1762,6 +1765,36 @@ StartAgain: ;
                                        /* not the same mapping ! */
                                        RETURN(KERN_NO_SPACE);
                                }
+                               /*
+                                * Check if the same object is being mapped.
+                                */
+                               if (is_submap) {
+                                       if (entry->object.sub_map !=
+                                           (vm_map_t) object) {
+                                               /* not the same submap */
+                                               RETURN(KERN_NO_SPACE);
+                                       }
+                               } else {
+                                       if (entry->object.vm_object != object) {
+                                               /* not the same VM object... */
+                                               vm_object_t obj2;
+
+                                               obj2 = entry->object.vm_object;
+                                               if ((obj2 == VM_OBJECT_NULL ||
+                                                    obj2->internal) &&
+                                                   (object == VM_OBJECT_NULL ||
+                                                    object->internal)) {
+                                                       /*
+                                                        * ... but both are
+                                                        * anonymous memory,
+                                                        * so equivalent.
+                                                        */
+                                               } else {
+                                                       RETURN(KERN_NO_SPACE);
+                                               }
+                                       }
+                               }
+
                                tmp_offset += entry->vme_end - entry->vme_start;
                                tmp_start += entry->vme_end - entry->vme_start;
                                if (entry->vme_end >= end) {
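
The VM_FLAGS_ALREADY path used to demand the exact same VM object; it now
treats any two anonymous mappings as equivalent, whether their internal
objects differ or have not been allocated yet. The equivalence test, sketched
with a stand-in object type:

    #include <stdbool.h>
    #include <stddef.h>

    struct vm_obj { bool internal; };   /* anonymous (internal) memory? */

    static bool objects_equivalent(const struct vm_obj *a,
                                   const struct vm_obj *b)
    {
        if (a == b)
            return true;
        /* both anonymous (or not yet allocated): equivalent */
        return (a == NULL || a->internal) && (b == NULL || b->internal);
    }
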
@@ -7978,8 +8011,8 @@ submap_recurse:
 
 
                        if(submap_entry->wired_count != 0 ||
-                          (sub_object->copy_strategy !=
-                           MEMORY_OBJECT_COPY_SYMMETRIC)) {
+                          (sub_object->copy_strategy ==
+                           MEMORY_OBJECT_COPY_NONE)) {
                                vm_object_lock(sub_object);
                                vm_object_copy_slowly(sub_object,
                                                      submap_entry->offset,
@@ -8086,7 +8119,7 @@ submap_recurse:
                        entry->max_protection |= submap_entry->max_protection;
 
                        if(copied_slowly) {
-                               entry->offset = 0;
+                               entry->offset = local_start - old_start;
                                entry->needs_copy = FALSE;
                                entry->is_shared = FALSE;
                        } else {
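
The one-line change above fixes the offset of a slow-copied submap object:
the copy covers the whole range starting at old_start, so an entry created
for a sub-range beginning at local_start must point that far into the copy
object rather than at offset 0. Worked through with made-up numbers:

    #include <assert.h>

    int main(void)
    {
        unsigned long old_start   = 0x1000;   /* start of the slow copy */
        unsigned long local_start = 0x3000;   /* start of this map entry */
        unsigned long entry_offset = local_start - old_start;
        assert(entry_offset == 0x2000);       /* previously hardcoded to 0 */
        return 0;
    }
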
diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c
index 60a80d38a1f000b1b196b464a9c66d291f072bcc..218a491572ee4721b76a12765d6501861c83f53f 100644 (file)
--- a/osfmk/vm/vm_object.c
+++ b/osfmk/vm/vm_object.c
@@ -1205,7 +1205,7 @@ vm_object_terminate(
                        panic("vm_object_terminate.4 %p %p", object, p);
                }
 
-               if (!p->dirty && p->pmapped)
+               if (!p->dirty && p->wpmapped)
                        p->dirty = pmap_is_modified(p->phys_page);
 
                if ((p->dirty || p->precious) && !p->error && object->alive) {
diff --git a/osfmk/vm/vm_page.h b/osfmk/vm/vm_page.h
index 53d13765482b06eef02e20d816b03fda8d68f05b..4052f9673b84a6cc31a8a29f4d8074ec045f1a89 100644 (file)
--- a/osfmk/vm/vm_page.h
+++ b/osfmk/vm/vm_page.h
@@ -196,6 +196,8 @@ struct vm_page {
                        fictitious:1,   /* Physical page doesn't exist (O) */
                        pmapped:1,      /* page has been entered at some
                                         * point into a pmap (O) */
+                       wpmapped:1,     /* page has been entered at some
+                                        * point into a pmap for write (O) */
                        absent:1,       /* Data has been requested, but is
                                         *  not yet available (O) */
                        error:1,        /* Data manager was unable to provide
@@ -230,7 +232,7 @@ struct vm_page {
                                           /* other pages                  */
                        deactivated:1,
                        zero_fill:1,
-                       __unused_object_bits:9;  /* 9 bits available here */
+                       __unused_object_bits:8;  /* 8 bits available here */
 
        ppnum_t         phys_page;      /* Physical address of page, passed
                                         *  to pmap_enter (read-only) */
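
The new wpmapped:1 flag consumes one of the spare bits, so the pad shrinks
from 9 to 8 and the flags still pack into the same storage. A compile-time
check in the same spirit, with illustrative field names and widths:

    #include <stdint.h>

    struct flags {
        uint32_t pmapped:1,
                 wpmapped:1,      /* new: ever mapped for write */
                 other_bits:22,   /* stand-in for the remaining flags */
                 unused:8;        /* was 9 before wpmapped was added */
    };

    _Static_assert(sizeof(struct flags) == sizeof(uint32_t),
                   "flag bits must still fit in one word");
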
@@ -484,6 +486,12 @@ extern void                vm_page_insert(
                                        vm_object_t             object,
                                        vm_object_offset_t      offset);
 
+extern void            vm_page_insert_internal(
+                                       vm_page_t               page,
+                                       vm_object_t             object,
+                                       vm_object_offset_t      offset,
+                                       boolean_t               queues_lock_held);
+
 extern void            vm_page_replace(
                                        vm_page_t               mem,
                                        vm_object_t             object,
diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c
index 7eeace1d0bd183914c5ae76680f8b9c67c48dd6c..0f3e790a689bb5a55ac1c3df21c89711e36621ef 100644 (file)
--- a/osfmk/vm/vm_pageout.c
+++ b/osfmk/vm/vm_pageout.c
@@ -370,6 +370,7 @@ unsigned int vm_page_speculative_target = 0;
 
 vm_object_t    vm_pageout_scan_wants_object = VM_OBJECT_NULL;
 
+unsigned long vm_cs_validated_resets = 0;
 
 /*
  *     Routine:        vm_backing_store_disable
@@ -1632,12 +1633,30 @@ consider_inactive:
                                vm_purgeable_q_advance_all(1);
                }
 
-               if (object->copy == VM_OBJECT_NULL && 
-                   (object->purgable == VM_PURGABLE_EMPTY ||
-                    object->purgable == VM_PURGABLE_VOLATILE)) {
-                       assert(m->wire_count == 0);     /* if it's wired, we can't put it on our queue */
-                       /* just stick it back on! */
-                       goto reactivate_page;
+               /* If the object is empty, the page must be reclaimed even if dirty or used. */
+               /* If the page belongs to a volatile object, we stick it back on. */
+               if (object->copy == VM_OBJECT_NULL) {
+                       if(object->purgable == VM_PURGABLE_EMPTY && !m->cleaning) {
+                               m->busy = TRUE;
+                               if (m->pmapped == TRUE) {
+                                       /* unmap the page */
+                                       refmod_state = pmap_disconnect(m->phys_page);
+                                       if (refmod_state & VM_MEM_MODIFIED) {
+                                               m->dirty = TRUE;
+                                       }
+                               }
+                               if (m->dirty || m->precious) {
+                                       /* we saved the cost of cleaning this page! */
+                                       vm_page_purged_count++;
+                               }
+                               goto reclaim_page;
+                       }
+                       if (object->purgable == VM_PURGABLE_VOLATILE) {
+                               /* if it's wired, we can't put it on our queue */
+                               assert(m->wire_count == 0);
+                               /* just stick it back on! */
+                               goto reactivate_page;
+                       }
                }
                m->pageq.next = NULL;
                m->pageq.prev = NULL;
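
The rewritten purgeable check above splits two cases: a page of an object
whose purgeable state is EMPTY is reclaimed outright, even if dirty or
referenced (counting the cleaning cost that was saved), while a page of a
still-VOLATILE object is put back on the queue. The decision, sketched with
stand-in types:

    #include <stdbool.h>

    enum purgable_state { PURGABLE_EMPTY, PURGABLE_VOLATILE, PURGABLE_OTHER };
    enum action { RECLAIM, REACTIVATE, CONTINUE_SCAN };

    struct page { bool cleaning, dirty; };

    static enum action classify(enum purgable_state st, const struct page *p)
    {
        if (st == PURGABLE_EMPTY && !p->cleaning)
            return RECLAIM;      /* discard even if dirty or referenced */
        if (st == PURGABLE_VOLATILE)
            return REACTIVATE;   /* keep it resident for now */
        return CONTINUE_SCAN;
    }
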
@@ -2578,6 +2597,7 @@ vm_object_upl_request(
        wpl_array_t             lite_list = NULL;
        vm_object_t             last_copy_object;
        int                     delayed_unlock = 0;
+       int                     j;
 
        if (cntrl_flags & ~UPL_VALID_FLAGS) {
                /*
@@ -2711,11 +2731,34 @@ vm_object_upl_request(
                        }
                        vm_object_unlock(object);
                        VM_PAGE_GRAB_FICTITIOUS(alias_page);
-                       vm_object_lock(object);
+                       goto relock;
                }
-               if (delayed_unlock == 0)
-                       vm_page_lock_queues();
+               if (delayed_unlock == 0) {
+                       /*
+                        * pageout_scan takes the vm_page_lock_queues first
+                        * then tries for the object lock... to avoid what
+                        * is effectively a lock inversion, we'll go to the
+                        * trouble of taking them in that same order... otherwise
+                        * if this object contains the majority of the pages resident
+                        * in the UBC (or a small set of large objects actively being
+                        * worked on contain the majority of the pages), we could
+                        * cause the pageout_scan thread to 'starve' in its attempt
+                        * to find pages to move to the free queue, since it has to
+                        * successfully acquire the object lock of any candidate page
+                        * before it can steal/clean it.
+                        */
+                       vm_object_unlock(object);
+relock:
+                       for (j = 0; ; j++) {
+                               vm_page_lock_queues();
 
+                               if (vm_object_lock_try(object))
+                                       break;
+                               vm_page_unlock_queues();
+                               mutex_pause(j);
+                       }
+                       delayed_unlock = 1;
+               }
                if (cntrl_flags & UPL_COPYOUT_FROM) {
                        upl->flags |= UPL_PAGE_SYNC_DONE;
 
@@ -2848,6 +2891,7 @@ check_busy:
                                dst_page->busy = was_busy;
 
                                vm_page_lock_queues();
+                               delayed_unlock = 1;
                        }
                        if (dst_page->pageout_queue == TRUE)
                                /*
@@ -3001,6 +3045,7 @@ check_busy:
                                        upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
 
                                        vm_page_lock_queues();
+                                       delayed_unlock = 1;
                                }
                                /*
                                 * remember the copy object we synced with
@@ -3070,14 +3115,8 @@ check_busy:
                                }
                                /*
                                 * need to allocate a page
-                                * vm_page_alloc may grab the
-                                * queues lock for a purgeable object
-                                * so drop it
                                 */
-                               delayed_unlock = 0;
-                               vm_page_unlock_queues();
-
-                               dst_page = vm_page_alloc(object, dst_offset);
+                               dst_page = vm_page_grab();
 
                                if (dst_page == VM_PAGE_NULL) {
                                        if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
@@ -3096,14 +3135,41 @@ check_busy:
                                         * then try again for the same
                                         * offset...
                                         */
+                                       delayed_unlock = 0;
+                                       vm_page_unlock_queues();
+
                                        vm_object_unlock(object);
                                        VM_PAGE_WAIT();
-                                       vm_object_lock(object);
+
+                                       /*
+                                        * pageout_scan takes the vm_page_lock_queues first
+                                        * then tries for the object lock... to avoid what
+                                        * is effectively a lock inversion, we'll go to the
+                                        * trouble of taking them in that same order... otherwise
+                                        * if this object contains the majority of the pages resident
+                                        * in the UBC (or a small set of large objects actively being
+                                        * worked on contain the majority of the pages), we could
+                                        * cause the pageout_scan thread to 'starve' in its attempt
+                                        * to find pages to move to the free queue, since it has to
+                                        * successfully acquire the object lock of any candidate page
+                                        * before it can steal/clean it.
+                                        */
+                                       for (j = 0; ; j++) {
+                                               vm_page_lock_queues();
+
+                                               if (vm_object_lock_try(object))
+                                                       break;
+                                               vm_page_unlock_queues();
+                                               mutex_pause(j);
+                                       }
+                                       delayed_unlock = 1;
 
                                        continue;
                                }
-                               dst_page->busy = FALSE;
+                               vm_page_insert_internal(dst_page, object, dst_offset, TRUE);
+
                                dst_page->absent = TRUE;
+                               dst_page->busy = FALSE;
 
                                if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
                                        /*
@@ -3116,7 +3182,6 @@ check_busy:
                                         */
                                        dst_page->clustered = TRUE;
                                }
-                               vm_page_lock_queues();
                        }
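
Replacing vm_page_alloc() with vm_page_grab() plus vm_page_insert_internal()
is what lets the caller keep holding the page-queues lock across the
allocation: grab does not touch the queues itself, and the insert routine's
new boolean tells it the queues lock is already held. A sketch of the split,
with stub stand-ins for both routines:

    #include <stdlib.h>

    struct object { int id; };
    struct page   { struct object *obj; unsigned long offset; };

    /* stand-in for vm_page_grab(): no page-queue manipulation */
    static struct page *page_grab(void)
    {
        return calloc(1, sizeof(struct page));
    }

    /* stand-in for vm_page_insert_internal(): the flag says whether the
     * caller already holds the page-queues lock, so it must not re-take it */
    static void page_insert(struct page *p, struct object *o,
                            unsigned long offset, int queues_lock_held)
    {
        (void)queues_lock_held;
        p->obj = o;
        p->offset = offset;
    }

    static struct page *alloc_into(struct object *o, unsigned long offset)
    {
        struct page *p = page_grab();   /* safe under the queues lock */
        if (p != NULL)
            page_insert(p, o, offset, 1 /* queues_lock_held */);
        return p;
    }
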
                        /*
                         * ENCRYPTED SWAP:
@@ -3268,7 +3333,29 @@ check_busy:
                }
 delay_unlock_queues:
                if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
+                       /*
+                        * pageout_scan takes the vm_page_lock_queues first
+                        * then tries for the object lock... to avoid what
+                        * is effectively a lock inversion, we'll go to the
+                        * trouble of taking them in that same order... otherwise
+                        * if this object contains the majority of the pages resident
+                        * in the UBC (or a small set of large objects actively being
+                        * worked on contain the majority of the pages), we could
+                        * cause the pageout_scan thread to 'starve' in its attempt
+                        * to find pages to move to the free queue, since it has to
+                        * successfully acquire the object lock of any candidate page
+                        * before it can steal/clean it.
+                        */
+                       vm_object_unlock(object);
                        mutex_yield(&vm_page_queue_lock);
+
+                       for (j = 0; ; j++) {
+                               if (vm_object_lock_try(object))
+                                       break;
+                               vm_page_unlock_queues();
+                               mutex_pause(j);
+                               vm_page_lock_queues();
+                       }
                        delayed_unlock = 1;
                }
 try_next_page:
@@ -3279,7 +3366,7 @@ try_next_page:
        if (alias_page != NULL) {
                if (delayed_unlock == 0) {
                        vm_page_lock_queues();
-                       delayed_unlock++;
+                       delayed_unlock = 1;
                }
                vm_page_free(alias_page);
        }
@@ -3760,6 +3847,7 @@ vm_map_enter_upl(
                        cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
 
                        m->pmapped = TRUE;
+                       m->wpmapped = TRUE;
        
                        PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, cache_attr, TRUE);
                }
@@ -3844,6 +3932,7 @@ upl_commit_range(
        int                     delayed_unlock = 0;
        int                     clear_refmod = 0;
        int                     pgpgout_count = 0;
+       int                     j;
 
        *empty = FALSE;
 
@@ -3887,17 +3976,35 @@ upl_commit_range(
        } else {
                shadow_object = object;
        }
-       vm_object_lock(shadow_object);
-
        entry = offset/PAGE_SIZE;
        target_offset = (vm_object_offset_t)offset;
 
+       /*
+        * pageout_scan takes the vm_page_lock_queues first
+        * then tries for the object lock... to avoid what
+        * is effectively a lock inversion, we'll go to the
+        * trouble of taking them in that same order... otherwise
+        * if this object contains the majority of the pages resident
+        * in the UBC (or a small set of large objects actively being
+        * worked on contain the majority of the pages), we could
+        * cause the pageout_scan thread to 'starve' in its attempt
+        * to find pages to move to the free queue, since it has to
+        * successfully acquire the object lock of any candidate page
+        * before it can steal/clean it.
+        */
+       for (j = 0; ; j++) {
+               vm_page_lock_queues();
+
+               if (vm_object_lock_try(shadow_object))
+                       break;
+               vm_page_unlock_queues();
+               mutex_pause(j);
+       }
+       delayed_unlock = 1;
+
        while (xfer_size) {
                vm_page_t       t, m;
 
-               if (delayed_unlock == 0)
-                       vm_page_lock_queues();
-
                m = VM_PAGE_NULL;
 
                if (upl->flags & UPL_LITE) {
@@ -3937,6 +4044,17 @@ upl_commit_range(
                                        m->dirty = TRUE;
                                else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
                                        m->dirty = FALSE;
+                                       if (m->cs_validated && !m->cs_tainted) {
+                                               /*
+                                                * CODE SIGNING:
+                                                * This page is no longer dirty
+                                                * but could have been modified,
+                                                * so it will need to be
+                                                * re-validated.
+                                                */
+                                               m->cs_validated = FALSE;
+                                               vm_cs_validated_resets++;
+                                       }
                                        clear_refmod |= VM_MEM_MODIFIED;
                                }
                                if (flags & UPL_COMMIT_INACTIVATE)
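
The same three-line guard is inserted at every point where this commit clears
a page's dirty bit (several UPL commit paths here and vm_page_decrypt()
below): once the record of modification is discarded, a validated-but-
untainted page can no longer be trusted and must be revalidated on next use.
The rule, sketched with stand-in types:

    #include <stdbool.h>

    struct page { bool dirty, cs_validated, cs_tainted; };

    static unsigned long cs_validated_resets;  /* cf. vm_cs_validated_resets */

    static void clear_dirty(struct page *p)
    {
        p->dirty = false;
        if (p->cs_validated && !p->cs_tainted) {
            /* modification history is gone: distrust the page */
            p->cs_validated = false;
            cs_validated_resets++;
        }
    }
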
@@ -3964,6 +4082,17 @@ upl_commit_range(
                         */
                        if (flags & UPL_COMMIT_CLEAR_DIRTY) {
                                m->dirty = FALSE;
+                               if (m->cs_validated && !m->cs_tainted) {
+                                       /*
+                                        * CODE SIGNING:
+                                        * This page is no longer dirty
+                                        * but could have been modified,
+                                        * so it will need to be
+                                        * re-validated.
+                                        */
+                                       m->cs_validated = FALSE;
+                                       vm_cs_validated_resets++;
+                               }
                                clear_refmod |= VM_MEM_MODIFIED;
                        }
                        if (clear_refmod)
@@ -4003,6 +4132,17 @@ upl_commit_range(
                                if (m->wanted) vm_pageout_target_collisions++;
 #endif
                                m->dirty = FALSE;
+                               if (m->cs_validated && !m->cs_tainted) {
+                                       /*
+                                        * CODE SIGNING:
+                                        * This page is no longer dirty
+                                        * but could have been modified,
+                                        * so it will need to be
+                                        * re-validated.
+                                        */
+                                       m->cs_validated = FALSE;
+                                       vm_cs_validated_resets++;
+                               }
 
                                if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))
                                        m->dirty = TRUE;
@@ -4049,7 +4189,7 @@ upl_commit_range(
                                goto commit_next_page;
                        }
 #if MACH_CLUSTER_STATS
-                       if (m->pmapped)
+                       if (m->wpmapped)
                                m->dirty = pmap_is_modified(m->phys_page);
 
                        if (m->dirty)   vm_pageout_cluster_dirtied++;
@@ -4057,6 +4197,17 @@ upl_commit_range(
                        if (m->wanted)  vm_pageout_cluster_collisions++;
 #endif
                        m->dirty = FALSE;
+                       if (m->cs_validated && !m->cs_tainted) {
+                               /*
+                                * CODE SIGNING:
+                                * This page is no longer dirty
+                                * but could have been modified,
+                                * so it will need to be
+                                * re-validated.
+                                */
+                               m->cs_validated = FALSE;
+                               vm_cs_validated_resets++;
+                       }
 
                        if ((m->busy) && (m->cleaning)) {
                                /*
@@ -4122,7 +4273,29 @@ commit_next_page:
                entry++;
 
                if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
+                       /*
+                        * pageout_scan takes the vm_page_lock_queues first
+                        * then tries for the object lock... to avoid what
+                        * is effectively a lock inversion, we'll go to the
+                        * trouble of taking them in that same order... otherwise
+                        * if this object contains the majority of the pages resident
+                        * in the UBC (or a small set of large objects actively being
+                        * worked on contain the majority of the pages), we could
+                        * cause the pageout_scan thread to 'starve' in its attempt
+                        * to find pages to move to the free queue, since it has to
+                        * successfully acquire the object lock of any candidate page
+                        * before it can steal/clean it.
+                        */
+                       vm_object_unlock(shadow_object);
                        mutex_yield(&vm_page_queue_lock);
+
+                       for (j = 0; ; j++) {
+                               if (vm_object_lock_try(shadow_object))
+                                       break;
+                               vm_page_unlock_queues();
+                               mutex_pause(j);
+                               vm_page_lock_queues();
+                       }
                        delayed_unlock = 1;
                }
        }
@@ -4199,6 +4372,7 @@ upl_abort_range(
        wpl_array_t             lite_list;
        int                     occupied;
        int                     delayed_unlock = 0;
+       int                     j;
 
        *empty = FALSE;
 
@@ -4233,17 +4407,35 @@ upl_abort_range(
        } else
                shadow_object = object;
 
-       vm_object_lock(shadow_object);
-
        entry = offset/PAGE_SIZE;
        target_offset = (vm_object_offset_t)offset;
 
+       /*
+        * pageout_scan takes the vm_page_lock_queues first
+        * then tries for the object lock... to avoid what
+        * is effectively a lock inversion, we'll go to the
+        * trouble of taking them in that same order... otherwise
+        * if this object contains the majority of the pages resident
+        * in the UBC (or a small set of large objects actively being
+        * worked on contain the majority of the pages), we could
+        * cause the pageout_scan thread to 'starve' in its attempt
+        * to find pages to move to the free queue, since it has to
+        * successfully acquire the object lock of any candidate page
+        * before it can steal/clean it.
+        */
+       for (j = 0; ; j++) {
+               vm_page_lock_queues();
+
+               if (vm_object_lock_try(shadow_object))
+                       break;
+               vm_page_unlock_queues();
+               mutex_pause(j);
+       }
+       delayed_unlock = 1;
+
        while (xfer_size) {
                vm_page_t       t, m;
 
-               if (delayed_unlock == 0)
-                       vm_page_lock_queues();
-
                m = VM_PAGE_NULL;
 
                if (upl->flags & UPL_LITE) {
@@ -4352,7 +4544,29 @@ upl_abort_range(
                        }
                }
                if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
+                       /*
+                        * pageout_scan takes the vm_page_lock_queues first
+                        * then tries for the object lock... to avoid what
+                        * is effectively a lock inversion, we'll go to the
+                        * trouble of taking them in that same order... otherwise
+                        * if this object contains the majority of the pages resident
+                        * in the UBC (or a small set of large objects actively being
+                        * worked on contain the majority of the pages), we could
+                        * cause the pageout_scan thread to 'starve' in its attempt
+                        * to find pages to move to the free queue, since it has to
+                        * successfully acquire the object lock of any candidate page
+                        * before it can steal/clean it.
+                        */
+                       vm_object_unlock(shadow_object);
                        mutex_yield(&vm_page_queue_lock);
+
+                       for (j = 0; ; j++) {
+                               if (vm_object_lock_try(shadow_object))
+                                       break;
+                               vm_page_unlock_queues();
+                               mutex_pause(j);
+                               vm_page_lock_queues();
+                       }
                        delayed_unlock = 1;
                }
                target_offset += PAGE_SIZE_64;
@@ -5230,6 +5444,7 @@ vm_paging_map_object(
                        pmap_sync_page_data_phys(page->phys_page);
                }
                page->pmapped = TRUE;
+               page->wpmapped = TRUE;
                cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
 
                //assert(pmap_verify_free(page->phys_page));
@@ -5656,6 +5871,17 @@ vm_page_decrypt(
         * and the decryption doesn't count.
         */
        page->dirty = FALSE;
+       if (page->cs_validated && !page->cs_tainted) {
+               /*
+                * CODE SIGNING:
+                * This page is no longer dirty
+                * but could have been modified,
+                * so it will need to be
+                * re-validated.
+                */
+               page->cs_validated = FALSE;
+               vm_cs_validated_resets++;
+       }
        pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
 
        page->encrypted = FALSE;
@@ -5676,6 +5902,7 @@ vm_page_decrypt(
         */
        assert(pmap_verify_free(page->phys_page));
        page->pmapped = FALSE;
+       page->wpmapped = FALSE;
 
        vm_object_paging_end(page->object);
 }
diff --git a/osfmk/vm/vm_purgeable.c b/osfmk/vm/vm_purgeable.c
index df1f3f810b7bcf307937fa66a97875672e5c5713..dfd80266f956420d3e8592214e335f8297a4e4f6 100644 (file)
--- a/osfmk/vm/vm_purgeable.c
+++ b/osfmk/vm/vm_purgeable.c
@@ -33,9 +33,9 @@ struct token {
 
 struct token    tokens[MAX_VOLATILE];
 
-token_idx_t     token_free_idx = 0;    /* head of free queue */
-token_cnt_t     token_init_count = 1;  /* token 0 is reserved!! */
-token_cnt_t     token_new_pagecount = 0;       /* count of pages that will
+token_idx_t     token_free_idx = 0;            /* head of free queue */
+token_idx_t     token_init_idx = 1;            /* token 0 is reserved!! */
+int32_t                token_new_pagecount = 0;        /* count of pages that will
                                                 * be added onto token queue */
 
 int             available_for_purge = 0;       /* increase when ripe token
@@ -96,9 +96,9 @@ vm_purgeable_token_add(purgeable_q_t queue)
        token_idx_t     token;
        enum purgeable_q_type i;
 
-       if (token_init_count < MAX_VOLATILE) {  /* lazy token array init */
-               token = token_init_count;
-               token_init_count++;
+       if (token_init_idx < MAX_VOLATILE) {    /* lazy token array init */
+               token = token_init_idx;
+               token_init_idx++;
        } else if (token_free_idx) {
                token = token_free_idx;
                token_free_idx = tokens[token_free_idx].next;
@@ -111,9 +111,10 @@ vm_purgeable_token_add(purgeable_q_t queue)
         * obsolete
         */
        for (i = PURGEABLE_Q_TYPE_FIFO; i < PURGEABLE_Q_TYPE_MAX; i++) {
-               purgeable_queues[i].new_pages += token_new_pagecount;
-               assert(purgeable_queues[i].new_pages >= 0);
-               assert((uint64_t) (purgeable_queues[i].new_pages) <= TOKEN_COUNT_MAX);
+               int64_t pages = purgeable_queues[i].new_pages += token_new_pagecount;
+               assert(pages >= 0);
+               assert(pages <= TOKEN_COUNT_MAX);
+                purgeable_queues[i].new_pages = pages;
        }
        token_new_pagecount = 0;
 
@@ -235,6 +236,20 @@ vm_purgeable_token_delete_first(purgeable_q_t queue)
 void
 vm_purgeable_q_advance_all(uint32_t num_pages)
 {
+       /* check queue counters - if they get really large, scale them back.
+        * They tend to get that large when there is no purgeable queue action */
+       int i;
+       if (token_new_pagecount > (INT32_MAX >> 1))     /* a system idling for years might get there */
+       {
+               for (i = PURGEABLE_Q_TYPE_FIFO; i < PURGEABLE_Q_TYPE_MAX; i++) {
+                       int64_t pages = purgeable_queues[i].new_pages += token_new_pagecount;
+                       assert(pages >= 0);
+                       assert(pages <= TOKEN_COUNT_MAX);
+                       purgeable_queues[i].new_pages = pages;
+               }
+               token_new_pagecount = 0;
+       }
+       
        /*
         * don't need to advance obsolete queue - all items are ripe there,
         * always
diff --git a/osfmk/vm/vm_purgeable_internal.h b/osfmk/vm/vm_purgeable_internal.h
index ab2db597e68f6e4d47468bcca6b454476002dc39..e225da651463a5770d8b5d3f398db77afc9f04b9 100644 (file)
--- a/osfmk/vm/vm_purgeable_internal.h
+++ b/osfmk/vm/vm_purgeable_internal.h
@@ -46,12 +46,7 @@ enum purgeable_q_type {
        PURGEABLE_Q_TYPE_MAX
 };
 
-/* 
- * It appears there's a 16 vs 32 size mismatch when using
- * CONFIG_TOKEN_QUEUE_SMALL and the resulting math can lead to a large 
- * negative value for new_pages in vm_purgeable.c.
- */
-#if (CONFIG_TOKEN_QUEUE_SMALL == 1) && 0
+#if (CONFIG_TOKEN_QUEUE_SMALL == 1)
 typedef uint16_t token_idx_t;
 typedef uint16_t token_cnt_t;
 #define MAX_VOLATILE 0x01000
@@ -80,7 +75,7 @@ struct purgeable_q {
 typedef struct purgeable_q * purgeable_q_t;
 
 extern struct purgeable_q purgeable_queues[PURGEABLE_Q_TYPE_MAX];
-extern token_cnt_t token_new_pagecount;
+extern int32_t token_new_pagecount;
 extern int available_for_purge;
 
 
diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c
index 5d4d80b47e38fffd67d0665dc649e037ccc1575c..f50356d0db2ed33cabd628ba2b394e03d3715883 100644 (file)
--- a/osfmk/vm/vm_resident.c
+++ b/osfmk/vm/vm_resident.c
@@ -100,8 +100,6 @@ int                 speculative_steal_index = 0;
 
 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
 
-static void vm_page_insert_internal(vm_page_t, vm_object_t, vm_object_offset_t, boolean_t);
-
 
 /*
  *     Associated with page of user-allocatable memory is a
@@ -406,6 +404,7 @@ vm_page_bootstrap(
        m->laundry = FALSE;
        m->free = FALSE;
        m->pmapped = FALSE;
+       m->wpmapped = FALSE;
        m->reference = FALSE;
        m->pageout = FALSE;
        m->dump_cleaning = FALSE;
@@ -889,7 +888,7 @@ vm_page_insert(
 }
 
 
-static void
+void
 vm_page_insert_internal(
        vm_page_t               mem,
        vm_object_t             object,
@@ -1546,6 +1545,7 @@ vm_page_grablo(void)
                assert(mem->free);
                assert(mem->busy);
                assert(!mem->pmapped);
+               assert(!mem->wpmapped);
 
                mem->pageq.next = NULL;
                mem->pageq.prev = NULL;
@@ -1613,6 +1613,7 @@ return_page_from_cpu_list:
                assert(mem->busy);
                assert(!mem->encrypted);
                assert(!mem->pmapped);
+               assert(!mem->wpmapped);
 
                return mem;
        }
@@ -1723,6 +1724,7 @@ return_page_from_cpu_list:
                        assert(!mem->free);
                        assert(!mem->encrypted);
                        assert(!mem->pmapped);
+                       assert(!mem->wpmapped);
                }
                PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
                PROCESSOR_DATA(current_processor(), start_color) = color;
@@ -2090,6 +2092,7 @@ vm_page_free_prepare(
        mem->encrypted_cleaning = FALSE;
        mem->deactivated = FALSE;
        mem->pmapped = FALSE;
+       mem->wpmapped = FALSE;
 
        if (mem->private) {
                mem->private = FALSE;
@@ -2805,11 +2808,9 @@ vm_page_copy(
        dest_m->encrypted = FALSE;
 
        if (src_m->object != VM_OBJECT_NULL &&
-           src_m->object->code_signed &&
-           !src_m->cs_validated) {
+           src_m->object->code_signed) {
                /*
-                * We're copying a not-yet-validated page from a
-                * code-signed object.
+                * We're copying a page from a code-signed object.
                 * Whoever ends up mapping the copy page might care about
                 * the original page's integrity, so let's validate the
                 * source page now.
diff --git a/osfmk/vm/vm_shared_region.c b/osfmk/vm/vm_shared_region.c
index 50632a9a0ed30c01eac2deaa958f24d5d82e834c..f6975e1c1dd9a81086035701cf0fef56bd6e59de 100644 (file)
--- a/osfmk/vm/vm_shared_region.c
+++ b/osfmk/vm/vm_shared_region.c
 #include <kern/ipc_tt.h>
 #include <kern/kalloc.h>
 
+#include <mach/mach_vm.h>
+
 #include <vm/vm_map.h>
 #include <vm/vm_shared_region.h>
 
@@ -770,6 +772,9 @@ vm_shared_region_map_file(
        unsigned int            i;
        mach_port_t             map_port;
        mach_vm_offset_t        target_address;
+       vm_object_t             object;
+       vm_object_size_t        obj_size;
+
 
        kr = KERN_SUCCESS;
 
@@ -844,51 +849,143 @@ vm_shared_region_map_file(
                target_address =
                        mappings[i].sfm_address - sr_base_address;
 
-               /* establish that mapping, OK if it's to "already" there */
-               kr = vm_map_enter_mem_object(
-                       sr_map,
-                       &target_address,
-                       vm_map_round_page(mappings[i].sfm_size),
-                       0,
-                       VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
-                       map_port,
-                       mappings[i].sfm_file_offset,
-                       TRUE,
-                       mappings[i].sfm_init_prot & VM_PROT_ALL,
-                       mappings[i].sfm_max_prot & VM_PROT_ALL,
-                       VM_INHERIT_DEFAULT);
-               if (kr == KERN_MEMORY_PRESENT) {
-                       /* this exact mapping was already there: that's fine */
-                       SHARED_REGION_TRACE_INFO(
-                               ("shared_region: mapping[%d]: "
-                                "address:0x%016llx size:0x%016llx "
-                                "offset:0x%016llx "
-                                "maxprot:0x%x prot:0x%x already mapped...\n",
-                                i,
-                                (long long)mappings[i].sfm_address,
-                                (long long)mappings[i].sfm_size,
-                                (long long)mappings[i].sfm_file_offset,
-                                mappings[i].sfm_max_prot,
-                                mappings[i].sfm_init_prot));
-                       kr = KERN_SUCCESS;
-               } else if (kr != KERN_SUCCESS) {
-                       /* this mapping failed ! */
-                       SHARED_REGION_TRACE_ERROR(
-                               ("shared_region: mapping[%d]: "
-                                "address:0x%016llx size:0x%016llx "
-                                "offset:0x%016llx "
-                                "maxprot:0x%x prot:0x%x failed 0x%x\n",
-                                i,
-                                (long long)mappings[i].sfm_address,
-                                (long long)mappings[i].sfm_size,
-                                (long long)mappings[i].sfm_file_offset,
-                                mappings[i].sfm_max_prot,
-                                mappings[i].sfm_init_prot,
-                                kr));
-                       break;
+               /* establish that mapping, OK if it's "already" there */
+               if (map_port == MACH_PORT_NULL) {
+                       /*
+                        * We want to map some anonymous memory in a
+                        * shared region.
+                        * We have to create the VM object now, so that it
+                        * can be mapped "copy-on-write".
+                        */
+                       obj_size = vm_map_round_page(mappings[i].sfm_size);
+                       object = vm_object_allocate(obj_size);
+                       if (object == VM_OBJECT_NULL) {
+                               kr = KERN_RESOURCE_SHORTAGE;
+                       } else {
+                               kr = vm_map_enter(
+                                       sr_map,
+                                       &target_address,
+                                       vm_map_round_page(mappings[i].sfm_size),
+                                       0,
+                                       VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
+                                       object,
+                                       0,
+                                       TRUE,
+                                       mappings[i].sfm_init_prot & VM_PROT_ALL,
+                                       mappings[i].sfm_max_prot & VM_PROT_ALL,
+                                       VM_INHERIT_DEFAULT);
+                       }
+               } else {
+                       object = VM_OBJECT_NULL; /* no anonymous memory here */
+                       kr = vm_map_enter_mem_object(
+                               sr_map,
+                               &target_address,
+                               vm_map_round_page(mappings[i].sfm_size),
+                               0,
+                               VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
+                               map_port,
+                               mappings[i].sfm_file_offset,
+                               TRUE,
+                               mappings[i].sfm_init_prot & VM_PROT_ALL,
+                               mappings[i].sfm_max_prot & VM_PROT_ALL,
+                               VM_INHERIT_DEFAULT);
                }
 
-               /* we're protected by "sr_mapping_in_progress" */
+               if (kr != KERN_SUCCESS) {
+                       if (map_port == MACH_PORT_NULL) {
+                               /*
+                                * Get rid of the VM object we just created
+                                * but failed to map.
+                                */
+                               vm_object_deallocate(object);
+                               object = VM_OBJECT_NULL;
+                       }
+                       if (kr == KERN_MEMORY_PRESENT) {
+                               /*
+                                * This exact mapping was already there:
+                                * that's fine.
+                                */
+                               SHARED_REGION_TRACE_INFO(
+                                       ("shared_region: mapping[%d]: "
+                                        "address:0x%016llx size:0x%016llx "
+                                        "offset:0x%016llx "
+                                        "maxprot:0x%x prot:0x%x "
+                                        "already mapped...\n",
+                                        i,
+                                        (long long)mappings[i].sfm_address,
+                                        (long long)mappings[i].sfm_size,
+                                        (long long)mappings[i].sfm_file_offset,
+                                        mappings[i].sfm_max_prot,
+                                        mappings[i].sfm_init_prot));
+                               /*
+                                * We didn't establish this mapping ourselves;
+                                * let's reset its size, so that we do not
+                                * attempt to undo it if an error occurs later.
+                                */
+                               mappings[i].sfm_size = 0;
+                               kr = KERN_SUCCESS;
+                       } else {
+                               unsigned int j;
+
+                               /* this mapping failed ! */
+                               SHARED_REGION_TRACE_ERROR(
+                                       ("shared_region: mapping[%d]: "
+                                        "address:0x%016llx size:0x%016llx "
+                                        "offset:0x%016llx "
+                                        "maxprot:0x%x prot:0x%x failed 0x%x\n",
+                                        i,
+                                        (long long)mappings[i].sfm_address,
+                                        (long long)mappings[i].sfm_size,
+                                        (long long)mappings[i].sfm_file_offset,
+                                        mappings[i].sfm_max_prot,
+                                        mappings[i].sfm_init_prot,
+                                        kr));
+
+                               /*
+                                * Undo the mappings we've established so far.
+                                */
+                               for (j = 0; j < i; j++) {
+                                       kern_return_t kr2;
+
+                                       if (mappings[j].sfm_size == 0) {
+                                               /*
+                                                * We didn't establish this
+                                                * mapping, so nothing to undo.
+                                                */
+                                               continue;
+                                       }
+                                       SHARED_REGION_TRACE_INFO(
+                                               ("shared_region: mapping[%d]: "
+                                                "address:0x%016llx "
+                                                "size:0x%016llx "
+                                                "offset:0x%016llx "
+                                                "maxprot:0x%x prot:0x%x: "
+                                                "undoing...\n",
+                                                j,
+                                                (long long)mappings[j].sfm_address,
+                                                (long long)mappings[j].sfm_size,
+                                                (long long)mappings[j].sfm_file_offset,
+                                                mappings[j].sfm_max_prot,
+                                                mappings[j].sfm_init_prot));
+                                       kr2 = mach_vm_deallocate(
+                                               sr_map,
+                                               (mappings[j].sfm_address -
+                                                sr_base_address),
+                                               mappings[j].sfm_size);
+                                       assert(kr2 == KERN_SUCCESS);
+                               }
+
+                               break;
+                       }
+
+               }
+
+               /*
+                * Record the first (chronologically) mapping in
+                * this shared region.
+                * We're protected by "sr_mapping_in_progress" here,
+                * so no need to lock "shared_region".
+                */
                if (shared_region->sr_first_mapping == (mach_vm_offset_t) -1) {
                        shared_region->sr_first_mapping = target_address;
                }
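
Two things change in vm_shared_region_map_file(): anonymous mappings
(map_port == MACH_PORT_NULL) now get an explicit VM object up front so they
can be mapped copy-on-write, and a failure now unwinds every mapping this
call established, skipping entries whose sfm_size was zeroed because they
were found "already mapped". The unwind loop, sketched with stand-ins for
the mapping record and mach_vm_deallocate():

    #include <assert.h>
    #include <stddef.h>

    struct mapping { unsigned long addr, size; };

    /* stand-in for mach_vm_deallocate() */
    static int deallocate(unsigned long addr, unsigned long size)
    {
        (void)addr; (void)size;
        return 0;   /* KERN_SUCCESS */
    }

    static void undo_established(struct mapping *maps, size_t failed_idx)
    {
        for (size_t j = 0; j < failed_idx; j++) {
            if (maps[j].size == 0)
                continue;           /* pre-existing: not ours to undo */
            int kr2 = deallocate(maps[j].addr, maps[j].size);
            assert(kr2 == 0);       /* the kernel asserts success here */
            (void)kr2;
        }
    }
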
diff --git a/security/conf/MASTER b/security/conf/MASTER
index d3fcf6d1f53e94a74044e7ab4eee01320142ed45..d692d5ae572c3c6608bfb1574d517c684b5f72ae 100644 (file)
--- a/security/conf/MASTER
+++ b/security/conf/MASTER
@@ -55,7 +55,7 @@ ident         SECURITY
 # Note: MAC options must be set in both bsd/conf and security/conf MASTER files
 #
 options                KDEBUG          # kernel tracing                # <kdebug>
-options                AUDIT           # Security event auditing
+options                AUDIT           # Security event auditing       # <audit>
 options                CONFIG_LCTX     # Login Context
 
 options                CONFIG_DTRACE   # dtrace support        # <config_dtrace>
diff --git a/security/conf/MASTER.i386 b/security/conf/MASTER.i386
index 01b3a55d28ff511350ad398402247f563ca245f6..1bd4637655e1b47a9629fdd76fcee23fb09c4558 100644 (file)
--- a/security/conf/MASTER.i386
+++ b/security/conf/MASTER.i386
@@ -1,16 +1,17 @@
 ######################################################################
 #
-#  RELEASE     = [ intel mach libkerncpp config_dtrace ]
+#  RELEASE     = [ intel mach libkerncpp config_dtrace audit ]
 #  PROFILE     = [ RELEASE profile ]
 #  DEBUG       = [ RELEASE debug ]
 #
-#  EMBEDDED    = [ intel mach libkerncpp ]
+#  EMBEDDED    = [ intel mach libkerncpp audit ]
 #  DEVELOPMENT = [ EMBEDDED config_dtrace ]
 #
 ######################################################################
 
 #
-# Note: MAC options must be set in both bsd/conf and security/conf MASTER files
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
 #
 options                CONFIG_MACF                     # Mandatory Access Control Framework
 options                CONFIG_MACF_SOCKET_SUBSET       # MACF subset of socket support
diff --git a/security/conf/MASTER.ppc b/security/conf/MASTER.ppc
index 177301b38eec06a1e63b2533873a6e7eb26773b3..534e8d2fc5b4fc86cf8b0405203f2e4aa2946ffd 100644 (file)
--- a/security/conf/MASTER.ppc
+++ b/security/conf/MASTER.ppc
@@ -4,7 +4,7 @@
 #  Standard Apple MacOS X Configurations:
 #  -------- ---- -------- ---------------
 #
-#  RELEASE = [ppc mach libkerncpp config_dtrace]
+#  RELEASE = [ppc mach libkerncpp config_dtrace audit]
 #  DEVELOPMENT = [RELEASE]
 #  PROFILE = [RELEASE]
 #  DEBUG = [RELEASE debug]
@@ -14,8 +14,8 @@
 ######################################################################
 
 #
-# Note: corresponding MACF options must be set in both security/conf
-#       bsd/conf and/or osfmk/conf MASTER files (depending upon the option)
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
 #
 options                CONFIG_MACF                     # Mandatory Access Control Framework
 options                CONFIG_MACF_SOCKET_SUBSET       # MACF subset of socket support
diff --git a/security/conf/Makefile.template b/security/conf/Makefile.template
index b5a57e1580a12059316f3caf36064d8423f61ea5..f697e624e3c42fac682141742739158cf2fea0ac 100644 (file)
--- a/security/conf/Makefile.template
+++ b/security/conf/Makefile.template
@@ -26,7 +26,7 @@ include $(MakeInc_def)
 #
 # XXX: CFLAGS
 #
-CFLAGS+= -DKERNEL -DBSD_KERNEL_PRIVATE \
+CFLAGS+= -I. -imacros meta_features.h -DKERNEL -DBSD_KERNEL_PRIVATE \
        -Wall -Wno-four-char-constants -fno-common
 
 #
diff --git a/security/conf/files b/security/conf/files
index c0565103dd237692f9a6415c0b346330eb2bd106..bea378a45e20eb81387faa9ad816edac5d728197 100644 (file)
--- a/security/conf/files
+++ b/security/conf/files
@@ -1,6 +1,12 @@
 # options
 
 # OPTIONS/kdebug                        optional kdebug
+OPTIONS/audit                          optional audit
+OPTIONS/config_macf                    optional config_macf
+OPTIONS/config_macf_socket_subset      optional config_macf_socket_subset
+OPTIONS/config_macf_socket             optional config_macf_socket
+OPTIONS/config_macf_net                        optional config_macf_net
+
 # security
 
 security/mac_alloc.c                                   optional config_macf
diff --git a/security/mac_audit.c b/security/mac_audit.c
index cb61c1912c228dc2e6be11c8acff2f7a2114bc05..286b6ad5a1344fc022cb4105c44a28ee7ee7d442 100644 (file)
--- a/security/mac_audit.c
+++ b/security/mac_audit.c
@@ -74,7 +74,7 @@
 #include <kern/kalloc.h>
 #include <kern/zalloc.h>
 
-#ifdef AUDIT
+#if AUDIT
 
 /* The zone allocator is initialized in mac_base.c. */
 zone_t mac_audit_data_zone;
@@ -395,4 +395,10 @@ mac_audit(int len, u_char *data)
 
        return (0);
 }
+
+int
+mac_audit_text(__unused char *text, __unused mac_policy_handle_t handle)
+{
+       return (0);
+}
 #endif /* !AUDIT */
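
The #ifdef-to-#if change matters because the config system can emit an option
macro that is defined but zero, in which case "#ifdef AUDIT" would compile
the audit code in anyway; testing the value handles both builds correctly.
A minimal demonstration of the difference:

    #include <stdio.h>

    #define AUDIT 0   /* option configured out, but macro still defined */

    int main(void)
    {
    #ifdef AUDIT
        puts("#ifdef AUDIT: branch taken even though AUDIT is 0");
    #endif
    #if AUDIT
        puts("#if AUDIT: audit support compiled in");
    #else
        puts("#if AUDIT: audit support correctly compiled out");
    #endif
        return 0;
    }
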
diff --git a/security/mac_base.c b/security/mac_base.c
index 37c9d05af09e5d26b66dd5d3967d30a31e2e3c8a..b659481314d7eec4bee46c912af821c88152fb0f 100644 (file)
--- a/security/mac_base.c
+++ b/security/mac_base.c
@@ -248,12 +248,14 @@ SYSCTL_UINT(_security_mac, OID_AUTO, label_mbufs, CTLFLAG_RW,
        &mac_label_mbufs, 0, "Label all MBUFs");
 #endif
 
+#if AUDIT
 /*
  * mac_audit_data_zone is the zone used for data pushed into the audit
  * record by policies. Using a zone simplifies memory management of this
  * data, and allows tracking of the amount of data in flight.
  */
 extern zone_t mac_audit_data_zone;
+#endif
 
 /*
  * mac_policy_list holds the list of policy modules.  Modules with a
@@ -540,9 +542,11 @@ mac_policy_initbsd(void)
        struct mac_policy_conf *mpc;
        u_int i;
 
+#if AUDIT
        mac_audit_data_zone = zinit(MAC_AUDIT_DATA_LIMIT,
                                    AQ_HIWATER * MAC_AUDIT_DATA_LIMIT,
                                    8192, "mac_audit_data_zone");
+#endif
 
        printf("MAC Framework successfully initialized\n");
 
diff --git a/tools/tests/xnu_quick_test/tests.c b/tools/tests/xnu_quick_test/tests.c
index ad0db0e630b162d1a39e5bcbf789a63b79161d73..4bd3f0e4ab9092d2955d3a5f2fe61d662d7d5e13 100644 (file)
--- a/tools/tests/xnu_quick_test/tests.c
+++ b/tools/tests/xnu_quick_test/tests.c
@@ -4577,7 +4577,7 @@ int aio_tests( void * the_argp )
        
        my_aiocbp = &my_aiocbs[ 0 ];
     my_aiocbp->aio_fildes = my_fd_list[ 0 ];
-       my_aiocbp->aio_offset = 0;
+       my_aiocbp->aio_offset = 4096;
        my_aiocbp->aio_buf = my_buffers[ 0 ];
     my_aiocbp->aio_nbytes = AIO_TESTS_BUFFER_SIZE;
     my_aiocbp->aio_reqprio = 0;