git.saurik.com Git - apple/xnu.git/commitdiff
xnu-1228.3.13.tar.gz  mac-os-x-1052  v1228.3.13
author    Apple <opensource@apple.com>
          Tue, 12 Feb 2008 10:17:42 +0000 (10:17 +0000)
committer Apple <opensource@apple.com>
          Tue, 12 Feb 2008 10:17:42 +0000 (10:17 +0000)
123 files changed:
README
bsd/conf/MASTER.i386
bsd/conf/MASTER.ppc
bsd/conf/files
bsd/dev/dtrace/dtrace_glue.c
bsd/dev/dtrace/lockstat.c
bsd/dev/memdev.c
bsd/dev/random/randomdev.c
bsd/hfs/hfs_catalog.c
bsd/hfs/hfs_cnode.c
bsd/hfs/hfs_link.c
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_vnops.c
bsd/hfs/hfs_xattr.c
bsd/kern/bsd_init.c
bsd/kern/kern_exec.c
bsd/kern/kern_exit.c
bsd/kern/kern_sysctl.c
bsd/kern/kpi_socketfilter.c
bsd/kern/pthread_synch.c
bsd/kern/sys_generic.c
bsd/kern/uipc_socket.c
bsd/kern/uipc_socket2.c
bsd/kern/uipc_syscalls.c
bsd/net/dlil.c
bsd/net/dlil.h
bsd/net/if.c
bsd/net/kext_net.h
bsd/net/kpi_interface.c
bsd/net/kpi_interface.h
bsd/net/kpi_protocol.c
bsd/net/route.c
bsd/netinet/igmp.c
bsd/netinet/in.h
bsd/netinet/ip_fw2.h
bsd/netinet/ip_input.c
bsd/netinet/ip_output.c
bsd/netinet/raw_ip.c
bsd/netinet/tcp_input.c
bsd/netinet/tcp_output.c
bsd/netinet/tcp_timer.c
bsd/netinet6/nd6.c
bsd/nfs/nfs_socket.c
bsd/nfs/nfs_syscalls.c
bsd/nfs/nfsm_subs.h
bsd/sys/aio.h
bsd/sys/dtrace_glue.h
bsd/sys/errno.h
bsd/sys/namei.h
bsd/sys/reboot.h
bsd/sys/socket.h
bsd/sys/socketvar.h
bsd/vfs/vfs_cache.c
bsd/vfs/vfs_journal.c
bsd/vfs/vfs_lookup.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_xattr.c
bsd/vm/vm_unix.c
config/BSDKernel.exports
config/IOKit.exports
config/MasterVersion
config/System6.0.exports
iokit/IOKit/pwr_mgt/IOPM.h
iokit/IOKit/pwr_mgt/RootDomain.h
iokit/Kernel/IOBufferMemoryDescriptor.cpp
iokit/Kernel/IODMACommand.cpp
iokit/Kernel/IODeviceTreeSupport.cpp
iokit/Kernel/IOHibernateIO.cpp
iokit/Kernel/IOKitKernelInternal.h
iokit/Kernel/IOMemoryDescriptor.cpp
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IOPlatformExpert.cpp
iokit/bsddev/IOKitBSDInit.cpp
iokit/conf/MASTER
iokit/conf/files
kgmacros
libsyscall/Makefile
libsyscall/Makefile.xbs
libsyscall/create-syscalls.pl
libsyscall/custom/SYS.h
libsyscall/mach/Makefile.inc
makedefs/MakeInc.def
osfmk/conf/MASTER.i386
osfmk/conf/MASTER.ppc
osfmk/i386/AT386/model_dep.c
osfmk/i386/acpi.c
osfmk/i386/hpet.c
osfmk/i386/machine_check.c
osfmk/i386/misc_protos.h
osfmk/i386/mp_desc.c
osfmk/i386/pmap.c
osfmk/i386/rtclock.c
osfmk/i386/startup64.c
osfmk/i386/thread.h
osfmk/i386/tsc.c
osfmk/kern/etimer.h
osfmk/kern/locks.c
osfmk/kern/mach_clock.c
osfmk/kern/priority.c
osfmk/kern/sched_prim.c
osfmk/kern/syscall_subr.c
osfmk/kern/thread.h
osfmk/kern/thread_act.c
osfmk/mach/i386/thread_status.h
osfmk/mach/machine.h
osfmk/vm/bsd_vm.c
osfmk/vm/vm_fault.c
osfmk/vm/vm_map.c
osfmk/vm/vm_object.c
osfmk/vm/vm_page.h
osfmk/vm/vm_pageout.c
osfmk/vm/vm_purgeable.c
osfmk/vm/vm_purgeable_internal.h
osfmk/vm/vm_resident.c
osfmk/vm/vm_shared_region.c
security/conf/MASTER
security/conf/MASTER.i386
security/conf/MASTER.ppc
security/conf/Makefile.template
security/conf/files
security/mac_audit.c
security/mac_base.c
tools/tests/xnu_quick_test/tests.c

diff --git a/README b/README
index 9ab5b012d8d964afedfa405ce815f02804ac9cb5..76ea08c3829cb1ebca801ab44a5282f7e852df2e 100644
--- a/README
+++ b/README
@@ -15,31 +15,27 @@ A. How to build XNU:
 
   By default, architecture defaults to the build machine 
   architecture, and the kernel configuration is set to build for DEVELOPMENT.
-  The machine configuration defaults to MX31ADS for arm and nothing for i386 and ppc.
+  The machine configuration defaults to S5L8900XRB for arm and default for i386 and ppc.
   
   This will also create a bootable image, mach_kernel,  and a kernel binary 
   with symbols, mach_kernel.sys.
-
-  Here are the valid arm machine configs:
-       LN2410SBC MX31ADS INTEGRATORCP S5I3000SMDK S5L8900XFPGA S5L8900XRB
-       OLOCREEK
        
   Examples:
-       /* make a debug kernel for MX31 arm board */
-       make TARGET_CONFIGS="debug arm MX31ADS"
+       /* make a debug kernel for H1 arm board */
+       make TARGET_CONFIGS="debug arm s5l8900xrb"
        
-    $(OBJROOT)/DEBUG_ARM_MX31ADS/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
-    $(OBJROOT)/DEBUG_ARM_MX31ADS/mach_kernel: bootable image
+    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
+    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/mach_kernel: bootable image
 
-       /* make debug and development kernels for MX31 arm board */
-       make TARGET_CONFIGS="debug arm MX31ADS  development arm MX31ADS"
+       /* make debug and development kernels for H1 arm board */
+       make TARGET_CONFIGS="debug arm s5l8900xrb  development arm s5l8900xrb"
        
-    $(OBJROOT)/DEBUG_ARM_MX31ADS/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
-    $(OBJROOT)/DEBUG_ARM_MX31ADS/mach_kernel: bootable image
+    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
+    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/mach_kernel: bootable image
     $(OBJROOT)/DEVELOPMENT_ARM/osfmk/DEVELOPMENT/osfmk.o: pre-linked object for osfmk component
     $(OBJROOT)/DEVELOPMENT_ARM/mach_kernel: bootable image
 
-       /* this is all you need to do to build MX31ADS arm with DEVELOPMENT kernel configuration  */
+       /* this is all you need to do to build H1 arm with DEVELOPMENT kernel configuration  */
        make TARGET_CONFIGS="default arm default"
        
        or the following is equivalent
diff --git a/bsd/conf/MASTER.i386 b/bsd/conf/MASTER.i386
index 24125e3ce6c5d4522eea6e2dc2cc075787c2dab7..a4504b8a8ad55874f21a96f61f7abcf2da72fb20 100644
@@ -55,7 +55,7 @@
 #
 #  EMBEDDED_BASE =     [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot ]
 #  EMBEDDED_FILESYS =  [ devfs hfs journaling fdesc fifo ]
-#  EMBEDDED_NET =      [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert config_mbuf_noexpand dummynet ipfirewall ipfw2 zlib ifnet_input_chk ]
+#  EMBEDDED_NET =      [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter config_mbuf_noexpand ]
 #  EMBEDDED =          [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ]
 #  DEVELOPMENT =       [ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver netmibs development mach_assert config_dtrace ]
 #
@@ -79,7 +79,8 @@ config                mach_kernel     swap generic                    # <mach>
 options                EVENT                                           # <event>
 
 #
-# Note: MAC options must be set in both bsd/conf and security/conf MASTER files
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
 #
 options                CONFIG_MACF                                     # Mandatory Access Control Framework
 options                CONFIG_MACF_SOCKET_SUBSET       # MAC socket subest (no labels)
diff --git a/bsd/conf/MASTER.ppc b/bsd/conf/MASTER.ppc
index 4e1513cad173060534edd3afa65c735dde2b83e5..9f4a08d6d25bd7feb923383a51d2dcf5febffd72 100644
@@ -69,7 +69,8 @@ options               UXPR            # user-level XPR package        # <uxpr>
 config         mach_kernel     swap generic                    # <mach>
 
 #
-# Note: MAC options must be set in both bsd/conf and security/conf MASTER files
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
 #
 options                CONFIG_MACF                     # Mandatory Access Control Framework
 options                CONFIG_MACF_SOCKET_SUBSET       # MAC socket subest (no labels)
diff --git a/bsd/conf/files b/bsd/conf/files
index 502307c920728c71c0694d91df66646add471b26..4f927bcbac2a21541d801de980fbee6b99669162 100644
@@ -67,7 +67,6 @@ OPTIONS/vndevice                      optional vndevice
 OPTIONS/audit                          optional audit
 OPTIONS/config_fse                     optional config_fse
 OPTIONS/sockets                                optional sockets
-OPTIONS/kpidirect                      optional kpidirect
 OPTIONS/development                    optional development
 OPTIONS/sysv_sem                       optional sysv_sem
 OPTIONS/sysv_msg                       optional sysv_msg
diff --git a/bsd/dev/dtrace/dtrace_glue.c b/bsd/dev/dtrace/dtrace_glue.c
index 035150aa799f0a0fd5511737813161d8a06cbd5d..1ef883569ca730ec83c5b8634272a28e6380f015 100644
@@ -1218,7 +1218,16 @@ dtrace_copyinstr(user_addr_t src, uintptr_t dst, size_t len)
        size_t actual;
        
        if (dtrace_copycheck( src, dst, len )) {
-               if (copyinstr((const user_addr_t)src, (char *)dst, (vm_size_t)len, &actual)) {
+               /*  copyin as many as 'len' bytes. */
+               int error = copyinstr((const user_addr_t)src, (char *)dst, (vm_size_t)len, &actual);
+
+               /*
+                * ENAMETOOLONG is returned when 'len' bytes have been copied in but the NUL terminator was
+                * not encountered. That does not require raising CPU_DTRACE_BADADDR, and we press on.
+                * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
+                * to the caller.
+                */
+               if (error && error != ENAMETOOLONG) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = src;
                }
@@ -1244,6 +1253,13 @@ dtrace_copyoutstr(uintptr_t src, user_addr_t dst, size_t len)
        size_t actual;
 
        if (dtrace_copycheck( dst, src, len )) {
+
+               /*
+                * ENAMETOOLONG is returned when 'len' bytes have been copied out but the NUL terminator was
+                * not encountered. We raise CPU_DTRACE_BADADDR in that case.
+                * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
+                * to the caller.
+                */
                if (copyoutstr((const void *)src, dst, (size_t)len, &actual)) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = dst;
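
The fix above leans on the copyinstr() contract: ENAMETOOLONG means the
destination filled up before a NUL was seen, which is still usable string
data for DTrace; only a genuine fault should raise CPU_DTRACE_BADADDR.
A userland sketch of that contract (a stand-in helper, not the kernel
routine):

    #include <errno.h>
    #include <stddef.h>

    /*
     * copyinstr()-style copy: at most 'len' bytes of a NUL-terminated
     * string; ENAMETOOLONG when the terminator was not reached.  As in
     * the comment above, no NUL is stuffed in that case.
     */
    static int
    copystr_sketch(const char *src, char *dst, size_t len, size_t *done)
    {
        size_t i;

        for (i = 0; i < len; i++) {
            dst[i] = src[i];
            if (src[i] == '\0') {
                *done = i + 1;
                return (0);             /* terminator copied */
            }
        }
        *done = len;
        return (ENAMETOOLONG);          /* buffer full, no NUL */
    }
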
diff --git a/bsd/dev/dtrace/lockstat.c b/bsd/dev/dtrace/lockstat.c
index f466c873e5e149e1eaa78a10bf53dc947c34daf4..3c5602be9d45bdb86be9857d67baad26bcddcd4a 100644
@@ -77,7 +77,7 @@ typedef struct lockstat_probe {
 
 lockstat_probe_t lockstat_probes[] =
 {
-#ifndef        __PPC__
+#ifdef __i386__
        /* Not implemented yet on PPC... */
        { LS_LCK_MTX_LOCK,      LSA_ACQUIRE,    LS_LCK_MTX_LOCK_ACQUIRE, DTRACE_IDNONE },
        { LS_LCK_MTX_LOCK,      LSA_SPIN,       LS_LCK_MTX_LOCK_SPIN, DTRACE_IDNONE },
diff --git a/bsd/dev/memdev.c b/bsd/dev/memdev.c
index 307ad77d9949cead937863ce7f5835fe25c7e0c4..f957be33c52a71e9f8109403b51827479d6fb488 100644
@@ -172,6 +172,7 @@ int mdevCMajor = -1;
 static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p, int is_char);
 dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys);
 dev_t mdevlookup(int devid);
+void mdevremoveall(void);
 
 static int mdevclose(__unused dev_t dev, __unused int flags, 
                                          __unused int devtype, __unused struct proc *p) {
@@ -609,3 +610,24 @@ dev_t mdevlookup(int devid) {
        if(!(mdev[devid].mdFlags & mdInited)) return -1;        /* This one hasn't been defined */
        return mdev[devid].mdBDev;                                                      /* Return the device number */
 }
+
+void mdevremoveall(void) {
+
+       int i;
+
+       for(i = 0; i < 16; i++) {
+               if(!(mdev[i].mdFlags & mdInited)) continue;     /* Ignore unused mdevs */
+
+               devfs_remove(mdev[i].mdbdevb);                  /* Remove the block device */
+               devfs_remove(mdev[i].mdcdevb);                  /* Remove the character device */
+
+               mdev[i].mdBase = 0;                             /* Clear the mdev's storage */
+               mdev[i].mdSize = 0;
+               mdev[i].mdSecsize = 0;
+               mdev[i].mdFlags = 0;
+               mdev[i].mdBDev = 0;
+               mdev[i].mdCDev = 0;
+               mdev[i].mdbdevb = 0;
+               mdev[i].mdcdevb = 0;
+       }
+}
diff --git a/bsd/dev/random/randomdev.c b/bsd/dev/random/randomdev.c
index 4a7741e2a78c551c47ebda5d9517d05b8feeed08..9208ff6b69e6cb449fc8c34be2b29e310b2e6c1d 100644
@@ -99,7 +99,7 @@ typedef BlockWord Block[kBSize];
 
 void add_blocks(Block a, Block b, BlockWord carry);
 void fips_initialize(void);
-void random_block(Block b);
+void random_block(Block b, int addOptional);
 u_int32_t CalculateCRC(u_int8_t* buffer, size_t length);
 
 /*
@@ -194,18 +194,22 @@ u_int32_t CalculateCRC(u_int8_t* buffer, size_t length)
  * get a random block of data per fips 186-2
  */
 void
-random_block(Block b)
+random_block(Block b, int addOptional)
 {
        int repeatCount = 0;
        do
        {
                // do one iteration
-               Block xSeed;
-               prngOutput (gPrngRef, (BYTE*) &xSeed, sizeof (xSeed));
                
-               // add the seed to the previous value of g_xkey
-               add_blocks (g_xkey, xSeed, 0);
-
+               if (addOptional)
+               {
+                       Block xSeed;
+                       prngOutput (gPrngRef, (BYTE*) &xSeed, sizeof (xSeed));
+                       
+                       // add the seed to the previous value of g_xkey
+                       add_blocks (g_xkey, xSeed, 0);
+               }
+               
                // compute "G"
                SHA1Update (&g_sha1_ctx, (const u_int8_t *) &g_xkey, sizeof (g_xkey));
                
@@ -309,11 +313,13 @@ PreliminarySetup(void)
        fips_initialize ();
 }
 
+const Block kKnownAnswer = {0x92b404e5, 0x56588ced, 0x6c1acd4e, 0xbf053f68, 0x9f73a93};
+
 void
 fips_initialize(void)
 {
-       /* Read the initial value of g_xkey from yarrow */
-       prngOutput (gPrngRef, (BYTE*) &g_xkey, sizeof (g_xkey));
+       /* So that we can do the self test, set the seed to zero */
+       memset(&g_xkey, 0, sizeof(g_xkey));
        
        /* initialize our SHA1 generator */
        SHA1Init (&g_sha1_ctx);
@@ -321,7 +327,20 @@ fips_initialize(void)
        /* other initializations */
        memset (zeros, 0, sizeof (zeros));
        g_bytes_used = 0;
-       random_block(g_random_data);
+       random_block(g_random_data, FALSE);
+       
+       // check here to see if we got the initial data we were expecting
+       int i;
+       for (i = 0; i < kBSize; ++i)
+       {
+               if (kKnownAnswer[i] != g_random_data[i])
+               {
+                       panic("FIPS random self test failed");
+               }
+       }
+       
+       // now do the random block again to make sure that userland doesn't get predicatable data
+       random_block(g_random_data, TRUE);
 }
 
 /*
@@ -490,7 +509,7 @@ random_read(__unused dev_t dev, struct uio *uio, __unused int ioflag)
                int bytes_available = kBSizeInBytes - g_bytes_used;
         if (bytes_available == 0)
                {
-                       random_block(g_random_data);
+                       random_block(g_random_data, TRUE);
                        g_bytes_used = 0;
                        bytes_available = kBSizeInBytes;
                }
@@ -533,7 +552,7 @@ read_random(void* buffer, u_int numbytes)
         int bytes_to_read = min(bytes_remaining, kBSizeInBytes - g_bytes_used);
         if (bytes_to_read == 0)
                {
-                       random_block(g_random_data);
+                       random_block(g_random_data, TRUE);
                        g_bytes_used = 0;
                        bytes_to_read = min(bytes_remaining, kBSizeInBytes);
                }
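
fips_initialize() now performs a known-answer test: generate one block
from an all-zero seed with no entropy mixed in (addOptional == FALSE),
panic if it differs from the compiled-in kKnownAnswer, then generate a
fresh block with the Yarrow seed mixed back in so the deterministic
output never reaches a caller.  A sketch of the pattern with placeholder
constants and assumed helpers (not the kernel's):

    #include <stdint.h>
    #include <string.h>

    #define KAT_WORDS 5

    /* Placeholder expected output; stands in for kKnownAnswer. */
    static const uint32_t kat_expected[KAT_WORDS] = {
        0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555
    };

    extern void rng_reset_zero_seed(void);                   /* assumed */
    extern void rng_block(uint32_t out[KAT_WORDS], int mix); /* assumed */
    extern void panic(const char *msg);

    static void
    rng_known_answer_test(void)
    {
        uint32_t out[KAT_WORDS];

        rng_reset_zero_seed();          /* deterministic starting state */
        rng_block(out, 0);              /* no entropy mixed in */
        if (memcmp(out, kat_expected, sizeof (out)) != 0)
            panic("FIPS random self test failed");

        /* Discard the predictable block: regenerate with entropy
         * before serving any consumer. */
        rng_block(out, 1);
    }
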
diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c
index 0a4953cbd2f6909b687c182af14d26b1ca35faf8..b52a0cd22f30dc3c74cfa9c21ad30b28fa23b541 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -226,6 +226,11 @@ cat_convertattr(
        }
 }
 
+/*
+ * Convert a raw catalog key and record into an in-core catalog descriptor.
+ *
+ * Note: The caller is responsible for releasing the catalog descriptor.
+ */
 __private_extern__
 int
 cat_convertkey(
@@ -286,6 +291,9 @@ cat_releasedesc(struct cat_desc *descp)
 
 /*
  * cat_lookup - lookup a catalog node using a cnode decriptor
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
  */
 __private_extern__
 int
@@ -394,6 +402,10 @@ exit:
  * cat_findname - obtain a descriptor from cnid
  *
  * Only a thread lookup is performed.
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
+
  */
 __private_extern__
 int
@@ -464,6 +476,9 @@ exit:
 
 /*
  * cat_idlookup - lookup a catalog node using a cnode id
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
  */
 __private_extern__
 int
@@ -765,6 +780,9 @@ exit:
  *
  * NOTE: both the catalog file and attribute file locks must
  *       be held before calling this function.
+ *
+ * The caller is responsible for releasing the output
+ * catalog descriptor (when supplied outdescp is non-null).
  */
 __private_extern__
 int
@@ -937,6 +955,9 @@ exit:
  *     3. BTDeleteRecord(from_cnode);
  *     4. BTDeleteRecord(from_thread);
  *     5. BTInsertRecord(to_thread);
+ *
+ * Note: The caller is responsible for releasing the output
+ * catalog descriptor (when supplied out_cdp is non-null).
  */
 __private_extern__
 int 
@@ -1690,6 +1711,7 @@ cat_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid)
                if (retval) {
                        hfs_systemfile_unlock(hfsmp, lockflags);
                        hfs_end_transaction(hfsmp);
+                       cat_releasedesc(&desc);
                        break;
                }
 
@@ -1697,6 +1719,7 @@ cat_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid)
                hfs_end_transaction(hfsmp);
 
                cnid = desc.cd_parentcnid;
+               cat_releasedesc(&desc);
        }
 
        return retval;
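
The added comments and cat_releasedesc() calls all enforce one ownership
rule: a cat_* lookup that fills in a catalog descriptor hands the caller
an allocated name buffer, and the caller must release it on every path,
error paths included.  A self-contained sketch of a conforming caller,
using stand-in types rather than the real HFS ones:

    #include <stdint.h>
    #include <stdlib.h>

    typedef uint32_t cnid_t;
    #define ROOT_CNID 2                   /* kHFSRootFolderID */

    struct cat_desc_sketch {
        cnid_t  cd_parentcnid;
        char   *cd_nameptr;               /* allocated by the lookup */
    };

    /* Assumed lookup: fills *outdesc and returns 0 on success. */
    extern int cat_findname_sketch(cnid_t cnid,
        struct cat_desc_sketch *outdesc);

    static void
    cat_releasedesc_sketch(struct cat_desc_sketch *descp)
    {
        free(descp->cd_nameptr);          /* caller owns the name */
        descp->cd_nameptr = NULL;
    }

    /* Walk toward the root, releasing each descriptor before reusing
     * it -- the shape cat_set_childlinkbit() has after this change. */
    static int
    walk_to_root(cnid_t cnid)
    {
        struct cat_desc_sketch desc;
        int error = 0;

        while (cnid != ROOT_CNID) {
            if ((error = cat_findname_sketch(cnid, &desc)) != 0)
                break;                    /* nothing held on failure */
            cnid = desc.cd_parentcnid;
            cat_releasedesc_sketch(&desc);
        }
        return (error);
    }
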
diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c
index 1f434da3dc3047abbf3a0dd44d43c360048431dc..c09c058c66cb525e18eb1a882ba198d59165c90e 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -105,6 +105,14 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
 
        (void) hfs_lock(cp, HFS_FORCE_LOCK);
 
+       /*
+        * Recycle named streams quickly so that the data fork vnode can
+        * go inactive in a timely manner (so that it can be zero filled
+        * or truncated if needed).
+        */
+       if (vnode_isnamedstream(vp))
+               recycle = 1;
+
        /*
         * We should lock cnode before checking the flags in the 
         * condition below and should unlock the cnode before calling 
@@ -219,9 +227,11 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
 
                lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
 
-               if (cp->c_blocks > 0)
-                       printf("hfs_inactive: attempting to delete a non-empty file!");
-
+               if (cp->c_blocks > 0) {
+                       printf("hfs_inactive: deleting non-empty%sfile %d, "
+                              "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
+                              (int)cp->c_fileid, (int)cp->c_blocks);
+               }
 
                //
                // release the name pointer in the descriptor so that
@@ -270,8 +280,15 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
                        hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
        }
 
+       /*
+        * A file may have had delayed allocations, in which case hfs_update
+        * would not have updated the catalog record (cat_update).  We need
+        * to do that now, before we lose our fork data.  We also need to
+        * force the update, or hfs_update will again skip the cat_update.
+        */
        if ((cp->c_flag & C_MODIFIED) ||
            cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
+               cp->c_flag |= C_FORCEUPDATE;
                hfs_update(vp, 0);
        }
 out:
@@ -388,6 +405,35 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
        (void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
        cp = VTOC(vp);
 
+       /*
+        * Check if a deleted resource fork vnode missed a
+        * VNOP_INACTIVE call and requires truncation.
+        */
+       if (VNODE_IS_RSRC(vp) &&
+           (cp->c_flag & C_DELETED) &&
+           (VTOF(vp)->ff_blocks != 0)) {
+               hfs_unlock(cp);
+               ubc_setsize(vp, 0);
+
+               hfs_lock_truncate(cp, TRUE);
+               (void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+
+               (void) hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ap->a_context);
+
+               hfs_unlock_truncate(cp, TRUE);
+       }
+       /*
+        * A file may have had delayed allocations, in which case hfs_update
+        * would not have updated the catalog record (cat_update).  We need
+        * to do that now, before we lose our fork data.  We also need to
+        * force the update, or hfs_update will again skip the cat_update.
+        */
+       if ((cp->c_flag & C_MODIFIED) ||
+           cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
+               cp->c_flag |= C_FORCEUPDATE;
+               hfs_update(vp, 0);
+       }
+
        /*
         * Keep track of an inactive hot file.
         */
@@ -742,6 +788,16 @@ hfs_getnewvnode(
        if (cp->c_flag & C_HARDLINK) {
                vnode_setmultipath(vp);
        }
+       /*
+        * Tag resource fork vnodes as needing an VNOP_INACTIVE
+        * so that any deferred removes (open unlinked files)
+        * have the chance to process the resource fork.
+        */
+       if (VNODE_IS_RSRC(vp)) {
+               /* Force VL_NEEDINACTIVE on this vnode */
+               vnode_ref(vp);
+               vnode_rele(vp);
+       }
        hfs_chashwakeup(cp, H_ALLOC | H_ATTACH);
 
        /*
diff --git a/bsd/hfs/hfs_link.c b/bsd/hfs/hfs_link.c
index 65f5e9ee8567ca273d3ca7939ed79442ad613626..a2e08a098713c662ed7d0ecc9e928166f3d567d7 100644
@@ -1059,13 +1059,14 @@ __private_extern__
 void
 hfs_relorigin(struct cnode *cp, cnid_t parentcnid)
 {
-       linkorigin_t *origin = NULL;
+       linkorigin_t *origin, *prev;
        void * thread = current_thread();
 
-       TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) {
+       TAILQ_FOREACH_SAFE(origin, &cp->c_originlist, lo_link, prev) {
                if ((origin->lo_thread == thread) ||
                    (origin->lo_parentcnid == parentcnid)) {
                        TAILQ_REMOVE(&cp->c_originlist, origin, lo_link);
+                       FREE(origin, M_TEMP);
                        break;
                }
        }
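
Two defects in one line: the old loop leaked the removed linkorigin_t,
and freeing it inside TAILQ_FOREACH() would be a use-after-free, since
the plain iterator reads the element's link field to advance.
TAILQ_FOREACH_SAFE() caches the successor before the body runs.  The
canonical idiom, with a BSD-style <sys/queue.h>:

    #include <stdlib.h>
    #include <sys/queue.h>

    struct node {
        int value;
        TAILQ_ENTRY(node) link;
    };
    TAILQ_HEAD(node_list, node);

    /* Remove and free every matching node.  The _SAFE variant stashes
     * the successor in 'tmp' before the body runs, so freeing 'n'
     * cannot poison the traversal. */
    static void
    remove_matching(struct node_list *head, int value)
    {
        struct node *n, *tmp;

        TAILQ_FOREACH_SAFE(n, head, link, tmp) {
            if (n->value == value) {
                TAILQ_REMOVE(head, n, link);
                free(n);
            }
        }
    }
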
diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c
index d5a05045bce0619d5f1b0d943ee6118fefeb2386..c0dc7253d7da460c8887b5b21d5a1c4643eb4167 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -2398,6 +2398,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock)
                } else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
                           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
                        *vpp = NULL;
+                       cat_releasedesc(&cndesc);
                        return (ENOENT);  /* open unlinked file */
                }
        }
@@ -3313,6 +3314,12 @@ out:
                VTOC(vp)->c_blocks = fp->ff_blocks;
 
        }
+       /*
+          Regardless of whether or not the totalblocks actually increased,
+          we should reset the allocLimit field. If it changed, it will
+          get updated; if not, it will remain the same.
+       */
+       hfsmp->allocLimit = vcb->totalBlocks;
        hfs_systemfile_unlock(hfsmp, lockflags);
        hfs_end_transaction(hfsmp);
 
@@ -4026,6 +4033,7 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
        journal_fork.cf_extents[0].blockCount = newBlockCount;
        journal_fork.cf_blocks = newBlockCount;
        error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
+       cat_releasedesc(&journal_desc);  /* all done with cat descriptor */
        if (error) {
                printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
                goto free_fail;
@@ -4140,6 +4148,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
        jib_fork.cf_extents[0].blockCount = 1;
        jib_fork.cf_blocks = 1;
        error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
+       cat_releasedesc(&jib_desc);  /* all done with cat descriptor */
        if (error) {
                printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
                goto fail;
diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c
index cac1f5b755faec3ab3757d0243dd65f50bcfa4de..d8350638aa2f170dcadf3d521c8991efb13cc3cb 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1728,9 +1728,10 @@ hfs_vnop_remove(ap)
 
        hfs_lock_truncate(cp, TRUE);
 
-       if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK)))
-               goto out;
-
+       if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) {
+               hfs_unlock_truncate(cp, TRUE);
+               return (error);
+       }
        error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0);
 
        //
@@ -1748,9 +1749,14 @@ hfs_vnop_remove(ap)
            recycle_rsrc = 1;
        }
 
-       hfs_unlockpair(dcp, cp);
-out:
+       /*
+        * Drop the truncate lock before unlocking the cnode
+        * (which can potentially perform a vnode_put and
+        * recycle the vnode which in turn might require the
+        * truncate lock)
+        */
        hfs_unlock_truncate(cp, TRUE);
+       hfs_unlockpair(dcp, cp);
 
        if (recycle_rsrc && vnode_getwithvid(rvp, rvid) == 0) {
                vnode_recycle(rvp);
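
The reordering matters because hfs_unlockpair() can drop the last
reference to the vnode, driving it into recycle/reclaim, and reclaim may
need the truncate lock; releasing the truncate lock first means that
code never blocks on a lock this thread still owns.  A toy pthread
analogue of the rule (mutex unlock has no side effects, so an explicit
call stands in for the recycle triggered by the cnode unlock):

    #include <pthread.h>

    static pthread_mutex_t truncate_lk = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t cnode_lk    = PTHREAD_MUTEX_INITIALIZER;

    /* Stands in for reclaim work that retakes the truncate lock. */
    static void
    recycle_work(void)
    {
        pthread_mutex_lock(&truncate_lk);
        /* ... truncate the doomed fork ... */
        pthread_mutex_unlock(&truncate_lk);
    }

    static void
    remove_teardown(void)
    {
        pthread_mutex_lock(&truncate_lk);
        pthread_mutex_lock(&cnode_lk);
        /* ... removal work under both locks ... */
        pthread_mutex_unlock(&truncate_lk); /* must be free before ... */
        pthread_mutex_unlock(&cnode_lk);    /* ... anything this release
                                             * triggers can run */
        recycle_work();
    }
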
@@ -1798,7 +1804,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
        int lockflags;
        int error = 0;
        int started_tr = 0;
-       int isbigfile = 0, hasxattrs=0, isdir=0;
+       int isbigfile = 0, defer_remove=0, isdir=0;
 
        cp = VTOC(vp);
        dcp = VTOC(dvp);
@@ -1866,11 +1872,22 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
         * (needed for hfs_truncate)
         */
        if (isdir == 0 && (cp->c_blocks - VTOF(vp)->ff_blocks)) {
-               error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE);
-               if (error)
-                       goto out;
-               /* Defer the vnode_put on rvp until the hfs_unlock(). */
-               cp->c_flag |= C_NEED_RVNODE_PUT;
+               /*
+                * We must avoid calling hfs_vgetrsrc() when we have
+                * an active resource fork vnode to avoid deadlocks
+                * when that vnode is in the VL_TERMINATE state. We
+                * can defer removing the file and its resource fork
+                * until the call to hfs_vnop_inactive() occurs.
+                */
+               if (cp->c_rsrc_vp) {
+                       defer_remove = 1;
+               } else {
+                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE);
+                       if (error)
+                               goto out;
+                       /* Defer the vnode_put on rvp until the hfs_unlock(). */
+                       cp->c_flag |= C_NEED_RVNODE_PUT;
+               }
        }
        /* Check if this file is being used. */
        if (isdir == 0) {
@@ -1887,7 +1904,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
           individual transactions in case there are too many */
        if ((hfsmp->hfs_attribute_vp != NULL) &&
            (cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) {
-           hasxattrs = 1;
+           defer_remove = 1;
        }
 
        /*
@@ -1976,10 +1993,10 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 
        /*
         * There are two cases to consider:
-        *  1. File is busy/big   ==> move/rename the file
+        *  1. File is busy/big/defer_remove ==> move/rename the file
         *  2. File is not in use ==> remove the file
         */
-       if (dataforkbusy || rsrcforkbusy || isbigfile || hasxattrs) {
+       if (dataforkbusy || rsrcforkbusy || isbigfile || defer_remove) {
                char delname[32];
                struct cat_desc to_desc;
                struct cat_desc todir_desc;
@@ -3191,6 +3208,7 @@ hfs_update(struct vnode *vp, __unused int waitfor)
        struct cat_fork *dataforkp = NULL;
        struct cat_fork *rsrcforkp = NULL;
        struct cat_fork datafork;
+       struct cat_fork rsrcfork;
        struct hfsmount *hfsmp;
        int lockflags;
        int error;
@@ -3272,6 +3290,18 @@ hfs_update(struct vnode *vp, __unused int waitfor)
                dataforkp = &datafork;
        }
 
+       /*
+        * For resource forks with delayed allocations, make sure
+        * the block count and file size match the number of blocks
+        * actually allocated to the file on disk.
+        */
+       if (rsrcforkp && (cp->c_rsrcfork->ff_unallocblocks != 0)) {
+               bcopy(rsrcforkp, &rsrcfork, sizeof(rsrcfork));
+               rsrcfork.cf_blocks = (cp->c_rsrcfork->ff_blocks - cp->c_rsrcfork->ff_unallocblocks);
+               rsrcfork.cf_size   = rsrcfork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
+               rsrcforkp = &rsrcfork;
+       }
+
        /*
         * Lock the Catalog b-tree file.
         */
@@ -3585,6 +3615,7 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int
        int error;
        int vid;
 
+restart:
        /* Attempt to use exising vnode */
        if ((rvp = cp->c_rsrc_vp)) {
                vid = vnode_vid(rvp);
@@ -3607,15 +3638,22 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int
 
                error = vnode_getwithvid(rvp, vid);
 
-               if (can_drop_lock)
+               if (can_drop_lock) {
                        (void) hfs_lock(cp, HFS_FORCE_LOCK);
-
+                       /*
+                        * When our lock was relinquished, the resource fork
+                        * could have been recycled.  Check for this and try
+                        * again.
+                        */
+                       if (error == ENOENT)
+                               goto restart;
+               }
                if (error) {
                        const char * name = (const char *)VTOC(vp)->c_desc.cd_nameptr;
 
                        if (name)
-                               printf("hfs_vgetrsrc: couldn't get"
-                                       " resource fork for %s\n", name);
+                               printf("hfs_vgetrsrc: couldn't get resource"
+                                      " fork for %s, err %d\n", name, error);
                        return (error);
                }
        } else {
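
hfs_vgetrsrc() has to drop the cnode lock around vnode_getwithvid(), and
the vid captured beforehand is what detects recycling; the new ENOENT
check restarts the whole lookup instead of failing a request that could
still succeed.  The general capture/relock/revalidate shape, sketched
with assumed helpers rather than the VFS calls:

    /* Assumed stand-ins for the vnode APIs. */
    struct obj_sketch { unsigned vid; };

    extern void lock_owner(void);
    extern void unlock_owner(void);
    extern struct obj_sketch *cached_obj(void);   /* never NULL here */
    extern int  get_with_vid(struct obj_sketch *o, unsigned vid);
                                        /* 0, or nonzero if recycled */

    static struct obj_sketch *
    acquire_sketch(void)
    {
        struct obj_sketch *o;
        unsigned vid;

    restart:
        lock_owner();
        o = cached_obj();
        vid = o->vid;                   /* capture identity ... */
        unlock_owner();                 /* ... before we may sleep */

        if (get_with_vid(o, vid) != 0)
            goto restart;               /* recycled meanwhile: retry */
        return (o);
    }
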
diff --git a/bsd/hfs/hfs_xattr.c b/bsd/hfs/hfs_xattr.c
index 37dca768bd6a6a8c3905882e7f0bdaf7ff312fe0..d025ae1cf5d6fe97ff17088e03504db0071fac5a 100644
@@ -1504,6 +1504,8 @@ hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid)
 #endif
                hfs_systemfile_unlock(hfsmp, lockflags);
                hfs_end_transaction(hfsmp);
+               if (result)
+                       break;
        }
 exit:
        FREE(iterator, M_TEMP);
diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c
index 2ea3d6377226591794b60cfaf3087f4332696e42..2a04d688caec3fcb294839c60271d9a6ebafd3b8 100644
@@ -569,6 +569,7 @@ bsd_init(void)
        /*
         * Initialize the calendar.
         */
+       bsd_init_kprintf("calling IOKitInitializeTime\n");
        IOKitInitializeTime();
 
        if (turn_on_log_leaks && !new_nkdbufs)
@@ -1031,7 +1032,9 @@ parse_bsd_args(void)
        if (PE_parse_boot_arg("nbuf", &max_nbuf_headers)) {
                customnbuf = 1;
        }
+#if !defined(SECURE_KERNEL)
        PE_parse_boot_arg("kmem", &setup_kmem);
+#endif
        PE_parse_boot_arg("trace", &new_nkdbufs);
 
        if (PE_parse_boot_arg("msgbuf", &msgbuf)) {
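
Guarding the parse with #if !defined(SECURE_KERNEL) means a secure
kernel never reads the "kmem" boot argument at all, so /dev/kmem cannot
be re-enabled from the boot command line.  The pattern in miniature
(parse_boot_arg_sketch stands in for PE_parse_boot_arg):

    #include <stdint.h>

    static uint32_t setup_kmem_sketch = 0;

    extern int parse_boot_arg_sketch(const char *name, uint32_t *out);

    static void
    parse_debug_args_sketch(void)
    {
    #if !defined(SECURE_KERNEL)
        /* Debug knob: compiled out, not merely defaulted off,
         * on SECURE_KERNEL builds. */
        (void)parse_boot_arg_sketch("kmem", &setup_kmem_sketch);
    #endif
    }
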
diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c
index ded4a1dcf2c1aa4e532f2102101be9602a681bf7..6b2702d7b63e0b9475ccd52f5b2d6b79ba6c394d 100644
@@ -1548,6 +1548,7 @@ int
 posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval)
 {
        proc_t p = ap;          /* quiet bogus GCC vfork() warning */
+       user_addr_t pid = uap->pid;
        register_t ival[2];             /* dummy retval for vfork() */
        struct image_params image_params, *imgp;
        struct vnode_attr va;
@@ -1809,8 +1810,8 @@ bad:
                 *
                 * If the parent wants the pid, copy it out
                 */
-               if (uap->pid != USER_ADDR_NULL)
-                       (void)suword(uap->pid, p->p_pid);
+               if (pid != USER_ADDR_NULL)
+                       (void)suword(pid, p->p_pid);
                retval[0] = error;
                /*
                 * Override inherited code signing flags with the
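
The fix captures uap->pid in a local at entry because posix_spawn() runs
through vfork()-style machinery, and by the time the copy-out happens
'uap' can no longer be trusted to describe the caller's arguments.  The
snapshot-then-use shape, with stand-in types and helpers:

    #include <stdint.h>

    typedef uint64_t uaddr_sketch;

    struct spawn_args_sketch { uaddr_sketch pid; };

    extern int spawn_work(void);   /* may invalidate *uap (assumed) */
    extern int copyout_int(uaddr_sketch uaddr, int value);

    static int
    spawn_sketch(struct spawn_args_sketch *uap, int child_pid)
    {
        uaddr_sketch pid_addr = uap->pid;   /* snapshot on entry */
        int error;

        error = spawn_work();       /* 'uap' is stale past this point */
        if (pid_addr != 0)
            (void)copyout_int(pid_addr, child_pid);
        return (error);
    }
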
diff --git a/bsd/kern/kern_exit.c b/bsd/kern/kern_exit.c
index 7bc8b1d74c19a74a3baff3416e175cd25c734d39..27f98defbaf6af68a9d6e0c667af0f4ba0ab235c 100644
@@ -246,8 +246,7 @@ exit1(proc_t p, int rv, int *retval)
                }
                sig_lock_to_exit(p);
        }
-#if !CONFIG_EMBEDDED /* BER_XXX */
-       if (p->p_pid == 1) {
+       if (p == initproc) {
                proc_unlock(p);
                printf("pid 1 exited (signal %d, exit %d)",
                    WTERMSIG(rv), WEXITSTATUS(rv));
@@ -257,7 +256,6 @@ exit1(proc_t p, int rv, int *retval)
                                                                "launchd"),
                                                        init_task_failure_data);
        }
-#endif
 
        p->p_lflag |= P_LEXIT;
        p->p_xstat = rv;
diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c
index 029fddc8ed35aa02a36835f52d7d611b5660b241..27f0e09066f220aa3a25dbdeabf1317a372f7899 100644
@@ -2415,23 +2415,26 @@ static int
 sysctl_nx
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
+#ifdef SECURE_KERNEL
+       return ENOTSUP;
+#endif
        int new_value, changed;
        int error;
 
        error = sysctl_io_number(req, nx_enabled, sizeof(nx_enabled), &new_value, &changed);
-    if (error)
-        return error;
+       if (error)
+               return error;
 
-    if (changed) {
+       if (changed) {
 #ifdef __i386__
                /*
                 * Only allow setting if NX is supported on the chip
                 */
                if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD))
-            return ENOTSUP;
+                       return ENOTSUP;
 #endif
-        nx_enabled = new_value;
-    }
+               nx_enabled = new_value;
+       }
        return(error);
 }
 
diff --git a/bsd/kern/kpi_socketfilter.c b/bsd/kern/kpi_socketfilter.c
index 377bb2e849ad36f8fa30346408bad99e56a02ef6..cefe3047326b8b6be6f5173cf816845d714c0312 100644
@@ -298,10 +298,19 @@ sflt_detach_private(
        if (!unregistering) {
                if ((entry->sfe_flags & SFEF_UNREGISTERING) != 0) {
                        /*
-                        * Another thread is unregistering the filter, we need to
-                        * avoid detaching the filter here so the socket won't go
-                        * away.
+                        * Another thread is unregistering the filter, we
+                        * need to avoid detaching the filter here so the
+                        * socket won't go away.  Bump up the socket's
+                        * usecount so that it won't be freed until after
+                        * the filter unregistration has been completed;
+                        * at this point the caller has already held the
+                        * socket's lock, so we can directly modify the
+                        * usecount.
                         */
+                       if (!(entry->sfe_flags & SFEF_DETACHXREF)) {
+                               entry->sfe_socket->so_usecount++;
+                               entry->sfe_flags |= SFEF_DETACHXREF;
+                       }
                        lck_mtx_unlock(sock_filter_lock);
                        return;
                }
@@ -322,9 +331,14 @@ sflt_detach_private(
        else {
                /*
                 * Clear the removing flag. We will perform the detach here or
        else {
                /*
                 * Clear the removing flag. We will perform the detach here or
-                * request a delayed deatch.
+                * request a delayed detach.  Since we do an extra ref release
+                * below, bump up the usecount if we haven't done so.
                 */
                entry->sfe_flags &= ~SFEF_UNREGISTERING;
                 */
                entry->sfe_flags &= ~SFEF_UNREGISTERING;
+               if (!(entry->sfe_flags & SFEF_DETACHXREF)) {
+                       entry->sfe_socket->so_usecount++;
+                       entry->sfe_flags |= SFEF_DETACHXREF;
+               }
        }
 
        if (entry->sfe_socket->so_filteruse != 0) {
        }
 
        if (entry->sfe_socket->so_filteruse != 0) {
@@ -510,10 +524,22 @@ sflt_unregister(
                filter->sf_flags |= SFF_DETACHING;
        
                for (next_entry = entry_head; next_entry;
                filter->sf_flags |= SFF_DETACHING;
        
                for (next_entry = entry_head; next_entry;
-                        next_entry = next_entry->sfe_next_onfilter) {
-                       socket_lock(next_entry->sfe_socket, 1);
+                   next_entry = next_entry->sfe_next_onfilter) {
+                       /*
+                        * Mark this as "unregistering"; upon dropping the
+                        * lock, another thread may win the race and attempt
+                        * to detach a socket from it (e.g. as part of close)
+                        * before we get a chance to detach.  Setting this
+                        * flag practically tells the other thread to go away.
+                        * If the other thread wins, this causes an extra
+                        * reference hold on the socket so that it won't be
+                        * deallocated until after we finish with the detach
+                        * for it below.  If we win the race, the extra
+                        * reference hold is also taken to compensate for the
+                        * extra reference release when detach is called
+                        * with a "1" for its second parameter.
+                        */
                        next_entry->sfe_flags |= SFEF_UNREGISTERING;
-                       socket_unlock(next_entry->sfe_socket, 0);       /* Radar 4201550: prevents the socket from being deleted while being unregistered */
                }
        }
        
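
Both racing paths -- the close-side detach and this unregister sweep --
may need to pin the socket with an extra usecount, but the compensating
reference must be taken exactly once and released exactly once; the
SFEF_DETACHXREF flag records that it exists.  The take-once shape,
minus the locking:

    #include <stdbool.h>

    struct entry_sketch {
        int  *usecount;        /* protected by the caller's lock */
        bool  xref_taken;      /* plays the role of SFEF_DETACHXREF */
    };

    /* Either racing path may call this; the ref is added only once. */
    static void
    take_xref_once(struct entry_sketch *e)
    {
        if (!e->xref_taken) {
            (*e->usecount)++;
            e->xref_taken = true;
        }
    }
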
diff --git a/bsd/kern/pthread_synch.c b/bsd/kern/pthread_synch.c
index 980c1ad892c9ff2c1d243347d3a116ff84a7bc74..9ccbc9a9e50a61d1829d32214f69d58aff844835 100644
@@ -159,7 +159,7 @@ void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), voi
 #define PTHREAD_START_SETSCHED 0x02000000
 #define PTHREAD_START_DETACHED 0x04000000
 #define PTHREAD_START_POLICY_BITSHIFT 16
-#define PTHREAD_START_POLICY_MASK 0xffff
+#define PTHREAD_START_POLICY_MASK 0xff
 #define PTHREAD_START_IMPORTANCE_MASK 0xffff
 
 #define SCHED_OTHER      POLICY_TIMESHARE
@@ -958,7 +958,8 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
                        extinfo.timeshare = 0;
                thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
 
-               precedinfo.importance = importance;
+#define BASEPRI_DEFAULT 31
+               precedinfo.importance = (importance - BASEPRI_DEFAULT);
                thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
        }
 
diff --git a/bsd/kern/sys_generic.c b/bsd/kern/sys_generic.c
index 0fe948aaed433cfcc7f547d929b912e7e3e31d09..509468087c76eb31fcbfd922eb3c29a9e1d3c256 100644
@@ -224,7 +224,7 @@ pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *re
        int error;
 
        if ( (error = preparefileread(p, &fp, fd, 1)) )
-               return (error);
+               goto out;
 
        error = dofileread(vfs_context_current(), fp, uap->buf, uap->nbyte,
                        uap->offset, FOF_OFFSET, retval);
@@ -234,7 +234,8 @@ pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *re
        if (!error)
            KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
              uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
-       
+
+out:
        return (error);
 }
 
diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c
index e178c29b478e8cb7b341bcaecdb0ac23df4b11a7..7b259ec9f657e36a049d8383732bf7f5118088c7 100644
@@ -837,8 +837,12 @@ soclose_wait_locked(struct socket *so)
                mutex_held = so->so_proto->pr_domain->dom_mtx;
        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
 
-       /* Double check here and return if there's no outstanding upcall */
-       if (!(so->so_flags & SOF_UPCALLINUSE))
+       /*
+        * Double check here and return if there's no outstanding upcall;
+        * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
+        */
+       if (!(so->so_flags & SOF_UPCALLINUSE) ||
+           !(so->so_flags & SOF_UPCALLCLOSEWAIT))
                return;
 
        so->so_flags |= SOF_CLOSEWAIT;
@@ -3195,6 +3199,19 @@ sosetopt(struct socket *so, struct sockopt *sopt)
 #endif /* MAC_SOCKET */
                        break;
 
+#ifdef __APPLE_API_PRIVATE
+               case SO_UPCALLCLOSEWAIT:
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval));
+                       if (error)
+                               goto bad;
+                       if (optval)
+                               so->so_flags |= SOF_UPCALLCLOSEWAIT;
+                       else
+                               so->so_flags &= ~SOF_UPCALLCLOSEWAIT;
+                       break;
+#endif
+
                default:
                        error = ENOPROTOOPT;
                        break;
@@ -3463,6 +3480,12 @@ integer:
 #endif /* MAC_SOCKET */
                        break;
 
+#ifdef __APPLE_API_PRIVATE
+               case SO_UPCALLCLOSEWAIT:
+                       optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
+                       goto integer;
+#endif
+
                default:
                        error = ENOPROTOOPT;
                        break;
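The SO_UPCALLCLOSEWAIT option added above is consumed later in this same commit by the NFS code. A minimal, hedged sketch of how an in-kernel caller opts in (it mirrors the sock_setsockopt() calls the commit adds in bsd/nfs/nfs_socket.c; the helper name here is illustrative only):

#include <sys/socket.h>
#include <sys/kpi_socket.h>

/* Illustrative helper, not part of this commit. */
static void
example_enable_upcall_closewait(socket_t so)
{
	int on = 1;

	/* With SOF_UPCALLCLOSEWAIT set, soclose_wait_locked() will
	 * block the close until an outstanding upcall has returned. */
	(void) sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT,
	    &on, sizeof (on));
}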
index 379b9afd616bcdabe1e2ddd5905b78c8226cfec6..41a606ca39fa35818172d8411a845b334cbd7452 100644 (file)
@@ -843,6 +843,7 @@ sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
                sb->sb_mb = m0;
        }
        sb->sb_lastrecord = m0;
+       sb->sb_mbtail = m0;
 
        m = m0->m_next;
        m0->m_next = 0;
index 1126e7955d59b249dfa1ffe7bc9142ac5edcd244..7e9cafa358223b23584c9c65562a642a216aff8e 100644 (file)
@@ -137,9 +137,9 @@ static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int,
 static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
     register_t *);
 static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
-    size_t);
+    size_t, boolean_t);
 static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
-    user_addr_t, size_t);
+    user_addr_t, size_t, boolean_t);
 #if SENDFILE
 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
     boolean_t);
@@ -251,9 +251,9 @@ bind(__unused proc_t p, struct bind_args *uap, __unused register_t *retval)
                goto out;
        }
        if (uap->namelen > sizeof (ss)) {
-               error = getsockaddr(so, &sa, uap->name, uap->namelen);
+               error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
        } else {
-               error = getsockaddr_s(so, &ss, uap->name, uap->namelen);
+               error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
                if (error == 0) {
                        sa = (struct sockaddr *)&ss;
                        want_free = FALSE;
@@ -595,6 +595,7 @@ connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused
        boolean_t want_free = TRUE;
        int error;
        int fd = uap->s;
+       boolean_t dgram;
 
        AUDIT_ARG(fd, uap->s);
        error = file_socket(fd, &so);
@@ -605,11 +606,17 @@ connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused
                goto out;
        }
 
+       /*
+        * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
+        * if this is a datagram socket; translate for other types.
+        */
+       dgram = (so->so_type == SOCK_DGRAM);
+
        /* Get socket address now before we obtain socket lock */
        if (uap->namelen > sizeof (ss)) {
-               error = getsockaddr(so, &sa, uap->name, uap->namelen);
+               error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
        } else {
-               error = getsockaddr_s(so, &ss, uap->name, uap->namelen);
+               error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
                if (error == 0) {
                        sa = (struct sockaddr *)&ss;
                        want_free = FALSE;
@@ -827,10 +834,10 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
        if (mp->msg_name != USER_ADDR_NULL) {
                if (mp->msg_namelen > sizeof (ss)) {
                        error = getsockaddr(so, &to, mp->msg_name,
-                           mp->msg_namelen);
+                           mp->msg_namelen, TRUE);
                } else {
                        error = getsockaddr_s(so, &ss, mp->msg_name,
-                           mp->msg_namelen);
+                           mp->msg_namelen, TRUE);
                        if (error == 0) {
                                to = (struct sockaddr *)&ss;
                                want_free = FALSE;
@@ -1840,7 +1847,7 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
  */
 static int
 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
-    size_t len)
+    size_t len, boolean_t translate_unspec)
 {
        struct sockaddr *sa;
        int error;
@@ -1865,7 +1872,7 @@ getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
                 * sockets we leave it unchanged and let the lower layer
                 * handle it.
                 */
-               if (sa->sa_family == AF_UNSPEC &&
+               if (translate_unspec && sa->sa_family == AF_UNSPEC &&
                    INP_CHECK_SOCKAF(so, AF_INET) &&
                    len == sizeof (struct sockaddr_in))
                        sa->sa_family = AF_INET;
@@ -1878,7 +1885,7 @@ getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
 
 static int
 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
-    user_addr_t uaddr, size_t len)
+    user_addr_t uaddr, size_t len, boolean_t translate_unspec)
 {
        int error;
 
@@ -1902,7 +1909,7 @@ getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
                 * sockets we leave it unchanged and let the lower layer
                 * handle it.
                 */
-               if (ss->ss_family == AF_UNSPEC &&
+               if (translate_unspec && ss->ss_family == AF_UNSPEC &&
                    INP_CHECK_SOCKAF(so, AF_INET) &&
                    len == sizeof (struct sockaddr_in))
                        ss->ss_family = AF_INET;
index 47690269a4562fcb87f9ad58f831080cd12d50c9..e3b16f486462417c600feacbe27bf90bab241be2 100644 (file)
@@ -310,6 +310,11 @@ dlil_write_end(void)
 static int
 proto_hash_value(u_long protocol_family)
 {
+       /*
+        * dlil_proto_unplumb_all() depends on the mapping between
+        * the hash bucket index and the protocol family defined
+        * here; future changes must be applied there as well.
+        */
        switch(protocol_family) {
                case PF_INET:
                        return 0;
@@ -2852,3 +2857,24 @@ dlil_if_release(
                ifnet_lock_done(ifp);
     
 }
+
+__private_extern__ void
+dlil_proto_unplumb_all(struct ifnet *ifp)
+{
+       /*
+        * if_proto_hash[0-3] are for PF_INET, PF_INET6, PF_APPLETALK
+        * and PF_VLAN, where each bucket contains exactly one entry;
+        * PF_VLAN does not need an explicit unplumb.
+        *
+        * if_proto_hash[4] is for other protocols; we expect anything
+        * in this bucket to respond to the DETACHING event (which would
+        * have happened by now) and do the unplumb then.
+        */
+       (void) proto_unplumb(PF_INET, ifp);
+#if INET6
+       (void) proto_unplumb(PF_INET6, ifp);
+#endif /* INET6 */
+#if NETAT
+       (void) proto_unplumb(PF_APPLETALK, ifp);
+#endif /* NETAT */
+}
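dlil_proto_unplumb_all() leans on the fixed bucket assignment in proto_hash_value(). A hedged sketch of the mapping it assumes — only the PF_INET case is visible in the hunk above; the remaining bucket numbers follow the order given in the new comment and are assumptions:

static int
proto_hash_value_sketch(u_long protocol_family)
{
	switch (protocol_family) {
	case PF_INET:		return 0;	/* unplumbed explicitly */
	case PF_INET6:		return 1;	/* unplumbed explicitly */
	case PF_APPLETALK:	return 2;	/* unplumbed explicitly */
	case PF_VLAN:		return 3;	/* no explicit unplumb needed */
	default:		return 4;	/* shared bucket; entries are
						   expected to detach on the
						   DETACHING event */
	}
}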
index 3f19f7108871ea6ec1b5926f2883da9d695a0455..6e3872b799f5700eff4745d30a87418d8f3020a1 100644 (file)
@@ -161,6 +161,7 @@ int dlil_attach_filter(ifnet_t ifp, const struct iff_filter *if_filter,
                                           interface_filter_t *filter_ref);
 void dlil_detach_filter(interface_filter_t filter);
 int dlil_detach_protocol(ifnet_t ifp, u_long protocol);
+extern void dlil_proto_unplumb_all(ifnet_t);
 
 #endif /* BSD_KERNEL_PRIVATE */
 
index 04b3cadf6087de50ae88a6fc63a0d8008520a6e4..499b4790c617e349e9d1f6471fdc7378c35fc832 100644 (file)
@@ -2048,13 +2048,14 @@ if_down_all(void)
        u_int32_t       count;
        u_int32_t       i;
 
-       if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp, &count) != 0) {
+       if (ifnet_list_get_all(IFNET_FAMILY_ANY, &ifp, &count) == 0) {
                for (i = 0; i < count; i++) {
                        if_down(ifp[i]);
+                       dlil_proto_unplumb_all(ifp[i]);
                }
                ifnet_list_free(ifp);
        }
-       
+
        return 0;
 }
 
index b7b98dd00923a8eef384a94aaa1e1c5a5d1b8e1c..6215515a3d64ca695798f3afdea3dd38983c0241 100644 (file)
@@ -48,8 +48,9 @@
 
 struct socket_filter;
 
-#define        SFEF_DETACHUSEZERO      0x1     // Detach when use reaches zero
-#define        SFEF_UNREGISTERING      0x2     // Remove due to unregister
+#define        SFEF_DETACHUSEZERO      0x1     /* Detach when use reaches zero */
+#define        SFEF_UNREGISTERING      0x2     /* Remove due to unregister */
+#define        SFEF_DETACHXREF         0x4     /* Extra reference held for detach */
 
 struct socket_filter_entry {
        struct socket_filter_entry      *sfe_next_onsocket;
index 1878cde4618715121c256234054a0c566fa5467d..d9dfca3f3eca4e48e740005575d3252a06dab467 100644 (file)
@@ -56,6 +56,9 @@
 extern struct dlil_threading_info *dlil_lo_thread_ptr;
 extern int dlil_multithreaded_input;
 
+static errno_t
+ifnet_list_get_common(ifnet_family_t, boolean_t, ifnet_t **, u_int32_t *);
+
 /*
        Temporary work around until we have real reference counting
        
@@ -1084,42 +1087,55 @@ ifnet_find_by_name(
 }
 
 errno_t
-ifnet_list_get(
-       ifnet_family_t family,
-       ifnet_t **list,
-       u_int32_t *count)
+ifnet_list_get(ifnet_family_t family, ifnet_t **list, u_int32_t *count)
+{
+       return (ifnet_list_get_common(family, FALSE, list, count));
+}
+
+__private_extern__ errno_t
+ifnet_list_get_all(ifnet_family_t family, ifnet_t **list, u_int32_t *count)
+{
+       return (ifnet_list_get_common(family, TRUE, list, count));
+}
+
+static errno_t
+ifnet_list_get_common(ifnet_family_t family, boolean_t get_all, ifnet_t **list,
+    u_int32_t *count)
 {
        struct ifnet *ifp;
        u_int32_t cmax = 0;
        *count = 0;
        errno_t result = 0;
-       
-       if (list == NULL || count == NULL) return EINVAL;
-       
+
+       if (list == NULL || count == NULL)
+               return (EINVAL);
+
        ifnet_head_lock_shared();
-       TAILQ_FOREACH(ifp, &ifnet, if_link)
-       {
-               if (ifp->if_eflags & IFEF_DETACHING) continue;
-               if (family == 0 || ifp->if_family == family)
+       TAILQ_FOREACH(ifp, &ifnet, if_link) {
+               if ((ifp->if_eflags & IFEF_DETACHING) && !get_all)
+                       continue;
+               if (family == IFNET_FAMILY_ANY || ifp->if_family == family)
                        cmax++;
        }
-       
+
        if (cmax == 0)
                result = ENXIO;
-       
+
        if (result == 0) {
-               MALLOC(*list, ifnet_t*, sizeof(ifnet_t) * (cmax + 1), M_TEMP, M_NOWAIT);
+               MALLOC(*list, ifnet_t*, sizeof(ifnet_t) * (cmax + 1),
+                   M_TEMP, M_NOWAIT);
                if (*list == NULL)
                        result = ENOMEM;
        }
 
        if (result == 0) {
-               TAILQ_FOREACH(ifp, &ifnet, if_link)
-               {
-                       if (ifp->if_eflags & IFEF_DETACHING) continue;
-                       if (*count + 1 > cmax) break;
-                       if (family == 0 || ((ifnet_family_t)ifp->if_family) == family)
-                       {
+               TAILQ_FOREACH(ifp, &ifnet, if_link) {
+                       if ((ifp->if_eflags & IFEF_DETACHING) && !get_all)
+                               continue;
+                       if (*count + 1 > cmax)
+                               break;
+                       if (family == IFNET_FAMILY_ANY ||
+                           ((ifnet_family_t)ifp->if_family) == family) {
                                (*list)[*count] = (ifnet_t)ifp;
                                ifnet_reference((*list)[*count]);
                                (*count)++;
@@ -1128,23 +1144,22 @@ ifnet_list_get(
                (*list)[*count] = NULL;
        }
        ifnet_head_done();
-       
-       return 0;
+
+       return (result);
 }
 
 void
-ifnet_list_free(
-       ifnet_t *interfaces)
+ifnet_list_free(ifnet_t *interfaces)
 {
        int i;
-       
-       if (interfaces == NULL) return;
-       
-       for (i = 0; interfaces[i]; i++)
-       {
+
+       if (interfaces == NULL)
+               return;
+
+       for (i = 0; interfaces[i]; i++) {
                ifnet_release(interfaces[i]);
        }
-       
+
        FREE(interfaces, M_TEMP);
 }
 
index 8a0cd2b7c5151479ac8391762d4b6d59f40e485e..dd3101b4ad3481e94c962ea9798379742309f6da 100644 (file)
@@ -1505,6 +1505,25 @@ errno_t ifnet_find_by_name(const char *ifname, ifnet_t *interface);
  */
 errno_t ifnet_list_get(ifnet_family_t family, ifnet_t **interfaces, u_int32_t *count);
 
+#ifdef KERNEL_PRIVATE
+/*!
+       @function ifnet_list_get_all
+       @discussion Get a list of attached interfaces. List will be set to
+               point to an array allocated by ifnet_list_get. The interfaces
+               are refcounted and the counts will be incremented before the
+               function returns. The list of interfaces must be freed using
+               ifnet_list_free.  This is similar to ifnet_list_get, except
+               that it includes interfaces that are detaching.
+       @param family The interface family (i.e. IFNET_FAMILY_ETHERNET). To
+               find interfaces of all families, use IFNET_FAMILY_ANY.
+       @param interfaces A pointer to an array of interface references.
+       @param count A pointer that will be filled in with the number of
+               matching interfaces in the array.
+       @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_list_get_all(ifnet_family_t family, ifnet_t **interfaces, u_int32_t *count);
+#endif /* KERNEL_PRIVATE */
+
 /*!
        @function ifnet_list_free
        @discussion Free a list of interfaces returned by ifnet_list_get.
index 8b3614a49a365da5fea3585bc190266a31381e3f..9b63ec84032abc2051c5d2963f8ca28eb7a3a0bb 100644 (file)
@@ -266,6 +266,7 @@ proto_input_run(void)
                                        }
                                }
                                if (locked) {
+                                       locked = 0;
                                        lck_mtx_unlock(entry->domain->dom_mtx);
                                }       
                }
index ffd62033f8be663e111509e365154bbe9d6f8509..e00ce3eaa10976ab53cd3922a2438c7cfd8c9ec8 100644 (file)
@@ -382,7 +382,7 @@ rtfree_locked(struct rtentry *rt)
         * close routine typically issues RTM_DELETE which clears the RTF_UP
         * flag on the entry so that the code below reclaims the storage.
         */
-       if (rnh->rnh_close && rt->rt_refcnt == 0)
+       if (rnh && rnh->rnh_close && rt->rt_refcnt == 0)
                rnh->rnh_close((struct radix_node *)rt, rnh);
 
        /*
index 961549d608bbb7e942c937bfd4f73d45d75c5079..1889c7125d3608d3091061c33734a908ab8f45f2 100644 (file)
@@ -110,7 +110,7 @@ static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
 #endif
 
 static struct router_info *
-               find_rti(struct ifnet *ifp);
+               find_rti(struct ifnet *ifp, int wait);
 
 static struct igmpstat igmpstat;
 
@@ -155,7 +155,7 @@ igmp_init(void)
 
 static struct router_info *
 find_rti(
-       struct ifnet *ifp)
+       struct ifnet *ifp, int wait)
 {
        struct router_info *rti = Head;
        
@@ -173,7 +173,7 @@ find_rti(
                rti = rti->rti_next;
        }
        
-       MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, M_NOWAIT);
+       MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, wait);
        if (rti != NULL)
        {
                rti->rti_ifp = ifp;
@@ -243,7 +243,7 @@ igmp_input(
        timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
        if (timer == 0)
                timer = 1;
-       rti = find_rti(ifp);
+       rti = find_rti(ifp, M_NOWAIT);
        if (rti == NULL) {
                m_freem(m);
                return;
@@ -398,7 +398,7 @@ igmp_joingroup(struct in_multi *inm)
                inm->inm_timer = 0;
                inm->inm_state = IGMP_OTHERMEMBER;
        } else {
-               inm->inm_rti = find_rti(inm->inm_ifp);
+               inm->inm_rti = find_rti(inm->inm_ifp, M_WAITOK);
                if (inm->inm_rti == NULL) return ENOMEM;
                igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
                inm->inm_timer = IGMP_RANDOM_DELAY(
@@ -438,7 +438,7 @@ igmp_fasttimo(void)
        while (inm != NULL) {
                if (inm->inm_timer == 0) {
                        /* do nothing */
-               } else if (--inm->inm_timer == 0) {
+               } else if ((--inm->inm_timer == 0) && (inm->inm_rti != NULL)) {
                        igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
                        inm->inm_state = IGMP_IREPORTEDLAST;
                } else {
index 8ed004c8244c780c6aba7ab139556f122b6089b0..7f23a9e6a6168eb1020c76fa158aa36f6cbd5a32 100644 (file)
@@ -440,7 +440,7 @@ struct ip_opts {
 
 #define        IP_TRAFFIC_MGT_BACKGROUND       65   /* int*; get background IO flags; set background IO */
 
-#if CONFIG_FORCE_OUT_IFP
+#ifdef PRIVATE
 /* This is a hack, this is only a hack. */
 #define        IP_FORCE_OUT_IFP        69      /* char ifname[] - send traffic on this interface */
 #endif
index cd1514ffd7769ef76599b2ed2f526bad8b64fd76..1e36b65a9f70a4006a7527641036faecd9fe5015 100644 (file)
@@ -432,6 +432,7 @@ struct _ipfw_dyn_rule {
  * Main firewall chains definitions and global var's definitions.
  */
 #ifdef KERNEL
+#if IPFIREWALL
 
 #define        IP_FW_PORT_DYNT_FLAG    0x10000
 #define        IP_FW_PORT_TEE_FLAG     0x20000
@@ -457,6 +458,7 @@ struct ip_fw_args {
        u_int16_t       divert_rule;    /* divert cookie                */
        u_int32_t       retval;
 };
+//struct ip_fw_args;
 
 /*
  * Function definitions.
@@ -476,6 +478,7 @@ extern ip_fw_ctl_t *ip_fw_ctl_ptr;
 extern int fw_one_pass;
 extern int fw_enable;
 #define        IPFW_LOADED     (ip_fw_chk_ptr != NULL)
+#endif /* IPFIREWALL */
 #endif /* KERNEL */
 
 #endif /* !__LP64__ */
index 225164fd6bbb60558abce9a3630f9e7c67ef76b9..8743d9178d9f31d484c35d3ae619d1c513c5c406 100644 (file)
@@ -258,6 +258,7 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
 
 
 /* Firewall hooks */
+#if IPFIREWALL
 ip_fw_chk_t *ip_fw_chk_ptr;
 int fw_enable = 1;
 int fw_bypass = 1;
@@ -268,6 +269,7 @@ ip_dn_io_t *ip_dn_io_ptr;
 #endif
 
 int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **) = NULL;
+#endif /* IPFIREWALL */
 
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "link local");
 
@@ -531,7 +533,9 @@ ip_input(struct mbuf *m)
        u_short sum;
        struct in_addr pkt_dst;
        u_int32_t div_info = 0;         /* packet divert/tee info */
+#if IPFIREWALL
        struct ip_fw_args args;
+#endif
        ipfilter_t inject_filter_ref = 0;
        struct m_tag    *tag;
        struct route    ipforward_rt;
@@ -557,6 +561,7 @@ ip_input(struct mbuf *m)
        }
 #endif /* DUMMYNET */
 
+#if IPDIVERT
        if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
                struct divert_tag       *div_tag;
                
@@ -565,6 +570,8 @@ ip_input(struct mbuf *m)
 
                m_tag_delete(m, tag);
        }
+#endif
+
        if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
                struct ip_fwd_tag       *ipfwd_tag;
                
@@ -815,7 +822,11 @@ pass:
         * to be sent and the original packet to be freed).
         */
        ip_nhops = 0;           /* for source routed packets */
+#if IPFIREWALL
        if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop, &ipforward_rt)) {
+#else
+       if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, NULL, &ipforward_rt)) {
+#endif
                return;
        }
 
@@ -842,8 +853,12 @@ pass:
         * Cache the destination address of the packet; this may be
         * changed by use of 'ipfw fwd'.
         */
+#if IPFIREWALL
        pkt_dst = args.next_hop == NULL ?
            ip->ip_dst : args.next_hop->sin_addr;
+#else
+       pkt_dst = ip->ip_dst;
+#endif
 
        /*
         * Enable a consistency check between the destination address
@@ -860,8 +875,12 @@ pass:
         * the packets are received.
         */
        checkif = ip_checkinterface && (ipforwarding == 0) && 
-           ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
-           (args.next_hop == NULL);
+           ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0)
+#if IPFIREWALL
+           && (args.next_hop == NULL);
+#else
+               ;
+#endif
 
        lck_mtx_lock(rt_mtx);
        TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
@@ -989,7 +1008,11 @@ pass:
                OSAddAtomic(1, (SInt32*)&ipstat.ips_cantforward);
                m_freem(m);
        } else {
+#if IPFIREWALL
                ip_forward(m, 0, args.next_hop, &ipforward_rt);
+#else
+               ip_forward(m, 0, NULL, &ipforward_rt);
+#endif
                if (ipforward_rt.ro_rt != NULL) {
                        rtfree(ipforward_rt.ro_rt);
                        ipforward_rt.ro_rt = NULL;
@@ -1184,6 +1207,7 @@ found:
         */
        OSAddAtomic(1, (SInt32*)&ipstat.ips_delivered);
        {
+#if IPFIREWALL
                if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
                        /* TCP needs IPFORWARD info if available */
                        struct m_tag *fwd_tag;
@@ -1212,6 +1236,9 @@ found:
                
                        ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
                }
+#else
+               ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
+#endif
                
                return;
        }
index c065797e73441f6bc823ffbbb070fb48abb4a16e..db39fe174d3ca7277baad8bd55a0b98a89bb8c62 100644 (file)
@@ -249,7 +249,9 @@ ip_output_list(
 #if IPFIREWALL_FORWARD
        int fwd_rewrite_src = 0;
 #endif
+#if IPFIREWALL
        struct ip_fw_args args;
+#endif
        int didfilter = 0;
        ipfilter_t inject_filter_ref = 0;
        struct m_tag    *tag;
@@ -261,8 +263,8 @@ ip_output_list(
        KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
 
        packetlist = m0;
-       args.next_hop = NULL;
 #if IPFIREWALL
+       args.next_hop = NULL;
        args.eh = NULL;
        args.rule = NULL;
        args.divert_rule = 0;                   /* divert cookie */
@@ -297,7 +299,6 @@ ip_output_list(
                m_tag_delete(m0, tag);
        }
 #endif /* IPDIVERT */
-#endif /* IPFIREWALL */
 
        if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
                struct ip_fwd_tag       *ipfwd_tag;
@@ -307,6 +308,7 @@ ip_output_list(
                
                m_tag_delete(m0, tag);
        }
+#endif /* IPFIREWALL */
 
        m = m0;
        
@@ -356,7 +358,11 @@ loopit:
                hlen = len;
        }
        ip = mtod(m, struct ip *);
+#if IPFIREWALL
        pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
+#else
+       pkt_dst = ip->ip_dst;
+#endif
 
        /*
         * Fill in IP header.
index df763aac54358bdc804ad3c13c30289d95b88e5a..e30687513e2f8ecd23b1ba5dc1f47024e94a8385 100644 (file)
@@ -125,10 +125,12 @@ struct    inpcbhead ripcb;
 struct inpcbinfo ripcbinfo;
 
 /* control hooks for ipfw and dummynet */
+#if IPFIREWALL
 ip_fw_ctl_t *ip_fw_ctl_ptr;
 #if DUMMYNET
 ip_dn_ctl_t *ip_dn_ctl_ptr;
 #endif /* DUMMYNET */
+#endif /* IPFIREWALL */
 
 /*
  * Nominal space allocated to a raw ip socket.
index 39a5fc252cce1ceebf3cc90487a04efadf3d48f8..302ab9431bb26343bbcb76f8af1466c45529c2ef 100644 (file)
@@ -1462,13 +1462,6 @@ findpcb:
                                 * Grow the congestion window, if the
                                 * connection is cwnd bound.
                                 */
-                               if (tp->snd_cwnd < tp->snd_wnd) {
-                                       tp->t_bytes_acked += acked;
-                                       if (tp->t_bytes_acked > tp->snd_cwnd) {
-                                               tp->t_bytes_acked -= tp->snd_cwnd;
-                                               tp->snd_cwnd += tp->t_maxseg;
-                                       }
-                               }
                                sbdrop(&so->so_snd, acked);
                                if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
                                    SEQ_LEQ(th->th_ack, tp->snd_recover))
@@ -1794,7 +1787,6 @@ findpcb:
                                tp->ecn_flags &= ~TE_SENDIPECT;
                        }
                        
-                       soisconnected(so);
 #if CONFIG_MACF_NET && CONFIG_MACF_SOCKET
                        /* XXXMAC: recursive lock: SOCK_LOCK(so); */
                        mac_socketpeer_label_associate_mbuf(m, so);
@@ -1835,6 +1827,10 @@ findpcb:
                                tp->t_state = TCPS_ESTABLISHED;
                                tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
                        }
+                       /* soisconnected may lead to socket_unlock in case of upcalls,
+                        * make sure this is done when everything is setup.
+                        */
+                       soisconnected(so);
                } else {
                /*
                 *  Received initial SYN in SYN-SENT[*] state => simul-
@@ -2223,7 +2219,6 @@ trimthenstep6:
        case TCPS_SYN_RECEIVED:
 
                tcpstat.tcps_connects++;
-               soisconnected(so);
 
                /* Do window scaling? */
                if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
@@ -2252,8 +2247,14 @@ trimthenstep6:
                        (void) tcp_reass(tp, (struct tcphdr *)0, &tlen,
                            (struct mbuf *)0);
                tp->snd_wl1 = th->th_seq - 1;
+
                /* FALLTHROUGH */
 
+               /* soisconnected may lead to socket_unlock in case of upcalls,
+                * make sure this is done when everything is setup.
+                */
+               soisconnected(so);
+
        /*
         * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
         * ACKs.  If the ack is in the range
@@ -2542,30 +2543,45 @@ process_ACK:
                        register u_int cw = tp->snd_cwnd;
                        register u_int incr = tp->t_maxseg;
 
-                       if (cw >= tp->snd_ssthresh) {
-                               tp->t_bytes_acked += acked;
-                               if (tp->t_bytes_acked >= cw) {
+                       if ((acked > incr) && tcp_do_rfc3465) {
+                               if (cw >= tp->snd_ssthresh) {
+                                       tp->t_bytes_acked += acked;
+                                       if (tp->t_bytes_acked >= cw) {
                                        /* Time to increase the window. */
-                                       tp->t_bytes_acked -= cw;
-                               } else {
+                                               tp->t_bytes_acked -= cw;
+                                       } else {
                                        /* No need to increase yet. */
-                                       incr = 0;
+                                               incr = 0;
+                                       }
+                               } else {
+                                       /*
+                                        * If the user explicitly enables RFC3465
+                                        * use 2*SMSS for the "L" param.  Otherwise
+                                        * use the more conservative 1*SMSS.
+                                        *
+                                        * (See RFC 3465 2.3 Choosing the Limit)
+                                        */
+                                       u_int abc_lim;
+
+                                       abc_lim = (tcp_do_rfc3465 == 0) ?
+                                           incr : incr * 2;
+                                       incr = lmin(acked, abc_lim);
                                }
-                       } else {
+                       }
+                       else {
                                /*
                                /*
-                                * If the user explicitly enables RFC3465
-                                * use 2*SMSS for the "L" param.  Otherwise
-                                * use the more conservative 1*SMSS.
-                                *
-                                * (See RFC 3465 2.3 Choosing the Limit)
+                                * If the window gives us less than ssthresh packets
+                                * in flight, open exponentially (segsz per packet).
+                                * Otherwise open linearly: segsz per window
+                                * (segsz^2 / cwnd per packet).
                                 */
-                               u_int abc_lim;
-
-                               abc_lim = (tcp_do_rfc3465 == 0) ?
-                                   incr : incr * 2;
-                               incr = min(acked, abc_lim);
+               
+                                       if (cw >= tp->snd_ssthresh) {
+                                               incr = incr * incr / cw;
+                                       }
                        }
 
                        }
 
+
                        tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
                }
                if (acked > so->so_snd.sb_cc) {
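A hedged restatement of the reworked opening policy above (RFC 3465, "Appropriate Byte Counting"), stripped of the kernel types. The free-standing helper and its names are illustrative only; the real code keeps the accumulator in tp->t_bytes_acked:

static unsigned int
cwnd_increment_sketch(unsigned int cw, unsigned int ssthresh,
    unsigned int acked, unsigned int smss, int do_rfc3465,
    unsigned int *bytes_acked /* caller's t_bytes_acked accumulator */)
{
	unsigned int incr = smss;

	if (acked > smss && do_rfc3465) {
		if (cw >= ssthresh) {
			/* congestion avoidance: one SMSS per cwnd's
			 * worth of acknowledged bytes */
			*bytes_acked += acked;
			if (*bytes_acked >= cw)
				*bytes_acked -= cw;
			else
				incr = 0;
		} else {
			/* slow start: limit growth per ACK to
			 * L = 2*SMSS (RFC 3465 section 2.3) */
			unsigned int abc_lim = 2 * smss;
			incr = (acked < abc_lim) ? acked : abc_lim;
		}
	} else if (cw >= ssthresh) {
		/* classic linear opening: ~smss^2/cw per ACK */
		incr = smss * smss / cw;
	}
	return (incr);	/* caller: cwnd = min(cw + incr, max window) */
}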
@@ -2577,7 +2593,6 @@ process_ACK:
                        tp->snd_wnd -= acked;
                        ourfinisacked = 0;
                }
-               sowwakeup(so);
                /* detect una wraparound */
                if ((tcp_do_newreno || tp->sack_enable) &&
                    !IN_FASTRECOVERY(tp) &&
@@ -2595,6 +2610,12 @@ process_ACK:
                }
                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
                        tp->snd_nxt = tp->snd_una;
+                       
+               /*
+                * sowwakeup must happen after snd_una, et al. are updated so that
+                * the sequence numbers are in sync with so_snd
+                */
+               sowwakeup(so);
 
                switch (tp->t_state) {
 
@@ -2613,9 +2634,9 @@ process_ACK:
                                 * we'll hang forever.
                                 */
                                if (so->so_state & SS_CANTRCVMORE) {
-                                       soisdisconnected(so);
                                        tp->t_timer[TCPT_2MSL] = tcp_maxidle;
                                        add_to_time_wait(tp);
+                                       soisdisconnected(so);
                                }
                                tp->t_state = TCPS_FIN_WAIT_2;
                                goto drop;
index 250d4a2d66ef6c9582fef571065d6718a3cdf53c..db82d4d90a5517ce13db2c728c088b6d7619ea79 100644 (file)
@@ -167,8 +167,10 @@ extern int ipsec_bypass;
 
 extern int slowlink_wsize;     /* window correction for slow links */
 extern u_long  route_generation;
+#if IPFIREWALL
 extern int fw_enable;          /* firewall check for packet chaining */
 extern int fw_bypass;          /* firewall check: disable packet chaining if there is rules */
+#endif /* IPFIREWALL */
 
 extern vm_size_t       so_cache_zone_element_size;
 
@@ -677,10 +679,19 @@ after_sack_rexmit:
                long adv = lmin(recwin, (long)TCP_MAXWIN << tp->rcv_scale) -
                        (tp->rcv_adv - tp->rcv_nxt);
 
-               if (adv >= (long) (2 * tp->t_maxseg))
-                       goto send;
-               if (2 * adv >= (long) so->so_rcv.sb_hiwat)
-                       goto send;
+               if (adv >= (long) (2 * tp->t_maxseg)) {
+                       
+                       /* 
+                        * Update only if the resulting scaled value of the window changed, or
+                        * if there is a change in the sequence since the last ack.
+                        * This avoids what appears as dupe ACKS (see rdar://5640997)
+                        */
+
+                       if ((tp->last_ack_sent != tp->rcv_nxt) || (((recwin + adv) >> tp->rcv_scale) > recwin)) 
+                               goto send;
+               }
+               if (2 * adv >= (long) so->so_rcv.sb_hiwat) 
+                               goto send;
        }
 
        /*
@@ -1239,6 +1250,8 @@ send:
                tp->sackhint.sack_bytes_rexmit += len;
        }
        th->th_ack = htonl(tp->rcv_nxt);
+       tp->last_ack_sent = tp->rcv_nxt;
+
        if (optlen) {
                bcopy(opt, th + 1, optlen);
                th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
@@ -1623,6 +1636,11 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
        boolean_t chain;
        boolean_t unlocked = FALSE;
 
+       /* Make sure ACK/DELACK conditions are cleared before
+        * we unlock the socket.
+        */
+
+       tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
        /*
         * If allowed, unlock TCP socket while in IP 
         * but only if the connection is established and
@@ -1642,11 +1660,15 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
         * - there is a non default rule set for the firewall
         */
 
-       chain = tcp_packet_chaining > 1 &&
+       chain = tcp_packet_chaining > 1
 #if IPSEC
-               ipsec_bypass &&
+               && ipsec_bypass
+#endif
+#if IPFIREWALL
+               && (fw_enable == 0 || fw_bypass)
 #endif
-               (fw_enable == 0 || fw_bypass);
+               ; // I'm important, not extraneous
+
        while (pkt != NULL) {
                struct mbuf *npkt = pkt->m_nextpkt;
 
        while (pkt != NULL) {
                struct mbuf *npkt = pkt->m_nextpkt;
index 739e2816bcb1473715ed093075847c90d53d851a..833caaf4cdbe179d937067052c5735ba4ea42c15 100644 (file)
@@ -358,7 +358,7 @@ static int bg_cnt = 0;
 void
 tcp_slowtimo()
 {
 void
 tcp_slowtimo()
 {
-       struct inpcb *inp;
+       struct inpcb *inp, *nxt;
        struct tcpcb *tp;
        struct socket *so;
        int i;
        struct tcpcb *tp;
        struct socket *so;
        int i;
@@ -537,12 +537,12 @@ twunlock:
        }
 
 
        }
 
 
-       LIST_FOREACH(inp, &tcb, inp_list) {
+       LIST_FOREACH_SAFE(inp, &tcb, inp_list, nxt) {
                tcp_garbage_collect(inp, 0);
        }
 
        /* Now cleanup the time wait ones */
                tcp_garbage_collect(inp, 0);
        }
 
        /* Now cleanup the time wait ones */
-       LIST_FOREACH(inp, &time_wait_slots[cur_tw_slot], inp_list) {
+       LIST_FOREACH_SAFE(inp, &time_wait_slots[cur_tw_slot], inp_list, nxt) {
                tcp_garbage_collect(inp, 1);
        }
 
                tcp_garbage_collect(inp, 1);
        }
 
index 1bee938d448a6a40ff83e8c6d0c11ee361b32b8f..f0c838d588a09a52a7009331ab8531f83b23ad96 100644 (file)
@@ -104,7 +104,7 @@ int nd6_debug = 0;
 static int nd6_inuse, nd6_allocated;
 
 struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6, NULL, NULL, 0, 0, 0, 0, 0 };
 static int nd6_inuse, nd6_allocated;
 
 struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6, NULL, NULL, 0, 0, 0, 0, 0 };
-size_t nd_ifinfo_indexlim = 8;
+size_t nd_ifinfo_indexlim = 32; /* increased for 5589193 */
 struct nd_ifinfo *nd_ifinfo = NULL;
 struct nd_drhead nd_defrouter;
 struct nd_prhead nd_prefix = { 0 };
 struct nd_ifinfo *nd_ifinfo = NULL;
 struct nd_drhead nd_defrouter;
 struct nd_prhead nd_prefix = { 0 };
@@ -166,7 +166,13 @@ nd6_ifattach(
                bzero(q, n);
                if (nd_ifinfo) {
                        bcopy((caddr_t)nd_ifinfo, q, n/2);
                bzero(q, n);
                if (nd_ifinfo) {
                        bcopy((caddr_t)nd_ifinfo, q, n/2);
+                       /* Radar 5589193:
+                        * SU fix purposely leaks the old nd_ifinfo array
+                        * if we grow the arraw to more than 32 interfaces
+                        * Fix for future release is to use proper locking.
+
                        FREE((caddr_t)nd_ifinfo, M_IP6NDP);
                        FREE((caddr_t)nd_ifinfo, M_IP6NDP);
+                       */
                }
                nd_ifinfo = (struct nd_ifinfo *)q;
        }
index fdb3ae1c5a9b2fb9319618206376008bc6adb138..48694c9ee71ff7ab503a8f56463455473d0dcd4f 100644 (file)
@@ -318,6 +318,8 @@ nfs_connect(struct nfsmount *nmp)
                lck_mtx_unlock(&nmp->nm_lock);
                goto bad;
        }
+       /* just playin' it safe */
+       sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
 
        if (!(nmp->nm_flag & NFSMNT_INT))
                sock_nointerrupt(so, 1);
index 1a4f5bb0e926033dcb5373dcc80dbb338966646a..9d4d4f3097ebb1f24898e19d587dd55c61f0d3af 100644 (file)
@@ -781,6 +781,8 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
        so->so_upcall = nfsrv_rcv;
        so->so_rcv.sb_flags |= SB_UPCALL;
        socket_unlock(so, 1);
+       /* just playin' it safe */
+       sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
 
        /* mark that the socket is not in the nfsrv_sockwg list */
        slp->ns_wgq.tqe_next = SLPNOLIST;
index 3d6d36b6599fc154a09062aaa0ac7492a55864aa..2daeebc9cd80ab68afb7d413b94273db61da23b1 100644 (file)
@@ -513,11 +513,13 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
 /* get a pointer to the next consecutive bytes in an mbuf chain */
 #define nfsm_chain_get_opaque_pointer(E, NMC, LEN, PTR) \
        do { \
+               uint32_t rndlen; \
                if (E) break; \
-               if ((NMC)->nmc_left >= (uint32_t)(LEN)) { \
+               rndlen = nfsm_rndup(LEN); \
+               if ((NMC)->nmc_left >= rndlen) { \
                        (PTR) = (void*)(NMC)->nmc_ptr; \
-                       (NMC)->nmc_left -= nfsm_rndup(LEN); \
-                       (NMC)->nmc_ptr += nfsm_rndup(LEN); \
+                       (NMC)->nmc_left -= rndlen; \
+                       (NMC)->nmc_ptr += rndlen; \
                } else { \
                        (E) = nfsm_chain_get_opaque_pointer_f((NMC), (LEN), (u_char**)&(PTR)); \
                } \
@@ -526,11 +528,13 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
 /* copy the next consecutive bytes of opaque data from an mbuf chain */
 #define nfsm_chain_get_opaque(E, NMC, LEN, PTR) \
        do { \
+               uint32_t rndlen; \
                if (E) break; \
-               if ((NMC)->nmc_left >= (LEN)) { \
+               rndlen = nfsm_rndup(LEN); \
+               if ((NMC)->nmc_left >= rndlen) { \
                        u_char *__tmpptr = (u_char*)(NMC)->nmc_ptr; \
-                       (NMC)->nmc_left -= nfsm_rndup(LEN); \
-                       (NMC)->nmc_ptr += nfsm_rndup(LEN); \
+                       (NMC)->nmc_left -= rndlen; \
+                       (NMC)->nmc_ptr += rndlen; \
                        bcopy(__tmpptr, (PTR), (LEN)); \
                } else { \
                        (E) = nfsm_chain_get_opaque_f((NMC), (LEN), (u_char*)(PTR)); \
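Both macros above now compare nmc_left against the XDR-rounded length before taking the fast path, fixing an off-by-up-to-3 under-count. nfsm_rndup rounds to the 4-byte XDR quantum; a common definition (assumed here, not shown in this hunk):

#define nfsm_rndup(a)	(((a) + 3) & (~0x3))

/* e.g. a 5-byte opaque occupies nfsm_rndup(5) == 8 bytes in the mbuf
 * chain, so nmc_left must cover 8 bytes, not 5, before the in-place
 * pointer/copy fast path may consume it. */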
index 93858337300703760fc51a5a094bfa39eb3fd79b..bb0a7d7c52aa6f1d9b75b4769fd26b2f8ab7e2e6 100644 (file)
@@ -75,7 +75,7 @@ struct aiocb {
 
 struct user_aiocb {
        int             aio_fildes;             /* File descriptor */
-       off_t           aio_offset;             /* File offset */
+       off_t           aio_offset __attribute((aligned(8))); /* File offset */
        user_addr_t     aio_buf __attribute((aligned(8)));              /* Location of buffer */
        user_size_t     aio_nbytes;             /* Length of transfer */
        int             aio_reqprio;    /* Request priority offset */
index 754a272dc4e4c3a66db87f02e0ee2fe458fe2a51..b6f9c2cd2ac81688e3ec59c857f7988f167afe6f 100644 (file)
@@ -35,6 +35,7 @@
 #include <kern/lock.h>
 #include <kern/locks.h>
 #include <kern/thread_call.h>
+#include <kern/thread.h>
 #include <machine/machine_routines.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
index a11877c5228526fc6a0c5777064aa1d9d6fcf822..67aff41a164a6745d582ed167c6ffc993e57333d 100644 (file)
@@ -257,5 +257,6 @@ __END_DECLS
 /* pseudo-errors returned inside kernel to modify return to process */
 #define        ERESTART        (-1)            /* restart syscall */
 #define        EJUSTRETURN     (-2)            /* don't modify regs, just return */
+#define ERECYCLE    (-5)               /* restart lookup under heavy vnode pressure/recycling */
 #endif
 #endif /* _SYS_ERRNO_H_ */
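ERECYCLE never reaches user space; per the vfs_cache.c hunk below, it only signals that a lookup raced with vnode recycling and must be restarted. A hedged sketch of the intended consumption pattern (the helper name is hypothetical):

int error;

do {
	error = do_lookup_once(ndp);	/* hypothetical helper */
} while (error == ERECYCLE);		/* vnode recycled underneath us:
					   re-drive the lookup from namei */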
index c0a8368bad839c52424ac453ed86ca99cd6b7259..50706beec20ec5cec01b306fe427ccf5644482d2 100644 (file)
@@ -220,7 +220,7 @@ int relookup(struct vnode *dvp, struct vnode **vpp,
  */
 void    cache_purgevfs(mount_t mp);
 int            cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
-                         vfs_context_t context, int *trailing_slash, int *dp_authorized);
+                         vfs_context_t context, int *trailing_slash, int *dp_authorized, vnode_t last_dp);
 
 void           vnode_cache_authorized_action(vnode_t vp, vfs_context_t context, kauth_action_t action);
 void           vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action);
index 83d48fbc4b66565ea542a3f929b4ee1fe7cff3cc..67b55c7863448a635688c59aece57721ac6f88ef 100644 (file)
@@ -88,6 +88,7 @@
 #define RB_UNIPROC     0x80    /* don't start slaves */
 #define RB_SAFEBOOT    0x100   /* booting safe */
 #define RB_UPSDELAY 0x200   /* Delays restart by 5 minutes */
+#define RB_QUICK       0x400   /* quick and ungraceful reboot with file system caches flushed */
 #define RB_PANIC       0       /* reboot due to panic */
 #define RB_BOOT                1       /* reboot due to boot() */
 
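User space reaches these flags through reboot(2). A hedged sketch (requires root; whether a given kernel build honors RB_QUICK is an assumption here):

    #include <sys/reboot.h>
    #include <unistd.h>

    int main(void)
    {
        /* Flush file system caches, then reboot without a graceful shutdown. */
        return reboot(RB_QUICK);
    }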
index 6d7d934658915e6c91da4257cc1b976794dd4f42..4048673b59c7906b41192556af71506f3472ac67 100644 (file)
@@ -193,6 +193,7 @@ struct iovec {
 #define SO_REUSESHAREUID       0x1025          /* APPLE: Allow reuse of port/socket by different userids */
 #ifdef __APPLE_API_PRIVATE
 #define SO_NOTIFYCONFLICT      0x1026  /* APPLE: send notification if there is a bind on a port which is already in use */
+#define        SO_UPCALLCLOSEWAIT      0x1027  /* APPLE: block on close until an upcall returns */
 #endif
 #define SO_LINGER_SEC  0x1080          /* linger on close if data present (in seconds) */
 #define SO_RESTRICTIONS        0x1081  /* APPLE: deny inbound/outbound/both/flag set */
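An in-kernel consumer would set the new option through the socket KPI. A hedged sketch, assuming a kernel socket created via the kpi_socket interfaces (sock_setsockopt() is the in-kernel setter):

    #include <sys/kpi_socket.h>
    #include <sys/socket.h>

    static errno_t enable_upcall_close_wait(socket_t so)
    {
        int on = 1;

        /* Ask close() to block until any in-flight upcall has returned. */
        return sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT,
                               &on, sizeof(on));
    }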
index 988ec8d82a53b5d8dbcdc1d02535ad05089fab74..9f55d37a6d2adafb2ce7565dae292b260bcffb3b 100644 (file)
@@ -229,6 +229,7 @@ struct socket {
 #ifdef __APPLE_API_PRIVATE
 #define SOF_NOTIFYCONFLICT 0x400       /* notify that a bind was done on a port already in use */
 #endif
+#define        SOF_UPCALLCLOSEWAIT 0x800 /* block on close until an upcall returns  */
        int     so_usecount;    /* refcounting of socket use */;
        int     so_retaincnt;
        u_int32_t so_filteruse; /* usecount for the socket filters */
index ed6ea3203bbe1d81ad5e404abf171121b79e841d..c4e93ab8e4742db543f2d0f298c94792b4c2b958 100644 (file)
@@ -827,10 +827,12 @@ boolean_t vnode_cache_is_stale(vnode_t vp)
 
 /*
  * Returns:    0                       Success
- *             ENOENT                  No such file or directory
+ *             ERECYCLE                vnode was recycled from underneath us.  Force lookup to be re-driven from namei.
+ *                                             This errno value should not be seen by anyone outside of the kernel.
  */
 int 
-cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, vfs_context_t ctx, int *trailing_slash, int *dp_authorized)
+cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, 
+               vfs_context_t ctx, int *trailing_slash, int *dp_authorized, vnode_t last_dp)
 {
        char            *cp;            /* pointer into pathname argument */
        int             vid;
@@ -840,11 +842,9 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
        kauth_cred_t    ucred;
        boolean_t       ttl_enabled = FALSE;
        struct timeval  tv;
-        mount_t                mp;
+    mount_t            mp;
        unsigned int    hash;
-#if CONFIG_MACF
-       int             error;
-#endif
+       int             error = 0;
 
        ucred = vfs_context_ucred(ctx);
        *trailing_slash = 0;
@@ -945,7 +945,7 @@ skiprsrcfork:
                        error = mac_vnode_check_lookup(ctx, dp, cnp);
                        if (error) {
                                name_cache_unlock();
-                               return (error);
+                               goto errorout;
                        }
                }
 #endif /* MAC */
@@ -1052,35 +1052,41 @@ skiprsrcfork:
                dp = NULLVP;
        } else {
 need_dp:
-               /*
+               /*
                 * return the last directory we looked at
-                * with an io reference held
+                * with an io reference held. If it was the one passed
+                * in as a result of the last iteration of VNOP_LOOKUP,
+                * it should already hold an io ref. No need to increase ref.
                 */
-               if (dp == ndp->ni_usedvp) {
-                       /*
-                        * if this vnode matches the one passed in via USEDVP
-                        * than this context already holds an io_count... just
-                        * use vnode_get to get an extra ref for lookup to play
-                        * with... can't use the getwithvid variant here because
-                        * it will block behind a vnode_drain which would result
-                        * in a deadlock (since we already own an io_count that the
-                        * vnode_drain is waiting on)... vnode_get grabs the io_count
-                        * immediately w/o waiting... it always succeeds
-                        */
-                       vnode_get(dp);
-               } else if ( (vnode_getwithvid(dp, vid)) ) {
-                       /*
-                        * failure indicates the vnode
-                        * changed identity or is being
-                        * TERMINATED... in either case
-                        * punt this lookup.
-                        * 
-                        * don't necessarily return ENOENT, though, because
-                        * we really want to go back to disk and make sure it's
-                        * there or not if someone else is changing this
-                        * vnode.
-                        */
-                       return (ERESTART);
+               if (last_dp != dp){
+                       
+                       if (dp == ndp->ni_usedvp) {
+                               /*
+                                * if this vnode matches the one passed in via USEDVP
+                                * than this context already holds an io_count... just
+                                * use vnode_get to get an extra ref for lookup to play
+                                * with... can't use the getwithvid variant here because
+                                * it will block behind a vnode_drain which would result
+                                * in a deadlock (since we already own an io_count that the
+                                * vnode_drain is waiting on)... vnode_get grabs the io_count
+                                * immediately w/o waiting... it always succeeds
+                                */
+                               vnode_get(dp);
+                       } else if ( (vnode_getwithvid(dp, vid)) ) {
+                               /*
+                                * failure indicates the vnode
+                                * changed identity or is being
+                                * TERMINATED... in either case
+                                * punt this lookup.
+                                * 
+                                * don't necessarily return ENOENT, though, because
+                                * we really want to go back to disk and make sure it's
+                                * there or not if someone else is changing this
+                                * vnode.
+                                */
+                               error = ERECYCLE;
+                               goto errorout;
+                       }
                }
        }
        if (vp != NULLVP) {
@@ -1104,7 +1110,22 @@ need_dp:
        ndp->ni_dvp = dp;
        ndp->ni_vp  = vp;
 
-       return (0);
+errorout:
+       /* 
+        * If we came into cache_lookup_path after an iteration of the lookup loop that
+        * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with an io ref
+        * on it.  It is now the job of cache_lookup_path to drop the ref on this vnode 
+        * when it is no longer needed.  If we get to this point, and last_dp is not NULL
+        * and it is ALSO not the dvp we want to return to caller of this function, it MUST be
+        * the case that we got to a subsequent path component and this previous vnode is 
+        * no longer needed.  We can then drop the io ref on it.
+        */
+       if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)){
+               vnode_put(last_dp);
+       }
+       
+       //initialized to 0, should be the same if no error cases occurred.
+       return error;
 }
 
 
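The ERECYCLE path hinges on the (vnode, v_id) revalidation above: a lookup drops the name cache lock holding only the pointer and the vid, and vnode_getwithvid() refuses the vnode if its identity changed in between. The step in isolation, as a hedged sketch:

    #include <sys/vnode.h>

    #define ERECYCLE (-5)   /* kernel-private value from the errno.h hunk above */

    static int use_dir_with_vid(vnode_t dp, int vid)
    {
        if (vnode_getwithvid(dp, vid) != 0) {
            /* identity changed or the vnode is being terminated */
            return ERECYCLE;        /* caller re-drives from namei() */
        }

        /* ... dp is safe to use here, with an io reference held ... */

        vnode_put(dp);
        return 0;
    }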
index 8f7145d19d5826bd3d40b1bc8e843f651d88f7f6..df54d26b1db4456b1b5e751ec7428c2f1c91329d 100644 (file)
@@ -2843,8 +2843,8 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
                blhdr->checksum = 0;
                blhdr->checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE);
 
-               if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, tr->blhdr->num_blocks * sizeof(struct buf *))) {
-                   panic("can't allocate %lu bytes for bparray\n", tr->blhdr->num_blocks * sizeof(struct buf *));
+               if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, blhdr->num_blocks * sizeof(struct buf *))) {
+                   panic("can't allocate %lu bytes for bparray\n", blhdr->num_blocks * sizeof(struct buf *));
                }
 
                // calculate individual block checksums
@@ -2867,7 +2867,7 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
                    blhdr->binfo[i].b.bp = bparray[i];
                }
 
-               kmem_free(kernel_map, (vm_offset_t)bparray, tr->blhdr->num_blocks * sizeof(struct buf *));
+               kmem_free(kernel_map, (vm_offset_t)bparray, blhdr->num_blocks * sizeof(struct buf *));
 
                if (ret != amt) {
                        printf("jnl: %s: end_transaction: only wrote %d of %d bytes to the journal!\n",
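The fix matters because end_transaction() walks a list of block headers: the allocation and the later free must both be sized from the header being processed (blhdr), not from the list head (tr->blhdr), or kmem_free() can be handed the wrong size. The general pairing, as a hedged kernel-style sketch:

    #include <vm/vm_kern.h>        /* kmem_alloc / kmem_free, kernel_map */

    static void with_scratch_array(int num_blocks)
    {
        vm_offset_t bparray;
        vm_size_t   size = num_blocks * sizeof(void *);   /* computed once */

        if (kmem_alloc(kernel_map, &bparray, size)) {
            panic("can't allocate %lu bytes for bparray\n", (unsigned long)size);
        }

        /* ... fill and use the array ... */

        kmem_free(kernel_map, bparray, size);   /* exactly the size allocated */
    }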
index aaabf7bc100137097945deeffab293f60705de16..bb8c5dd2bec6624db0d7cd283560a261a9f42b42 100644 (file)
@@ -141,7 +141,9 @@ static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, v
  *             lookup:EROFS
  *             lookup:EACCES
  *             lookup:EPERM
- *             lookup:???
+ *             lookup:ERECYCLE  vnode was recycled from underneath us in lookup.
+ *                                              This means we should re-drive lookup from this point.
+ *             lookup: ???
  *             VNOP_READLINK:???
  */
 int
@@ -150,6 +152,9 @@ namei(struct nameidata *ndp)
        struct filedesc *fdp;   /* pointer to file descriptor state */
        char *cp;               /* pointer into pathname argument */
        struct vnode *dp;       /* the directory we are searching */
+       struct vnode *usedvp = ndp->ni_dvp;  /* store pointer to vp in case we must loop due to
+                                                                                       heavy vnode pressure */
+       u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
        uio_t auio;
        int error;
        struct componentname *cnp = &ndp->ni_cnd;
@@ -170,6 +175,8 @@ namei(struct nameidata *ndp)
 #endif
        fdp = p->p_fd;
 
+vnode_recycled:
+
        /*
         * Get a buffer for the name to be translated, and copy the
         * name into the buffer.
@@ -413,6 +420,14 @@ retry_copy:
        }
        cnp->cn_pnbuf = NULL;
        ndp->ni_vp = NULLVP;
+       if (error == ERECYCLE){
+               /* vnode was recycled underneath us. re-drive lookup to start at 
+                  the beginning again, since recycling invalidated last lookup*/
+               ndp->ni_cnd.cn_flags = cnpflags;
+               ndp->ni_dvp = usedvp;
+               goto vnode_recycled;
+       }
+
 
        return (error);
 }
@@ -462,7 +477,7 @@ retry_copy:
  *             ENOTDIR                 Not a directory
  *             EROFS                   Read-only file system [CREATE]
  *             EISDIR                  Is a directory [CREATE]
- *             cache_lookup_path:ENOENT
+ *             cache_lookup_path:ERECYCLE  (vnode was recycled from underneath us, redrive lookup again)
  *             vnode_authorize:EROFS
  *             vnode_authorize:EACCES
  *             vnode_authorize:EPERM
@@ -495,6 +510,7 @@ lookup(struct nameidata *ndp)
        int current_mount_generation = 0;
        int vbusyflags = 0;
        int nc_generation = 0;
+       vnode_t last_dp = NULLVP;
 
        /*
         * Setup: break out flag bits into variables.
@@ -526,7 +542,7 @@ lookup(struct nameidata *ndp)
 dirloop: 
        ndp->ni_vp = NULLVP;
 
-       if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized)) ) {
+       if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized, last_dp)) ) {
                dp = NULLVP;
                goto bad;
        }
@@ -865,7 +881,12 @@ nextname:
                if (*cp == '\0')
                        goto emptyname;
 
-               vnode_put(dp);
+               /*
+                * cache_lookup_path is now responsible for dropping io ref on dp
+                * when it is called again in the dirloop.  This ensures we hold
+                * a ref on dp until we complete the next round of lookup.
+                */
+               last_dp = dp;
                goto dirloop;
        }
                                  
index be1ba3291a06ae7516bf95c53d6c888458854bc0..535603224f504b7e13817bc507a199f781f0b75e 100644 (file)
@@ -2785,19 +2785,23 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp)
 }
 
 static struct klist fs_klist;
+lck_grp_t *fs_klist_lck_grp;
+lck_mtx_t *fs_klist_lock;
 
 void
 vfs_event_init(void)
 {
-
        klist_init(&fs_klist);
+       fs_klist_lck_grp = lck_grp_alloc_init("fs_klist", NULL);
+       fs_klist_lock = lck_mtx_alloc_init(fs_klist_lck_grp, NULL);
 }
 
 void
 vfs_event_signal(__unused fsid_t *fsid, u_int32_t event, __unused intptr_t data)
 {
-
+       lck_mtx_lock(fs_klist_lock);
        KNOTE(&fs_klist, event);
+       lck_mtx_unlock(fs_klist_lock);
 }
 
 /*
@@ -3124,16 +3128,19 @@ static int
 filt_fsattach(struct knote *kn)
 {
 
+       lck_mtx_lock(fs_klist_lock);
        kn->kn_flags |= EV_CLEAR;
        KNOTE_ATTACH(&fs_klist, kn);
+       lck_mtx_unlock(fs_klist_lock);
        return (0);
 }
 
 static void
 filt_fsdetach(struct knote *kn)
 {
-
+       lck_mtx_lock(fs_klist_lock);
        KNOTE_DETACH(&fs_klist, kn);
+       lck_mtx_unlock(fs_klist_lock);
 }
 
 static int
@@ -3794,11 +3801,18 @@ vnode_reclaim_internal(struct vnode * vp, int locked, int reuse, int flags)
                vgone(vp, flags);               /* clean and reclaim the vnode */
 
        /*
-        * give the vnode a new identity so
-        * that vnode_getwithvid will fail
-        * on any stale cache accesses
+        * give the vnode a new identity so that vnode_getwithvid will fail
+        * on any stale cache accesses...
+        * grab the list_lock so that if we're in "new_vnode"
+        * behind the list_lock trying to steal this vnode, the v_id is stable...
+        * once new_vnode drops the list_lock, it will block trying to take
+        * the vnode lock until we release it... at that point it will evaluate
+        * whether the v_vid has changed
         */
+       vnode_list_lock();
        vp->v_id++;
+       vnode_list_unlock();
+
        if (isfifo) {
                struct fifoinfo * fip;
 
index e9ef928505d0de12024f89716b0c06131c32cc49..30e62d8ce8c2a9f989d0a7f92a3cd92b1b82434a 100644 (file)
@@ -3041,7 +3041,7 @@ lock_xattrfile(vnode_t xvp, short locktype, vfs_context_t context)
        lf.l_len = 0;
        lf.l_type = locktype; /* F_WRLCK or F_RDLCK */
        /* Note: id is just a kernel address that's not a proc */
-       error = VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_SETLK, &lf, F_FLOCK, context);
+       error = VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_SETLK, &lf, F_FLOCK|F_WAIT, context);
        return (error == ENOTSUP ? 0 : error);
 }
 
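Adding F_WAIT turns the in-kernel request into a blocking one; the user-space analogue is fcntl(2) with F_SETLKW in place of F_SETLK. The equivalent whole-file lock from user space:

    #include <fcntl.h>
    #include <unistd.h>

    static int lock_whole_file(int fd, short type)   /* F_RDLCK or F_WRLCK */
    {
        struct flock lf;

        lf.l_start  = 0;
        lf.l_len    = 0;          /* zero length: lock through end of file */
        lf.l_whence = SEEK_SET;
        lf.l_type   = type;

        return fcntl(fd, F_SETLKW, &lf);   /* wait for the lock, like F_WAIT */
    }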
index 56c2201dca6e94c18bce4fd5560f3a471e2a4238..54e6d30c6f1ea5c7c5358da6a38d1f661a94550d 100644 (file)
  * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
  */
 
+#ifndef SECURE_KERNEL
 extern int allow_stack_exec, allow_data_exec;
 
 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
+#endif /* !SECURE_KERNEL */
 
 #if CONFIG_NO_PRINTF_STRINGS
 void
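The guard compiles the writable knobs out of SECURE_KERNEL builds entirely, rather than registering them read-only. The same shape for any tunable, as a hedged sketch:

    #include <sys/sysctl.h>

    #ifndef SECURE_KERNEL
    static int my_tunable = 1;

    SYSCTL_INT(_vm, OID_AUTO, my_tunable, CTLFLAG_RW,
               &my_tunable, 0, "illustrative read-write tunable");
    #endif /* !SECURE_KERNEL */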
@@ -819,7 +821,7 @@ shared_region_map_np(
        memory_object_size_t            file_size;
        user_addr_t                     user_mappings;
        struct shared_file_mapping_np   *mappings;
-#define SFM_MAX_STACK  4
+#define SFM_MAX_STACK  8
        struct shared_file_mapping_np   stack_mappings[SFM_MAX_STACK];
        unsigned int                    mappings_count;
        vm_size_t                       mappings_size;
index 678fe7d7a653daa0998de489a07c52b7a35806d8..745ad66f22f6471f1f17c156668cf63ae193893d 100644 (file)
@@ -597,6 +597,7 @@ _ubc_isinuse
 _ubc_msync
 _ubc_offtoblk
 _ubc_page_op
+_ubc_pages_resident
 _ubc_range_op
 _ubc_setcred
 _ubc_setsize
index fd0f8de5983005dfc6dc52c40330c1692ec3ab49..f14615395d67a43c0fe03c04b6b46ece26979427 100644 (file)
@@ -119,7 +119,6 @@ __Z17IODTMapInterruptsP15IORegistryEntry
 __Z17IODeviceTreeAllocPv
 __Z17IOServiceOrderingPK15OSMetaClassBaseS1_Pv
 __Z18IODTCompareNubNamePK15IORegistryEntryP8OSStringPS3_
-__Z19IODTMapOneInterruptP15IORegistryEntryPmPP6OSDataPPK8OSSymbol
 __Z19printDictionaryKeysP12OSDictionaryPc
 __Z19tellAppWithResponseP8OSObjectPv
 __Z20IODTMakeNVDescriptorP15IORegistryEntryP17IONVRAMDescriptor
@@ -127,7 +126,6 @@ __Z20IODTMatchNubWithKeysP15IORegistryEntryPKc
 __Z21IODTResolveAddressingP15IORegistryEntryPKcP14IODeviceMemory
 __Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_
 __Z22tellClientWithResponseP8OSObjectPv
-__Z23IODTFindInterruptParentP15IORegistryEntry
 __Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc
 __Z24broadcast_aggressivenessP8OSObjectPvS1_S1_S1_
 __Z26serializedAllowPowerChangeP8OSObjectPvS1_S1_S1_
@@ -561,6 +559,8 @@ __ZN14IOPMrootDomain23requestPowerDomainStateEmP17IOPowerConnectionm
 __ZN14IOPMrootDomain23setQuickSpinDownTimeoutEv
 __ZN14IOPMrootDomain24displayWranglerPublishedEPvS0_P9IOService
 __ZN14IOPMrootDomain24receivePowerNotificationEm
+__ZN14IOPMrootDomain24systemPowerEventOccurredEPK8OSSymbolP8OSObject
+__ZN14IOPMrootDomain24systemPowerEventOccurredEPK8OSSymbolj
 __ZN14IOPMrootDomain25announcePowerSourceChangeEv
 __ZN14IOPMrootDomain26handleSleepTimerExpirationEv
 __ZN14IOPMrootDomain26restoreUserSpinDownTimeoutEv
index dfaa71992b1320d8fb4bbb1300cb22ebbda05869..6f5ee0f98e7b00589edec043211905663dd66dd3 100644 (file)
@@ -1,4 +1,4 @@
-9.1.0
+9.2.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
index 71c1ae0fd2e51a76394b2b501b33c69366646b9b..efaa7c6053caa82caeda15e425e997f7c4059e21 100644 (file)
@@ -186,7 +186,6 @@ __Z17IODTMapInterruptsP15IORegistryEntry
 __Z17IODeviceTreeAllocPv
 __Z17IOServiceOrderingPK15OSMetaClassBaseS1_Pv
 __Z18IODTCompareNubNamePK15IORegistryEntryP8OSStringPS3_
-__Z19IODTMapOneInterruptP15IORegistryEntryPmPP6OSDataPPK8OSSymbol
 __Z19printDictionaryKeysP12OSDictionaryPc
 __Z19tellAppWithResponseP8OSObjectPv
 __Z20IODTMakeNVDescriptorP15IORegistryEntryP17IONVRAMDescriptor
@@ -194,7 +193,6 @@ __Z20IODTMatchNubWithKeysP15IORegistryEntryPKc
 __Z21IODTResolveAddressingP15IORegistryEntryPKcP14IODeviceMemory
 __Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_
 __Z22tellClientWithResponseP8OSObjectPv
-__Z23IODTFindInterruptParentP15IORegistryEntry
 __Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc
 __Z24broadcast_aggressivenessP8OSObjectPvS1_S1_S1_
 __Z26serializedAllowPowerChangeP8OSObjectPvS1_S1_S1_
index 362c5ed31de110a9d2b6dd9fbc980f618ee614ec..3e447c36cda531417732007e2f4e0926f21a8244 100644 (file)
@@ -252,13 +252,24 @@ enum {
     kInflowForciblyEnabledBit = (1 << 0)
 };
 
+/* kIOPMMessageInternalBatteryFullyDischarged
+ * The battery has drained completely to its "Fully Discharged" state. 
+ */
 #define kIOPMMessageInternalBatteryFullyDischarged  \
                 iokit_family_msg(sub_iokit_powermanagement, 0x120)
 
+/* kIOPMMessageSystemPowerEventOccurred
+ * Some major system thermal property has changed, and interested clients may
+ * modify their behavior.
+ */
+#define kIOPMMessageSystemPowerEventOccurred  \
+                iokit_family_msg(sub_iokit_powermanagement, 0x130)
+
 
 /*******************************************************************************
  *
  * Power commands issued to root domain
+ * Use with IOPMrootDomain::receivePowerNotification()
  *
  * These commands are issued from system drivers only:
  *      ApplePMU, AppleSMU, IOGraphics, AppleACPIFamily
@@ -278,6 +289,7 @@ enum {
   kIOPMClamshellOpened          = (1<<10)  // clamshell was opened
 };
 
+
 /*******************************************************************************
  *
  * Power Management Return Codes
@@ -378,6 +390,76 @@ enum {
 #define kIOPMPSPostDishargeWaitSecondsKey      "PostDischargeWaitSeconds"
 
 
+/* CPU Power Management status keys
+ * Pass as arguments to IOPMrootDomain::systemPowerEventOccurred
+ * Or as arguments to IOPMSystemPowerEventOccurred()
+ * Or to decode the dictionary obtained from IOPMCopyCPUPowerStatus()
+ * These keys reflect restrictions placed on the CPU by the system
+ * to bring the CPU's power consumption within allowable thermal and 
+ * power constraints.
+ */
+
+
+/* kIOPMGraphicsPowerLimitsKey
+ *   The key representing the dictionary of graphics power limits.
+ *   The dictionary contains the other kIOPMCPUPower keys & their associated
+ *   values (e.g. Speed limit, Processor Count, and Schedule limits).
+ */
+#define kIOPMGraphicsPowerLimitsKey                     "Graphics_Power_Limits"
+
+/* kIOPMGraphicsPowerLimitPerformanceKey
+ *   The key representing the percent of overall performance made available
+ *   by the graphics chip as a percentage (integer 0 - 100).
+ */
+#define kIOPMGraphicsPowerLimitPerformanceKey           "Graphics_Power_Performance"
+
+
+
+/* kIOPMCPUPowerLimitsKey
+ *   The key representing the dictionary of CPU Power Limits.
+ *   The dictionary contains the other kIOPMCPUPower keys & their associated
+ *   values (e.g. Speed limit, Processor Count, and Schedule limits).
+ */
+#define kIOPMCPUPowerLimitsKey                          "CPU_Power_Limits"
+
+/* kIOPMCPUPowerLimitProcessorSpeedKey defines the speed & voltage limits placed 
+ *   on the CPU.
+ *   Represented as a percentage (0-100) of maximum CPU speed.
+ */
+#define kIOPMCPUPowerLimitProcessorSpeedKey             "CPU_Speed_Limit"
+
+/* kIOPMCPUPowerLimitProcessorCountKey reflects how many, if any, CPUs have been
+ *   taken offline. Represented as an integer number of CPUs (0 - Max CPUs).
+ */
+#define kIOPMCPUPowerLimitProcessorCountKey             "CPU_Available_CPUs"
+
+/* kIOPMCPUPowerLimitSchedulerTimeKey represents the percentage (0-100) of CPU time 
+ *   available. 100% at normal operation. The OS may limit this time to a percentage
+ *   less than 100%.
+ */
+#define kIOPMCPUPowerLimitSchedulerTimeKey              "CPU_Scheduler_Limit"
+
+
+/* Thermal Level Warning Key
+ * Indicates the thermal constraints placed on the system. This value may
+ * cause clients to take action to consume fewer system resources.
+ * The value associated with this warning is defined by the platform.
+ */
+#define kIOPMThermalLevelWarningKey                     "Thermal_Level_Warning"
+
+/* Thermal Warning Level values
+ *      kIOPMThermalWarningLevelNormal - under normal operating conditions
+ *      kIOPMThermalWarningLevelDanger - thermal pressure may cause system slowdown
+ *      kIOPMThermalWarningLevelCrisis - thermal conditions may cause imminent shutdown
+ *
+ * The platform may define additional thermal levels if necessary.
+ */
+enum {
+  kIOPMThermalWarningLevelNormal    = 0,
+  kIOPMThermalWarningLevelDanger    = 5,
+  kIOPMThermalWarningLevelCrisis    = 10
+};
+
 
 // PM Settings Controller setting types
 // Settings types used primarily with:
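User-space clients read these keys back from the dictionary the root domain publishes; the comment above names IOPMCopyCPUPowerStatus() as the accessor. A hedged sketch, assuming that call is available from IOKit/pwr_mgt/IOPMLib.h:

    #include <IOKit/pwr_mgt/IOPMLib.h>
    #include <CoreFoundation/CoreFoundation.h>
    #include <stdio.h>

    int main(void)
    {
        CFDictionaryRef status = NULL;

        if (IOPMCopyCPUPowerStatus(&status) == kIOReturnSuccess && status) {
            CFNumberRef level = CFDictionaryGetValue(status,
                                    CFSTR("Thermal_Level_Warning"));
            int value = 0;

            if (level && CFNumberGetValue(level, kCFNumberIntType, &value))
                printf("thermal warning level: %d\n", value);   /* 0, 5, or 10 */

            CFRelease(status);
        }
        return 0;
    }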
index 2605169c399554bee0a0ebe7a932b2ac00eda6e8..c528b8c3eb51ef47dd84e671d43997d72b1967bd 100644 (file)
@@ -41,12 +41,15 @@ enum {
     kPCICantSleep                      = 0x00000004
 };
 
+
+
 /* 
  *IOPMrootDomain registry property keys
  */
 #define kRootDomainSupportedFeatures        "Supported Features"
 #define kRootDomainSleepReasonKey           "Last Sleep Reason"
 #define kRootDomainSleepOptionsKey          "Last Sleep Options"
+#define kIOPMRootDomainPowerStatusKey       "Power Status"
 
 /*
  * Possible sleep reasons found under kRootDomainSleepReasonsKey
@@ -115,6 +118,22 @@ public:
     virtual IOReturn setProperties ( OSObject * );
     IOReturn shutdownSystem ( void );
     IOReturn restartSystem ( void );
+
+/*! @function systemPowerEventOccurred
+    @abstract Other drivers may inform IOPMrootDomain of system PM events
+    @discussion systemPowerEventOccurred is a richer alternative to receivePowerNotification().
+        Only Apple-owned kexts should have reason to call systemPowerEventOccurred.
+    @param event An OSSymbol describing the type of power event.
+    @param value A 32-bit integer value associated with the event.
+    @param shouldUpdate indicates whether the root domain should send a notification
+        to interested parties. Pass false if you're calling systemPowerEventOccurred
+        several times in succession, and pass true only on the last invocation.
+    @result kIOReturnSuccess on success */
+    IOReturn systemPowerEventOccurred(const OSSymbol *event, 
+                                    uint32_t intValue);
+    IOReturn systemPowerEventOccurred(const OSSymbol *event, 
+                                    OSObject *value);
+    
     virtual IOReturn receivePowerNotification (UInt32 msg);
     virtual void setSleepSupported( IOOptionBits flags );
     virtual IOOptionBits getSleepSupported();
index efe64454d498cb6b59fcf4045b50a9b2bea3dda6..5fbfc6715ce16a1f8bb984f31e48bac41f2e7e6d 100644 (file)
@@ -500,7 +500,7 @@ void IOBufferMemoryDescriptor::free()
     IOOptionBits     options   = _options;
     vm_size_t        size      = _capacity;
     void *           buffer    = _buffer;
-    IOVirtualAddress source    = _ranges.v64->address;
+    mach_vm_address_t source   = (_ranges.v) ? _ranges.v64->address : 0;
     IOMemoryMap *    map       = 0;
     vm_offset_t      alignment = _alignment;
 
@@ -524,7 +524,7 @@ void IOBufferMemoryDescriptor::free()
     else if (buffer)
     {
        if (kIOMemoryTypePhysical64 == (flags & kIOMemoryTypeMask))
-           IOFreePhysical((mach_vm_address_t) source, size);
+           IOFreePhysical(source, size);
         else if (options & kIOMemoryPhysicallyContiguous)
             IOKernelFreeContiguous((mach_vm_address_t) buffer, size);
         else if (alignment > 1)
index 9aece35ff8e194ab4e730c36daa1eb869409f06c..75d751afe8b1e2f35508bf44cf3deca8444bccbb 100644 (file)
@@ -263,6 +263,7 @@ IODMACommand::setMemoryDescriptor(const IOMemoryDescriptor *mem, bool autoPrepar
        else
            fInternalState->fCheckAddressing = (fNumAddressBits && (highPage >= (1UL << (fNumAddressBits - PAGE_SHIFT))));
 
+       fInternalState->fNewMD = true;
        mem->retain();
        fMemory = mem;
 
@@ -857,10 +858,11 @@ IODMACommand::genIOVMSegments(InternalSegmentFunction outSegFunc,
     if (offset >= memLength)
        return kIOReturnOverrun;
 
-    if ((offset == internalState->fPreparedOffset) || (offset != state->fOffset)) {
+    if ((offset == internalState->fPreparedOffset) || (offset != state->fOffset) || internalState->fNewMD) {
        state->fOffset                 = 0;
        state->fIOVMAddr               = 0;
        internalState->fNextRemapIndex = 0;
+       internalState->fNewMD          = false;
        state->fMapped                 = (IS_MAPPED(fMappingOptions) && fMapper);
        mdOp                           = kIOMDFirstSegment;
     };
index 4b6a1fdf2ffcce859b225c3dfd200399f52ca840..381022c56a5abe9478b6df56f4919f2946530c74 100644 (file)
@@ -435,15 +435,21 @@ static bool GetUInt32( IORegistryEntry * regEntry, const OSSymbol * name,
         return( false );
 }
 
-IORegistryEntry * IODTFindInterruptParent( IORegistryEntry * regEntry )
+static IORegistryEntry * IODTFindInterruptParent( IORegistryEntry * regEntry, IOItemCount index )
 {
     IORegistryEntry *  parent;
     UInt32             phandle;
+    OSData         *   data;
+    unsigned int       len;
 
-    if( GetUInt32( regEntry, gIODTInterruptParentKey, &phandle))
-        parent = FindPHandle( phandle );
+    if( (data = OSDynamicCast( OSData, regEntry->getProperty( gIODTInterruptParentKey )))
+      && (sizeof(UInt32) <= (len = data->getLength()))) {
+       if (((index + 1) * sizeof(UInt32)) > len)
+           index = 0;
+       phandle = ((UInt32 *) data->getBytesNoCopy())[index];
+       parent = FindPHandle( phandle );
+    }
 
-    else if( 0 == regEntry->getProperty( "interrupt-controller"))
+    else if( 0 == regEntry->getProperty( "interrupt-controller"))
         parent = regEntry->getParentEntry( gIODTPlane);
     else
         parent = 0;
@@ -481,8 +487,8 @@ static void IODTGetICellCounts( IORegistryEntry * regEntry,
         *aCellCount = 0;
 }
 
-UInt32 IODTMapOneInterrupt( IORegistryEntry * regEntry, UInt32 * intSpec,
-                               OSData ** spec, const OSSymbol ** controller )
+static UInt32 IODTMapOneInterrupt( IORegistryEntry * regEntry, UInt32 * intSpec, UInt32 index,
+                                   OSData ** spec, const OSSymbol ** controller )
 {
     IORegistryEntry *parent = 0;
     OSData                     *data;
@@ -494,7 +500,7 @@ UInt32 IODTMapOneInterrupt( IORegistryEntry * regEntry, UInt32 * intSpec,
     UInt32                     i, original_icells;
     bool                       cmp, ok = false;
 
-    parent = IODTFindInterruptParent( regEntry );    
+    parent = IODTFindInterruptParent( regEntry, index );    
     IODTGetICellCounts( parent, &icells, &acells );
     addrCmp = 0;
     if( acells) {
@@ -640,11 +646,12 @@ static bool IODTMapInterruptsSharing( IORegistryEntry * regEntry, OSDictionary *
     OSData *           local2;
     UInt32 *           localBits;
     UInt32 *           localEnd;
+    IOItemCount                index;
     OSData *           map;
     OSObject *         oneMap;
     OSArray *          mapped;
     OSArray *          controllerInts;
-    const OSSymbol *   controller;
+    const OSSymbol *   controller = 0;
     OSArray *          controllers;
     UInt32             skip = 1;
     bool               ok, nw;
@@ -666,6 +673,7 @@ static bool IODTMapInterruptsSharing( IORegistryEntry * regEntry, OSDictionary *
 
     localBits = (UInt32 *) local->getBytesNoCopy();
     localEnd = localBits + (local->getLength() / sizeof(UInt32));
+    index = 0;
     mapped = OSArray::withCapacity( 1 );
     controllers = OSArray::withCapacity( 1 );
 
@@ -673,7 +681,7 @@ static bool IODTMapInterruptsSharing( IORegistryEntry * regEntry, OSDictionary *
 
     if( ok) do {
         if( nw) {
-            skip = IODTMapOneInterrupt( regEntry, localBits, &map, &controller );
+            skip = IODTMapOneInterrupt( regEntry, localBits, index, &map, &controller );
             if( 0 == skip) {
                 IOLog("%s: error mapping interrupt[%d]\n",
                         regEntry->getName(), mapped->getCount());
@@ -686,6 +694,7 @@ static bool IODTMapInterruptsSharing( IORegistryEntry * regEntry, OSDictionary *
             controller->retain();
         }
 
+       index++;
         localBits += skip;
         mapped->setObject( map );
         controllers->setObject( controller );
index 030368a72f658f89d554758773eb209f515db31a..ae66cb9b8a91f1deb1f1ca66c46fdd48ad61896d 100644 (file)
@@ -1598,7 +1598,10 @@ IOHibernateSystemWake(void)
                const OSSymbol * sym = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey);
 
                if (sym) {
-                       gIOOptionsEntry->removeProperty(sym);
+                       if (gIOOptionsEntry->getProperty(sym)) {
+                               gIOOptionsEntry->removeProperty(sym);
+                               gIOOptionsEntry->sync();
+                       }
                        sym->release();
                }
        }
index afa64a600ba7903f76ad52d3342a74a4223b61f8..a21ff00312759dee3e02947fa2844dde2dbe2dfa 100644 (file)
@@ -113,7 +113,7 @@ struct IODMACommandInternal
     UInt8  fCopyContig;
     UInt8  fPrepared;
     UInt8  fDoubleBuffer;
-    UInt8  __pad[1];
+    UInt8  fNewMD;
 
     ppnum_t  fCopyPageAlloc;
     ppnum_t  fCopyPageCount;
index 3c0b8f7e16a09f3bcff06237835240a55efa45da..43321aac12c2367d21165fa7f50bbd9a230fa6fd 100644 (file)
@@ -1017,6 +1017,8 @@ void IOGeneralMemoryDescriptor::free()
            IODelete(_ranges.v64, IOAddressRange, _rangesCount);
        else
            IODelete(_ranges.v, IOVirtualRange, _rangesCount);
+
+       _ranges.v = NULL;
     }
 
     if (reserved && reserved->devicePager)
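Both IOMemoryDescriptor fixes in this commit are the same hardening: test or clear a pointer around teardown so a second pass over the object cannot touch freed state. In miniature:

    #include <stdlib.h>

    struct descriptor {
        void *ranges;
    };

    static void free_ranges(struct descriptor *d)
    {
        if (d->ranges) {
            free(d->ranges);
            d->ranges = NULL;   /* later teardown paths see a clean state */
        }
    }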
index 9af5919f41bcdf5ce860109357c90039d9a91eb2..81568ee1e1d5dddbeef7d9361ffd9cd385db595c 100644 (file)
@@ -1076,6 +1076,9 @@ void IOPMrootDomain::powerChangeDone ( unsigned long previousState )
             // re-enable this timer for next sleep
             idleSleepPending = false;
             gSleepOrShutdownPending = 0;
+
+            // Invalidate prior activity tickles to allow wake from doze.
+            if (wrangler) wrangler->changePowerStateTo(0);
             break;
             
        case RESTART_STATE:
@@ -1653,12 +1656,87 @@ void IOPMrootDomain::informCPUStateChange(
 #endif __i386__
 }
 
+//******************************************************************************
+// systemPowerEventOccurred
+//
+// The power controller is notifying us of a hardware-related power management
+// event that we must handle. 
+//
+// systemPowerEventOccurred covers the same functionality that receivePowerNotification
+// does; it simply provides a richer API for conveying more information.
+//******************************************************************************
+IOReturn IOPMrootDomain::systemPowerEventOccurred(
+    const OSSymbol *event,
+    uint32_t intValue)
+{
+    IOReturn        attempt = kIOReturnSuccess;
+    OSNumber        *newNumber = NULL;
+
+    if (!event) 
+        return kIOReturnBadArgument;
+        
+    newNumber = OSNumber::withNumber(intValue, 8*sizeof(intValue));
+    if (!newNumber)
+        return kIOReturnInternalError;
+
+    attempt = systemPowerEventOccurred(event, (OSObject *)newNumber);
+
+    newNumber->release();
+
+    return attempt;
+}
+
+IOReturn IOPMrootDomain::systemPowerEventOccurred(
+    const OSSymbol *event,
+    OSObject *value)
+{
+    OSDictionary *thermalsDict = NULL;
+    bool shouldUpdate = true;
+    
+    if (!event || !value) 
+        return kIOReturnBadArgument;
+
+    // LOCK
+    // We reuse featuresDict Lock because it already exists and guards
+    // the very infrequently used publish/remove feature mechanism, so there's zero risk
+    // of stepping on that lock.
+    if (featuresDictLock) IOLockLock(featuresDictLock);
+
+    thermalsDict = (OSDictionary *)getProperty(kIOPMRootDomainPowerStatusKey);
+                   
+    if (thermalsDict && OSDynamicCast(OSDictionary, thermalsDict)) {
+        thermalsDict = OSDictionary::withDictionary(thermalsDict);                        
+    } else {
+        thermalsDict = OSDictionary::withCapacity(1);
+    }
+
+    if (!thermalsDict) {
+        shouldUpdate = false;
+        goto exit;
+    }
+
+    thermalsDict->setObject (event, value);
+
+    setProperty (kIOPMRootDomainPowerStatusKey, thermalsDict);
+
+    thermalsDict->release();
+
+exit:
+    // UNLOCK
+    if (featuresDictLock) IOLockUnlock(featuresDictLock);
+
+    if (shouldUpdate)
+        messageClients (kIOPMMessageSystemPowerEventOccurred, (void *)NULL);
+
+    return kIOReturnSuccess;
+}
+
 
 //******************************************************************************
 // receivePowerNotification
 //
 // The power controller is notifying us of a hardware-related power management
-// event that we must handle. This is a result of an 'environment' interrupt from
+// event that we must handle. This may be a result of an 'environment' interrupt from
 // the power mgt micro.
 //******************************************************************************
 
index 03d349e4f8513594a0583e043d665d3d18f548c4..1b53461ecd5b7cf14fb2f7522f1236d0a8be6a86 100644 (file)
@@ -46,6 +46,7 @@
 #include <IOKit/system.h>
 
 #include <libkern/c++/OSContainers.h>
+#include <libkern/crypto/sha1.h>
 
 extern "C" {
 #include <machine/machine_routines.h>
@@ -858,29 +859,57 @@ void PESetGMTTimeOfDay(long secs)
 void IOPlatformExpert::registerNVRAMController(IONVRAMController * caller)
 {
     OSData *          data;
-    IORegistryEntry * nvram;
-    OSString *        string;
+    IORegistryEntry * entry;
+    OSString *        string = 0;
+    char              uuid[ 36 + 1 ];
 
-    nvram = IORegistryEntry::fromPath( "/options", gIODTPlane );
-    if ( nvram )
+    entry = IORegistryEntry::fromPath( "/efi/platform", gIODTPlane );
+    if ( entry )
     {
-        data = OSDynamicCast( OSData, nvram->getProperty( "platform-uuid" ) );
-        if ( data && data->getLength( ) == sizeof( uuid_t ) )
+        data = OSDynamicCast( OSData, entry->getProperty( "system-id" ) );
+        if ( data && data->getLength( ) == 16 )
         {
-            char uuid[ 36 + 1 ];
-            uuid_unparse( ( UInt8 * ) data->getBytesNoCopy( ), uuid );
+            SHA1_CTX     context;
+            uint8_t      digest[ SHA_DIGEST_LENGTH ];
+            const uuid_t space = { 0x2A, 0x06, 0x19, 0x90, 0xD3, 0x8D, 0x44, 0x40, 0xA1, 0x39, 0xC4, 0x97, 0x70, 0x37, 0x65, 0xAC };
 
+            SHA1Init( &context );
+            SHA1Update( &context, space, sizeof( space ) );
+            SHA1Update( &context, data->getBytesNoCopy( ), data->getLength( ) );
+            SHA1Final( digest, &context );
+
+            digest[ 6 ] = ( digest[ 6 ] & 0x0F ) | 0x50;
+            digest[ 8 ] = ( digest[ 8 ] & 0x3F ) | 0x80;
+
+            uuid_unparse( digest, uuid );
             string = OSString::withCString( uuid );
-            if ( string )
-            {
-                getProvider( )->setProperty( kIOPlatformUUIDKey, string );
-                publishResource( kIOPlatformUUIDKey, string );
+        }
 
-                string->release( );
+        entry->release( );
+    }
+
+    if ( string == 0 )
+    {
+        entry = IORegistryEntry::fromPath( "/options", gIODTPlane );
+        if ( entry )
+        {
+            data = OSDynamicCast( OSData, entry->getProperty( "platform-uuid" ) );
+            if ( data && data->getLength( ) == sizeof( uuid_t ) )
+            {
+                uuid_unparse( ( uint8_t * ) data->getBytesNoCopy( ), uuid );
+                string = OSString::withCString( uuid );
             }
+
+            entry->release( );
         }
+    }
+
+    if ( string )
+    {
+        getProvider( )->setProperty( kIOPlatformUUIDKey, string );
+        publishResource( kIOPlatformUUIDKey, string );
 
-        nvram->release( );
+        string->release( );
     }
 
     publishResource("IONVRAM");
@@ -1281,7 +1310,7 @@ IOReturn IOPlatformExpertDevice::setProperties( OSObject * properties )
     object = dictionary->getObject( kIOPlatformUUIDKey );
     if ( object )
     {
-        IORegistryEntry * nvram;
+        IORegistryEntry * entry;
         OSString *        string;
         uuid_t            uuid;
 
@@ -1294,11 +1323,11 @@ IOReturn IOPlatformExpertDevice::setProperties( OSObject * properties )
         status = uuid_parse( string->getCStringNoCopy( ), uuid );
         if ( status != 0 ) return kIOReturnBadArgument;
 
-        nvram = IORegistryEntry::fromPath( "/options", gIODTPlane );
-        if ( nvram )
+        entry = IORegistryEntry::fromPath( "/options", gIODTPlane );
+        if ( entry )
         {
-            nvram->setProperty( "platform-uuid", uuid, sizeof( uuid_t ) );
-            nvram->release( );
+            entry->setProperty( "platform-uuid", uuid, sizeof( uuid_t ) );
+            entry->release( );
         }
 
         setProperty( kIOPlatformUUIDKey, string );
index 7621a257f032f718745f1ea39427ba2a29220f67..895f27b986fe2c1107991ca9540746ea680e6991 100644 (file)
@@ -43,6 +43,7 @@ extern "C" {
 
 extern dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys);
 extern dev_t mdevlookup(int devid);
+extern void mdevremoveall(void);
 
 kern_return_t
 IOKitBSDInit( void )
@@ -776,14 +777,19 @@ iofrootx:
 void IOSecureBSDRoot(const char * rootName)
 {
 #if CONFIG_EMBEDDED
+    IOReturn         result;
     IOPlatformExpert *pe;
-    const OSSymbol *functionName = OSSymbol::withCStringNoCopy("SecureRootName");
+    const OSSymbol   *functionName = OSSymbol::withCStringNoCopy("SecureRootName");
     
     while ((pe = IOService::getPlatform()) == 0) IOSleep(1 * 1000);
     
-    pe->callPlatformFunction(functionName, false, (void *)rootName, (void *)0, (void *)0, (void *)0);
+    // Returns kIOReturnNotPrivileged if the root device is not secure.
+    // Returns kIOReturnUnsupported if "SecureRootName" is not implemented.
+    result = pe->callPlatformFunction(functionName, false, (void *)rootName, (void *)0, (void *)0, (void *)0);
     
     functionName->release();
+    
+    if (result == kIOReturnNotPrivileged) mdevremoveall();
 #endif
 }
 
index bf820b20ea9894b34be4af50c5a7b5b6c2cf6f8f..ae3f0e88be400d4b0814dfa9954af1816e53c76b 100644 (file)
@@ -63,7 +63,6 @@ options               IOKITCPP        # C++ implementation            # <iokitcpp>
 options                KDEBUG          # kernel tracing                # <kdebug>
 options                NETWORKING      # kernel networking             # <networking>
 options                CRYPTO          # want crypto code              # <crypto>
-options                KPIDIRECT       # direct access                 # <kpidirect>
 options     CONFIG_DTRACE # enable dtrace       # <config_dtrace>
 
 #makeoptions   LIBDRIVER = "libDriver_kern.o"                  # <libdriver>
index dea0585dcaf72fd2d61ecd323a4e5172d208266e..3b87d080aace5a15884892ef9cec913a4204c4c1 100644 (file)
@@ -3,7 +3,6 @@
 OPTIONS/iokitcpp                                       optional iokitcpp
 OPTIONS/kdebug                                         optional kdebug
 OPTIONS/networking                                     optional networking
-OPTIONS/kpidirect                                      optional kpidirect
 OPTIONS/hibernation                                    optional hibernation
 OPTIONS/crypto                                         optional crypto
 OPTIONS/config_dtrace                          optional config_dtrace
index f0f7e3df4b260f92914557b18fdd333e90469f31..8da2cc9b22cce7300b5e202799a6ab2dc39cc99f 100644 (file)
--- a/kgmacros
+++ b/kgmacros
@@ -5383,3 +5383,169 @@ document showMCAstate
 Syntax: showMCAstate
 | Print machine-check register state after MC exception.
 end
+
+define _pt_step
+    #
+    # Step to lower-level page table and print attributes
+    #   $kgm_pt_paddr: current page table entry physical address
+    #   $kgm_pt_index: current page table entry index (0..511)
+    # returns
+    #   $kgm_pt_paddr: next level page table entry physical address
+    #                  or null if invalid
+    # For $kgm_pt_verbose = 0: print nothing
+    #                       1: print basic information
+    #                       2: print basic information and hex table dump
+    # The trickery with kdp_src_high32 is required for accesses above 4GB.
+    #
+    set $kgm_entryp = $kgm_pt_paddr + 8*$kgm_pt_index
+    set kdp_src_high32 = $kgm_pt_paddr >> 32
+    set kdp_trans_off = 1
+    set $entry =  *(pt_entry_t *)($kgm_entryp & 0x0ffffffffULL)
+    if $kgm_pt_verbose == 2
+        x/512g ($kgm_pt_paddr & 0x0ffffffffULL)
+    end
+    set kdp_trans_off = 0
+    set kdp_src_high32 = 0
+    set $kgm_paddr_mask = ~((0xffffULL<<48) | 0xfffULL)
+    if $kgm_pt_verbose == 0
+        if $entry & (0x1 << 0)
+            set $kgm_pt_paddr = $entry & $kgm_paddr_mask
+        else
+            set $kgm_pt_paddr = 0
+        end
+    else
+        printf "0x%016llx:\n\t0x%016llx\n\t", $kgm_entryp, $entry
+        if $entry & (0x1 << 0)
+            printf "valid"     
+            set $kgm_pt_paddr = $entry & $kgm_paddr_mask
+        else
+            printf "invalid"
+            set $kgm_pt_paddr = 0
+        end
+        if $entry & (0x1 << 1)
+            printf " writeable" 
+        else
+            printf " read-only" 
+        end
+        if $entry & (0x1 << 2)
+            printf " user" 
+        else
+            printf " supervisor" 
+        end
+        if $entry & (0x1 << 3)
+            printf " PWT" 
+        end
+        if $entry & (0x1 << 4)
+            printf " PCD" 
+        end
+        if $entry & (0x1 << 5)
+            printf " accessed" 
+        end
+        if $entry & (0x1 << 6)
+            printf " dirty" 
+        end
+        if $entry & (0x1 << 7)
+            printf " PAT" 
+        end
+        if $entry & (0x1 << 8)
+            printf " global" 
+        end
+        if $entry & (0x3 << 9)
+            printf " avail:0x%x", ($entry >> 9) & 0x3
+        end
+        if $entry & (0x1 << 63)
+            printf " noexec" 
+        end
+        printf "\n"
+    end
+end
+
+define _pmap_walk
+    set $kgm_pmap = (pmap_t) $arg0
+    set $kgm_vaddr = $arg1
+    set $kgm_pt_paddr = $kgm_pmap->pm_cr3
+    if $kgm_pt_paddr && cpu_64bit
+        set $kgm_pt_index = ($kgm_vaddr >> 39) & 0x1ffULL
+        if $kgm_pt_verbose
+            printf "pml4 (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_paddr
+        set $kgm_pt_index = ($kgm_vaddr >> 30) & 0x1ffULL
+        if $kgm_pt_verbose
+            printf "pdpt (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_paddr
+        set $kgm_pt_index = ($kgm_vaddr >> 21) & 0x1ffULL
+        if $kgm_pt_verbose
+            printf "pdt (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_paddr
+        set $kgm_pt_index = ($kgm_vaddr >> 12) & 0x1ffULL
+        if $kgm_pt_verbose
+            printf "pt (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_paddr
+        set $kgm_paddr = $kgm_pt_paddr + ($kgm_vaddr & 0xfffULL)
+        set kdp_trans_off = 1
+        set kdp_src_high32 = $kgm_paddr >> 32
+        set $kgm_value = *($kgm_paddr & 0x0ffffffffULL)
+        set kdp_trans_off = 0
+        set kdp_src_high32 = 0
+        printf "phys 0x%016llx: 0x%08x\n", $kgm_paddr, $kgm_value
+    else
+        set $kgm_paddr = 0
+        printf "(no translation)\n"
+    end
+end
+
+define pmap_walk
+    if $kgm_mtype != 7
+        printf "Not available for current architecture.\n"
+    else
+        if $argc != 2
+            printf "pmap_walk <pmap> <vaddr>\n"
+        else
+            if !$kgm_pt_verbose
+                set $kgm_pt_verbose = 1
+            else
+                if $kgm_pt_verbose != 2
+                    set $kgm_pt_verbose = 1
+                end
+            end
+            _pmap_walk $arg0 $arg1
+        end
+    end
+end
+
+document pmap_walk
+Syntax: (gdb) pmap_walk <pmap> <virtual_address>
+| Perform a page-table walk in <pmap> for <virtual_address>.
+| Set $kgm_pt_verbose=2 for full hex dump of page tables.
+end
+
+define pmap_vtop
+    if $kgm_mtype != 7
+        printf "Not available for current architecture.\n"
+    else
+        if $argc != 2
+            printf "pmap_vtop <pmap> <vaddr>\n"
+        else
+            set $kgm_pt_verbose = 0
+            _pmap_walk $arg0 $arg1
+        end
+    end
+end
+
+document pmap_vtop
+Syntax: (gdb) pmap_vtop <pmap> <virtual_address>
+| For page-tables in <pmap> translate <virtual_address> to physical address.
+end
+
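
For readers who prefer C to gdb macro syntax: _pmap_walk performs the standard 4-level x86-64 translation, taking 9 bits of index per level at shifts 39, 30, 21 and 12 and finishing with the 12-bit page offset. A minimal sketch under that reading, where phys_read64() is a hypothetical stand-in for the kdp physical reads (large pages are ignored, as in the macro):

    #include <stdint.h>

    extern uint64_t phys_read64(uint64_t paddr);  /* hypothetical */

    /* Returns the physical address for vaddr, or 0 if any level of the
     * page table is invalid (bit 0 of the entry clear). */
    uint64_t pmap_vtop_sketch(uint64_t cr3, uint64_t vaddr)
    {
        static const int shift[4] = { 39, 30, 21, 12 }; /* pml4, pdpt, pdt, pt */
        const uint64_t paddr_mask = ~((0xffffULL << 48) | 0xfffULL);
        uint64_t table = cr3;
        int level;

        for (level = 0; level < 4; level++) {
            uint64_t index = (vaddr >> shift[level]) & 0x1ffULL;
            uint64_t entry = phys_read64(table + 8 * index);
            if (!(entry & 1))
                return 0;                  /* invalid: no translation */
            table = entry & paddr_mask;    /* base of the next level */
        }
        return table + (vaddr & 0xfffULL); /* page base + page offset */
    }
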
index 9b193ef21e43af5f2af50f69644af8c18b686ebd..ab642795b8be35c0845b16975e090bcc8bd73a05 100644 (file)
@@ -43,7 +43,7 @@ MAKEOBJDIR ?= ${OBJROOT}
 # add version string
 SRCS += libsyscall_version.c
 libsyscall_version.c:
-       /Developer/Makefiles/bin/version.pl Libsyscall > $@
+       ${NEXT_ROOT}/Developer/Makefiles/bin/version.pl Libsyscall > $@
 
 CFLAGS += -I${SYMROOT}
 .include "${.CURDIR}/Makefile.inc"
index 62721adbded3f399cbf79d32ce8f7a3def2ebfdd..4b3d8d543e91dbb7ab11160ca6a122996776a021 100644 (file)
@@ -94,9 +94,15 @@ PRIVHDRSPPC = ${PRIVHDRS}/architecture/ppc
 KERNELFRAMEWORK = ${DESTDIR}/System/Library/Frameworks/Kernel.framework
 PRIVKERNELHDRS = ${KERNELFRAMEWORK}/Versions/A/PrivateHeaders
 
+.if ${MACHINE_ARCH} == armv6
+ARCHDIR = arm
+.else
+ARCHDIR = ${MACHINE_ARCH}
+.endif
+
 installhdrs-md: gen_md_mig_defs
-       mkdir -p ${INCDIR}/mach/${MACHINE_ARCH}
-       ${INSTALL} -o 0 -c -m 444 ${MD_MIGHDRS} ${INCDIR}/mach/${MACHINE_ARCH}
+       mkdir -p ${INCDIR}/mach/${ARCHDIR}
+       ${INSTALL} -o 0 -c -m 444 ${MD_MIGHDRS} ${INCDIR}/mach/${ARCHDIR}
        mkdir -p ${PRIVHDRSPPC}
        ${INSTALL} -c -m 444 ${PRIVHDRSPPCHDRS} ${PRIVHDRSPPC}
 
index f54f344ed8d1462ebba06466ff057fa1566945ea..83bf17c1f40ea3f03014a2d8b4c9973a3dbecd49 100755 (executable)
@@ -102,7 +102,7 @@ my %TypeBytes = (
 
 ##########################################################################
 # Make a __xxx.s file: if it exists in the $CustomDir, just copy it, otherwise
-# create one.  We define the macro __SYSCALL_I386_ARG_BYTES so that SYS.h could
+# create one.  We define the macro __SYSCALL_32BIT_ARG_BYTES so that SYS.h could
 # use that to define __SYSCALL dependent on the arguments' total size.
 ##########################################################################
 sub make_s {
@@ -119,7 +119,7 @@ sub make_s {
     } else {
        my $f = IO::File->new($path, 'w');
        die "$MyName: $path: $!\n" unless defined($f);
-       print $f "#define __SYSCALL_I386_ARG_BYTES $bytes\n\n";
+       print $f "#define __SYSCALL_32BIT_ARG_BYTES $bytes\n\n";
        print $f "#include \"SYS.h\"\n\n";
        print $f "__SYSCALL($pseudo, $name, $args)\n";
        print "Creating $path\n";
index 53039d9e3e728e099a10f37a56153c65d8218a35..af9074020ffef70bbcf9a1ec935478c94f0de47d 100644 (file)
@@ -138,14 +138,14 @@ LEAF(_##name, 0)                                  ;\
        BRANCH_EXTERN(cerror)                           ;\
 2:
 
-#if defined(__SYSCALL_I386_ARG_BYTES) && ((__SYSCALL_I386_ARG_BYTES >= 4) && (__SYSCALL_I386_ARG_BYTES <= 20))
+#if defined(__SYSCALL_32BIT_ARG_BYTES) && ((__SYSCALL_32BIT_ARG_BYTES >= 4) && (__SYSCALL_32BIT_ARG_BYTES <= 20))
 #define UNIX_SYSCALL_NONAME(name, nargs)                       \
-       movl    $(SYS_##name | (__SYSCALL_I386_ARG_BYTES << I386_SYSCALL_ARG_BYTES_SHIFT)), %eax                ;\
+       movl    $(SYS_##name | (__SYSCALL_32BIT_ARG_BYTES << I386_SYSCALL_ARG_BYTES_SHIFT)), %eax               ;\
        UNIX_SYSCALL_SYSENTER                                   ;\
        jnb     2f                                              ;\
        BRANCH_EXTERN(cerror)                                   ;\
 2:
-#else /* __SYSCALL_I386_ARG_BYTES < 4 || > 20 */
+#else /* __SYSCALL_32BIT_ARG_BYTES < 4 || > 20 */
 #define UNIX_SYSCALL_NONAME(name, nargs)               \
        .globl  cerror                                  ;\
        movl    $ SYS_##name, %eax                      ;\
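
The rename describes what the macro actually encodes: the syscall number and the total size of its 32-bit arguments are packed into the single value loaded into %eax, so the sysenter path knows how many bytes to copy in. A hedged C restatement; the shift constant here is illustrative, the real one comes from the i386 syscall headers:

    #include <stdint.h>

    #define ARG_BYTES_SHIFT_SKETCH  24   /* illustrative, not the real value */

    static inline uint32_t pack_syscall_sketch(uint32_t sysnum,
                                               uint32_t arg_bytes)
    {
        /* Low bits: syscall number; high bits: argument size in bytes. */
        return sysnum | (arg_bytes << ARG_BYTES_SHIFT_SKETCH);
    }
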
index 4efe509ce606f8f7855737aecbafbb827b4355d5..40048e71ee8b578ba8a7dd5515a304d4c28bef43 100644 (file)
@@ -1,6 +1,11 @@
 # machine-dependent mach sources
-.if exists(${.CURDIR}/mach/${MACHINE_ARCH}/Makefile.inc)
-.include "${.CURDIR}/mach/${MACHINE_ARCH}/Makefile.inc"
+.if ${MACHINE_ARCH} == armv6
+ARCHDIR = arm
+.else
+ARCHDIR = ${MACHINE_ARCH}
+.endif
+.if exists(${.CURDIR}/mach/${ARCHDIR}/Makefile.inc)
+.include "${.CURDIR}/mach/${ARCHDIR}/Makefile.inc"
 .endif
 
 .PATH: ${.CURDIR}/mach
index b08213bc0f27992a2fdefe4b974a32bc1df23f65..0d4989a32099eeab41c0853bcfff146663d45fa2 100644 (file)
@@ -84,47 +84,10 @@ $(error There were $(words $(KERNEL_CONFIG)) parameters passed to KERNEL_CONFIG
                Are you sure? To specify multiple configurations please use KERNEL_CONFIGS)
 endif
 
-#
-# Machine Configuration options  
-#
-# ppc supported configurations : none
-# i386 supported configurations : none
-# arm supported configurations : LN2410SBC MX31ADS INTEGRATORCP S5I3000SMDK S5L8900XFPGA S5L8900XRB OLOCREEK
-#
-ifndef SUPPORTED_MACHINE_CONFIGS
-export SUPPORTED_MACHINE_CONFIGS = LN2410SBC MX31ADS INTEGRATORCP S5I3000SMDK S5L8900XFPGA S5L8900XRB OLOCREEK DEFAULT
-endif
-
-export DEFAULT_ARM_MACHINE_CONFIG      = S5L8900XRB
-
 ifndef MACHINE_CONFIG
 export MACHINE_CONFIG  = DEFAULT
 endif
 
-ifndef MACHINE_FLAGS_LN2410SBC
-export MACHINE_FLAGS_LN2410SBC = -DARM_BOARD_CONFIG_LN2410_920T
-endif
-ifndef MACHINE_FLAGS_MX31ADS
-export MACHINE_FLAGS_MX31ADS = -DARM_BOARD_CONFIG_MX31ADS_1136JFS
-endif
-ifndef MACHINE_FLAGS_INTEGRATORCP
-export MACHINE_FLAGS_INTEGRATORCP = -DARM_BOARD_CONFIG_INTEGRATORCP_1136JFS
-endif
-ifndef MACHINE_FLAGS_S5I3000SMDK
-export MACHINE_FLAGS_S5I3000SMDK = -DARM_BOARD_CONFIG_S5I3000SMDK_1176JZFS
-endif
-ifndef MACHINE_FLAGS_S5L8900XFPGA
-export MACHINE_FLAGS_S5L8900XFPGA = -DARM_BOARD_CONFIG_S5L8900XFPGA_1136JFS
-endif
-ifndef MACHINE_FLAGS_S5L8900XRB
-export MACHINE_FLAGS_S5L8900XRB = -DARM_BOARD_CONFIG_S5L8900XRB
-endif
-ifndef MACHINE_FLAGS_OLOCREEK
-export MACHINE_FLAGS_OLOCREEK = -DARM_BOARD_CONFIG_OLOCREEK
-endif
-ifndef MACHINE_FLAGS_DEFAULT
-export MACHINE_FLAGS_DEFAULT =
-endif
 
 #
 # Target configuration options.  NOTE - target configurations will 
 
 ARCH_FLAGS_I386                  = -arch i386
 ARCH_FLAGS_ARM           = $($(addsuffix $(MACHINE_CONFIG),ARCH_FLAGS_ARM_))
 
 ARCH_FLAGS_I386                  = -arch i386
 ARCH_FLAGS_ARM           = $($(addsuffix $(MACHINE_CONFIG),ARCH_FLAGS_ARM_))
 
-ARCH_FLAGS_ARM_LN2410SBC         = -arch arm
-ARCH_FLAGS_ARM_MX31ADS           = -arch armv6
-ARCH_FLAGS_ARM_INTEGRATORCP      = -arch armv6
-ARCH_FLAGS_ARM_S5I3000SMDK       = -arch armv6
-ARCH_FLAGS_ARM_S5L8900XFPGA      = -arch armv6
-ARCH_FLAGS_ARM_S5L8900XRB        = -arch armv6
-ARCH_FLAGS_ARM_OLOCREEK                  = -arch arm
 
 #
 # Default CFLAGS
@@ -290,6 +246,12 @@ endif
 ifeq (-arch armv6,$(ARCH_FLAGS_ARM))
 CFLAGS_ARM             += -mthumb
 endif
+ifeq (-arch armv5,$(ARCH_FLAGS_ARM))
+CFLAGS_ARM             += -mthumb
+endif
+ifeq (-arch xscale,$(ARCH_FLAGS_ARM))
+CFLAGS_ARM             += -mthumb
+endif
 
 export CFLAGS_RELEASEPPC = -O2 -mcpu=750 -mmultiple
 export CFLAGS_RELEASE_TRACEPPC = -O2 -mcpu=750 -mmultiple
index 01731eae512f17973515805f2de6067eb25100d6..5eb745c8764b28d59950ff1a092d4f2c16060e13 100644 (file)
@@ -58,5 +58,9 @@ options               PAE
 options                X86_64
 options                DISPATCH_COUNTS
 
+#
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
+#
 options                CONFIG_MACF             # Mandatory Access Control Framework
 #options       CONFIG_MACF_MACH        # MACF applied to Mach services
index d655eea9eb459fb3c44af5db9343981434606870..09dfbf8ee20e6d2fc66284803313c730417ac65e 100644 (file)
@@ -58,5 +58,9 @@ options               POWERMAC
 
 options                DISPATCH_COUNTS
 
+#
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
+#
 options                CONFIG_MACF             # Mandatory Access Control Framework
 #options       CONFIG_MACF_MACH        # MACF applied to Mach services
index eb86e791ea75c70b5e8bdfcfe11e058e79e3575e..6743dc70bf83ea8bd428a79a0aee71bfca166cf0 100644 (file)
@@ -670,11 +670,13 @@ panic_io_port_read(void) {
 /* For use with the MP rendezvous mechanism
  */
 
+#if !CONFIG_EMBEDDED
 static void
 machine_halt_cpu(__unused void *arg) {
        panic_io_port_read();
        pmCPUHalt(PM_HALT_DEBUG);
 }
+#endif
 
 void
 Debugger(
index 539a82fde9ad53133d19ec6bd0bbb138c66f507c..64c21447ee624d97d1b02db5b7121f19c86a5590 100644 (file)
@@ -173,7 +173,11 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
        acpi_sleep_cpu(func, refcon);
 #endif
 
-       /* reset UART if kprintf is enabled */
+       /* Reset UART if kprintf is enabled.
+        * However, kprintf should not be used before rtc_sleep_wakeup(),
+        * for compatibility with FireWire kprintf.
+        */
+
        if (FALSE == disable_serial_output)
                serial_init();
 
index 52c70d903079b8131928214473641bca8a339ba7..5ae9621d52d8dbc20d304fdf936dde3921c17ccf 100644 (file)
@@ -280,24 +280,6 @@ hpet_init(void)
        DBG(" CVT: HPET to BUS = %08X.%08X\n",
            (uint32_t)(hpet2bus >> 32), (uint32_t)hpet2bus);
 
-       /* Make sure the counter is off in the HPET configuration flags */
-       uint64_t hpetcon = ((hpetReg_t *)hpetArea)->GEN_CONF;
-       hpetcon = hpetcon & ~1;
-       ((hpetReg_t *)hpetArea)->GEN_CONF = hpetcon;
-
-       /*
-        * Convert current TSC to HPET value,
-        * set it, and start it ticking.
-        */
-       uint64_t currtsc = rdtsc64();
-       uint64_t tscInHPET = tmrCvt(currtsc, tsc2hpet);
-       ((hpetReg_t *)hpetArea)->MAIN_CNT = tscInHPET;
-       hpetcon = hpetcon | 1;
-       ((hpetReg_t *)hpetArea)->GEN_CONF = hpetcon;
-       kprintf("HPET started: TSC = %08X.%08X, HPET = %08X.%08X\n", 
-               (uint32_t)(currtsc >> 32), (uint32_t)currtsc,
-               (uint32_t)(tscInHPET >> 32), (uint32_t)tscInHPET);
-
 #if MACH_KDB
        db_display_hpet((hpetReg_t *)hpetArea); /* (BRINGUP) */
 #endif
@@ -317,8 +299,13 @@ hpet_get_info(hpetInfo_t *info)
     info->hpet2tsc   = hpet2tsc;
     info->bus2hpet   = bus2hpet;
     info->hpet2bus   = hpet2bus;
-    info->rcbaArea   = rcbaArea;
-    info->rcbaAreap  = rcbaAreap;
+    /*
+     * XXX
+     * We're repurposing the rcbaArea so we can use the HPET.
+     * Eventually we'll rename this correctly.
+     */
+    info->rcbaArea   = hpetArea;
+    info->rcbaAreap  = hpetAreap;
 }
 
 
index 8abf223f975612d1ec7ccbc3a9700b81ba8dcc30..214a588b75fdd7542a038493466fd19238952a67 100644 (file)
@@ -308,11 +308,18 @@ mca_dump(void)
 {
        ia32_mcg_status_t       status;
 
-       mca_exception_taken = TRUE;
        mca_save_state();
 
-       /* Serialize in case of multiple simultaneous machine-checks */
+       /*
+        * Serialize in case of multiple simultaneous machine-checks.
+        * Only the first caller is allowed to print MCA registers.
+        */
        simple_lock(&mca_lock);
+       if (mca_exception_taken) {
+               simple_unlock(&mca_lock);
+               return;
+       }
+       mca_exception_taken = TRUE;
 
        /*
         * Report machine-check capabilities:
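
The added early-return turns mca_dump() into a first-caller-wins routine: the lock orders CPUs that machine-check simultaneously, and the flag lets only the first one print register state. A generic sketch of the pattern, with simplified stand-ins for the simple-lock primitives:

    #include <stdbool.h>

    extern void lock_acquire_sketch(int *lock);   /* stand-in for simple_lock */
    extern void lock_release_sketch(int *lock);   /* stand-in for simple_unlock */

    static int  mca_lock_sketch;
    static bool mca_taken_sketch;

    void mca_dump_sketch(void)
    {
        lock_acquire_sketch(&mca_lock_sketch);
        if (mca_taken_sketch) {                /* a CPU already dumped state */
            lock_release_sketch(&mca_lock_sketch);
            return;
        }
        mca_taken_sketch = true;               /* claim the dump */
        /* ... report machine-check registers while serialized ... */
        lock_release_sketch(&mca_lock_sketch);
    }
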
index 8d9036a15e97939900af2a7c87b881d613eae752..93d45455d07cf6f655febdeafb2f1b8c9ce3b59e 100644 (file)
@@ -126,7 +126,7 @@ extern void rtc_clock_stepping(
 extern void    rtc_clock_stepped(
                        uint32_t new_frequency,
                        uint32_t old_frequency);
-extern void    rtc_clock_napped(uint64_t);
+extern void    rtc_clock_napped(uint64_t, uint64_t);
 
 extern void     x86_lowmem_free(void);
 
index 12a071c2ccb87aeaac1fc854fca617951c348c83..75bbe25cf81c27614e5e01648f8d09f8e01ed301 100644 (file)
@@ -457,8 +457,11 @@ fast_syscall_init64(void)
         */
        wrmsr64(MSR_IA32_KERNEL_GS_BASE,
                UBER64((unsigned long)current_cpu_datap()));
+
+#if ONLY_SAFE_FOR_LINDA_SERIAL
        kprintf("fast_syscall_init64() KERNEL_GS_BASE=0x%016llx\n",
                rdmsr64(MSR_IA32_KERNEL_GS_BASE));
+#endif
 }
 
 /*
@@ -725,7 +728,9 @@ cpu_desc_load64(cpu_data_t *cdp)
        
        ml_load_desc64();
 
+#if ONLY_SAFE_FOR_LINDA_SERIAL
        kprintf("64-bit descriptor tables loaded\n");
+#endif
 }
 
 void
index 36dae2f3ef138edd719cd43106ceea56865a6a75..72ecf5f761a771085e94ed83c0f383deb23f8530 100644 (file)
@@ -221,7 +221,11 @@ void dump_4GB_pdpt_thread(thread_t tp);
 #define        iswired(pte)    ((pte) & INTEL_PTE_WIRED)
 
 int nx_enabled = 1;                    /* enable no-execute protection */
+#ifdef CONFIG_EMBEDDED
+int allow_data_exec  = 0;      /* no exec from data, embedded is hardcore like that */
+#else
 int allow_data_exec  = VM_ABI_32;      /* 32-bit apps may execute data by default, 64-bit apps may not */
+#endif
 int allow_stack_exec = 0;              /* No apps may execute from the stack by default */
 
 int cpu_64bit  = 0;
index bad2abbe78eea436909336b8c76c54a5f1cd1a6c..a1784f3bfcc9dc2d4116ba74807a08e921366e46 100644 (file)
@@ -107,6 +107,28 @@ extern uint64_t            _rtc_nanotime_read(
 rtc_nanotime_t rtc_nanotime_info = {0,0,0,0,1,0};
 
 
+/*
+ * tsc_to_nanoseconds:
+ *
+ * Basic routine to convert a raw 64 bit TSC value to a
+ * 64 bit nanosecond value.  The conversion is implemented
+ * based on the scale factor and an implicit 32 bit shift.
+ */
+static inline uint64_t
+_tsc_to_nanoseconds(uint64_t value)
+{
+    asm volatile("movl %%edx,%%esi     ;"
+                "mull  %%ecx           ;"
+                "movl  %%edx,%%edi     ;"
+                "movl  %%esi,%%eax     ;"
+                "mull  %%ecx           ;"
+                "addl  %%edi,%%eax     ;"      
+                "adcl  $0,%%edx         "
+                : "+A" (value) : "c" (rtc_nanotime_info.scale) : "esi", "edi");
+
+    return (value);
+}
+
 static uint32_t
 deadline_to_decrementer(
        uint64_t        deadline,
@@ -234,26 +256,31 @@ rtc_nanotime_read(void)
 /*
  * rtc_clock_napped:
  *
- * Invoked from power manangement when we have awoken from a nap (C3/C4)
- * during which the TSC lost counts.  The nanotime data is updated according
- * to the provided value which indicates the number of nanoseconds that the
- * TSC was not counting.
- *
- * The caller must guarantee non-reentrancy.
+ * Invoked from power management when we exit from a low C-State (>= C4)
+ * and the TSC has stopped counting.  The nanotime data is updated according
+ * to the provided value which represents the new value for nanotime.
  */
 void
  */
 void
-rtc_clock_napped(
-       uint64_t                delta)
+rtc_clock_napped(uint64_t base, uint64_t tsc_base)
 {
        rtc_nanotime_t  *rntp = &rtc_nanotime_info;
-       uint32_t        generation;
+       uint64_t        oldnsecs;
+       uint64_t        newnsecs;
+       uint64_t        tsc;
 
        assert(!ml_get_interrupts_enabled());
-       generation = rntp->generation;
-       rntp->generation = 0;
-       rntp->ns_base += delta;
-       rntp->generation = ((generation + 1) != 0) ? (generation + 1) : 1;
-       rtc_nanotime_set_commpage(rntp);
+       tsc = rdtsc64();
+       oldnsecs = rntp->ns_base + _tsc_to_nanoseconds(tsc - rntp->tsc_base);
+       newnsecs = base + _tsc_to_nanoseconds(tsc - tsc_base);
+       
+       /*
+        * Only update the base values if time using the new base values
+        * is later than the time using the old base values.
+        */
+       if (oldnsecs < newnsecs) {
+           _rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp);
+           rtc_nanotime_set_commpage(rntp);
+       }
 }
 
 void
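
The inline assembly in _tsc_to_nanoseconds() computes the high 64 bits of a 96-bit product: (delta * scale) >> 32, with scale a 32.32 fixed-point factor. rtc_clock_napped() evaluates it for both the old and the proposed (base, tsc_base) pairs and keeps whichever yields the later time, so nanotime never steps backward across a nap. A portable restatement of the conversion, as a sketch that relies on the GCC 128-bit extension:

    #include <stdint.h>

    static inline uint64_t tsc_to_ns_sketch(uint64_t delta, uint32_t scale)
    {
        /* Same result as the mull/adcl sequence above. */
        return (uint64_t)(((unsigned __int128)delta * scale) >> 32);
    }
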
index 0998530b133c5ac35168fd235fc10fde88576c45..b252c496adba2e4767cfcda56f7b82d82ec9ddb5 100644 (file)
@@ -115,7 +115,9 @@ cpu_IA32e_enable(cpu_data_t *cdp)
                : "i" (CR0_PG)
                : "eax" );
        
+#if ONLY_SAFE_FOR_LINDA_SERIAL
        kprintf("cpu_IA32e_enable(%p)\n", cdp);
+#endif
 
        if ((rdmsr64(MSR_IA32_EFER) & MSR_IA32_EFER_LMA) == 0)
                panic("cpu_IA32e_enable() MSR_IA32_EFER_LMA not asserted");
index 85a82e8804aa8fdfa5726ea826e02d9fb5990b85..badd5491f21873ccf374850ee38bb1233aec18e3 100644 (file)
@@ -79,7 +79,7 @@
 #include <i386/eflags.h>
 
 /*
- *     i386_saved_state:
+ *     x86_saved_state32/64:
  *
  *     Has been exported to servers.  See: mach/i386/thread_status.h
  *
index 724bb0f9f7332c2ed8444556f335c0f49afb2f0e..19b7469a694cf38327e495de4d922f581eee6583 100644 (file)
@@ -160,13 +160,16 @@ tsc_init(void)
         * Get the TSC increment.  The TSC is incremented by this
         * on every bus tick.  Calculate the TSC conversion factors
         * to and from nano-seconds.
+        * The tsc granularity is also called the "bus ratio". If the N/2 bit
+        * is set, the bus ratio is 0.5 more than this value - i.e.
+        * the true bus ratio is (2*tscGranularity + 1)/2.
         */
        if (cpuid_info()->cpuid_family == CPU_FAMILY_PENTIUM_M) {
                uint64_t        prfsts;
 
                prfsts = rdmsr64(IA32_PERF_STS);
                tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
         */
        if (cpuid_info()->cpuid_family == CPU_FAMILY_PENTIUM_M) {
                uint64_t        prfsts;
 
                prfsts = rdmsr64(IA32_PERF_STS);
                tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
-               N_by_2_bus_ratio = prfsts & bit(46);
+               N_by_2_bus_ratio = (prfsts & bit(46)) != 0;
 
        } else {
                panic("rtclock_init: unknown CPU family: 0x%X\n",
 
        }
 
        if (N_by_2_bus_ratio)
        }
 
        if (N_by_2_bus_ratio)
-               tscFCvtt2n = busFCvtt2n * 2 / (uint64_t)tscGranularity;
+               tscFCvtt2n = busFCvtt2n * 2 / (1 + 2*tscGranularity);
        else
        else
-               tscFCvtt2n = busFCvtt2n / (uint64_t)tscGranularity;
+               tscFCvtt2n = busFCvtt2n / tscGranularity;
 
        tscFreq = ((1 * Giga)  << 32) / tscFCvtt2n;
        tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n;
 
        kprintf(" TSC: Frequency = %6d.%04dMHz, "
 
+                       "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n",
                        (uint32_t)(tscFreq / Mega),
                        (uint32_t)(tscFreq % Mega), 
                        (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
                        (uint32_t)(tscFCvtn2t >> 32), (uint32_t)tscFCvtn2t,
                        (uint32_t)(tscFreq / Mega),
                        (uint32_t)(tscFreq % Mega), 
                        (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
                        (uint32_t)(tscFCvtn2t >> 32), (uint32_t)tscFCvtn2t,
-                       tscGranularity);
+                       tscGranularity, N_by_2_bus_ratio ? " (N/2)" : "");
 
        /*
         * Calculate conversion from BUS to TSC
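
A worked example of the N/2 correction: with tscGranularity = 8 and the N/2 bit set, the true bus ratio is (2*8 + 1)/2 = 8.5, so the tick-to-nanosecond factor must be busFCvtt2n * 2 / 17 rather than a plain division by the granularity. The fixed conversion restated as a plain C sketch:

    #include <stdint.h>

    static uint64_t tsc_cvtt2n_sketch(uint64_t busFCvtt2n,
                                      uint32_t tscGranularity,
                                      int N_by_2_bus_ratio)
    {
        if (N_by_2_bus_ratio)   /* true ratio is (2*g + 1)/2 */
            return busFCvtt2n * 2 / (1 + 2 * (uint64_t)tscGranularity);
        return busFCvtt2n / tscGranularity;
    }
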
index 84674a9901df498de5daaab1cb0b15665e288b30..29eebc4a8523d2aa59374598f7a5a21d8903af57 100644 (file)
@@ -53,8 +53,6 @@ extern int setPop(uint64_t time);
 
 extern void etimer_resync_deadlines(void);
 
-extern uint32_t rtclock_tick_interval;
-
 #if 0 /* this is currently still MD */
 #pragma pack(push,4)
 struct rtclock_timer_t  {
index 064a58252f081420534e3223fc1ed42272955c20..5f9d4d80ae7ce6dcee66094c8aacdf5c27b941e6 100644 (file)
@@ -561,25 +561,20 @@ lck_mtx_lock_wait (
        priority = self->sched_pri;
        if (priority < self->priority)
                priority = self->priority;
-       if (priority > MINPRI_KERNEL)
-               priority = MINPRI_KERNEL;
-       else
        if (priority < BASEPRI_DEFAULT)
                priority = BASEPRI_DEFAULT;
 
        thread_lock(holder);
        if (mutex->lck_mtx_pri == 0)
                holder->promotions++;
-       if (holder->priority < MINPRI_KERNEL) {
-               holder->sched_mode |= TH_MODE_PROMOTED;
-               if (    mutex->lck_mtx_pri < priority   &&
+       holder->sched_mode |= TH_MODE_PROMOTED;
+       if (            mutex->lck_mtx_pri < priority   &&
                                holder->sched_pri < priority            ) {
-                       KERNEL_DEBUG_CONSTANT(
-                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
+               KERNEL_DEBUG_CONSTANT(
+                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
                                        holder->sched_pri, priority, (int)holder, (int)lck, 0);
 
-                       set_sched_pri(holder, priority);
-               }
+               set_sched_pri(holder, priority);
        }
        thread_unlock(holder);
        splx(s);
@@ -654,15 +649,13 @@ lck_mtx_lock_acquire(
 
                thread_lock(thread);
                thread->promotions++;
-               if (thread->priority < MINPRI_KERNEL) {
-                       thread->sched_mode |= TH_MODE_PROMOTED;
-                       if (thread->sched_pri < priority) {
-                               KERNEL_DEBUG_CONSTANT(
-                                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
+               thread->sched_mode |= TH_MODE_PROMOTED;
+               if (thread->sched_pri < priority) {
+                       KERNEL_DEBUG_CONSTANT(
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
                                                thread->sched_pri, priority, 0, (int)lck, 0);
 
-                               set_sched_pri(thread, priority);
-                       }
+                       set_sched_pri(thread, priority);
                }
                thread_unlock(thread);
                splx(s);
index 2c7b7f2549193829d853a9000ed2abbf452fda39..779855296c0bc64dcc0232e41c2563f261784c68 100644 (file)
@@ -106,9 +106,7 @@ hertz_tick(
 #endif
 {
        processor_t             processor = current_processor();
-#if !GPROF
        thread_t                thread = current_thread();
-#endif
        timer_t                 state;
 
        if (usermode) {
@@ -117,8 +115,11 @@ hertz_tick(
                state = &PROCESSOR_DATA(processor, user_state);
        }
        else {
-               TIMER_BUMP(&thread->system_timer, ticks);
-
+               /* If this thread is idling, do not charge that time as system time */
+               if ((thread->state & TH_IDLE) == 0) {
+                       TIMER_BUMP(&thread->system_timer, ticks);
+               }
+        
                if (processor->state == PROCESSOR_IDLE)
                        state = &PROCESSOR_DATA(processor, idle_state);
                else
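
The hertz_tick() change stops charging ticks taken on an idle thread as system time, so idle no longer inflates a thread's system timer. A simplified sketch of the accounting decision; the flag value and structure are illustrative stand-ins, not the kernel's:

    #include <stdbool.h>

    #define TH_IDLE_SKETCH 0x80            /* illustrative flag value */

    struct thread_sketch {
        int           state;
        unsigned long system_ticks;
    };

    static void charge_tick_sketch(struct thread_sketch *t, bool usermode)
    {
        /* Only non-idle kernel time counts as system time. */
        if (!usermode && !(t->state & TH_IDLE_SKETCH))
            t->system_ticks++;
    }
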
index 4f08fd378bf2d1e017a7febbb7d0fe4cc4e60367..6564cc97c8f34f96bce28d3fa37808891f0d08db 100644 (file)
@@ -96,7 +96,7 @@ thread_quantum_expire(
        /*
         *      Check for fail-safe trip.
         */
-       if (!(thread->sched_mode & TH_MODE_TIMESHARE)) {
+       if (!(thread->sched_mode & (TH_MODE_TIMESHARE|TH_MODE_PROMOTED))) {
                uint64_t                        new_computation;
 
                new_computation = processor->quantum_end;
@@ -115,7 +115,6 @@ thread_quantum_expire(
 
                        thread->safe_release = sched_tick + sched_safe_duration;
                        thread->sched_mode |= (TH_MODE_FAILSAFE|TH_MODE_TIMESHARE);
-                       thread->sched_mode &= ~TH_MODE_PREEMPT;
                }
        }
                
index 0cbde484cd7c7f8e54240934f1229e32af6ad935..e2027c0662e0f474c01245dffbdd810277da89c6 100644 (file)
@@ -150,6 +150,7 @@ void                (*pm_tick_callout)(void)        = NULL;
 void wait_queues_init(void) __attribute__((section("__TEXT, initcode")));
 
 static void load_shift_init(void) __attribute__((section("__TEXT, initcode")));
+static void preempt_pri_init(void) __attribute__((section("__TEXT, initcode")));
 
 static thread_t        thread_select_idle(
                                        thread_t                        thread,
@@ -181,8 +182,6 @@ boolean_t   thread_runnable(
 
 #endif /*DEBUG*/
 
-
-
 /*
  *     State machine
  *
@@ -243,6 +242,7 @@ struct wait_queue wait_queues[NUMQUEUES];
        ((((int)(event) < 0)? ~(int)(event): (int)(event)) % NUMQUEUES)
 
 int8_t         sched_load_shifts[NRQS];
+int                    sched_preempt_pri[NRQBM];
 
 void
 sched_init(void)
@@ -262,6 +262,7 @@ sched_init(void)
 
        wait_queues_init();
        load_shift_init();
+       preempt_pri_init();
        simple_lock_init(&rt_lock, 0);
        run_queue_init(&rt_runq);
        sched_tick = 0;
@@ -299,9 +300,15 @@ sched_timebase_init(void)
        /* scheduler tick interval */
        clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
                                                                                                        NSEC_PER_USEC, &abstime);
-       assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
        sched_tick_interval = abstime;
 
+#if DEBUG
+       printf("Quantum: %d. Smallest quantum: %d. Min Rt/Max Rt: %d/%d."
+               " Tick: %d.\n",
+               std_quantum, min_std_quantum, min_rt_quantum, max_rt_quantum,
+               sched_tick_interval);
+#endif
+
        /*
         * Compute conversion factor from usage to
         * timesharing priorities with 5/8 ** n aging.
@@ -343,6 +350,18 @@ load_shift_init(void)
        }
 }
 
+static void
+preempt_pri_init(void)
+{
+       int             i, *p = sched_preempt_pri;
+
+       for (i = BASEPRI_FOREGROUND + 1; i < MINPRI_KERNEL; ++i)
+               setbit(i, p);
+
+       for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
+               setbit(i, p);
+}
+
 /*
  *     Thread wait timer expiration.
  */
@@ -1200,8 +1219,8 @@ thread_select(
                                                ((queue_entry_t)thread)->next->prev = q;
                                                q->next = ((queue_entry_t)thread)->next;
                                                thread->runq = PROCESSOR_NULL;
-                                               assert(thread->sched_mode & TH_MODE_PREEMPT);
                                                runq->count--; runq->urgency--;
+                                               assert(runq->urgency >= 0);
                                                if (queue_empty(q)) {
                                                        if (runq->highq != IDLEPRI)
                                                                clrbit(MAXPRI - runq->highq, runq->bitmap);
@@ -1916,8 +1935,9 @@ run_queue_dequeue(
 
        thread->runq = PROCESSOR_NULL;
        rq->count--;
-       if (thread->sched_mode & TH_MODE_PREEMPT)
-               rq->urgency--;
+       if (testbit(rq->highq, sched_preempt_pri)) {
+               rq->urgency--; assert(rq->urgency >= 0);
+       }
        if (queue_empty(queue)) {
                if (rq->highq != IDLEPRI)
                        clrbit(MAXPRI - rq->highq, rq->bitmap);
@@ -1971,7 +1991,6 @@ realtime_queue_insert(
        }
 
        thread->runq = RT_RUNQ;
-       assert(thread->sched_mode & TH_MODE_PREEMPT);
        rq->count++; rq->urgency++;
 
        simple_unlock(&rt_lock);
@@ -2060,7 +2079,7 @@ processor_enqueue(
                enqueue_head(queue, (queue_entry_t)thread);
 
        thread->runq = processor;
-       if (thread->sched_mode & TH_MODE_PREEMPT)
+       if (testbit(thread->sched_pri, sched_preempt_pri))
                rq->urgency++;
        rq->count++;
 
@@ -2106,7 +2125,7 @@ processor_setrun(
        /*
         *      Set preemption mode.
         */
-       if (thread->sched_mode & TH_MODE_PREEMPT)
+       if (testbit(thread->sched_pri, sched_preempt_pri))
                preempt = (AST_PREEMPT | AST_URGENT);
        else
        if (thread->sched_mode & TH_MODE_TIMESHARE && thread->priority < BASEPRI_BACKGROUND)
@@ -2409,8 +2428,9 @@ processor_queue_shutdown(
 
                                thread->runq = PROCESSOR_NULL;
                                rq->count--;
-                               if (thread->sched_mode & TH_MODE_PREEMPT)
-                                       rq->urgency--;
+                               if (testbit(pri, sched_preempt_pri)) {
+                                       rq->urgency--; assert(rq->urgency >= 0);
+                               }
                                if (queue_empty(queue)) {
                                        if (pri != IDLEPRI)
                                                clrbit(MAXPRI - pri, rq->bitmap);
@@ -2524,15 +2544,6 @@ set_sched_pri(
 {
        boolean_t               removed = run_queue_remove(thread);
 
-       if (    !(thread->sched_mode & TH_MODE_TIMESHARE)                               &&
-                       (priority >= BASEPRI_PREEMPT                                            ||
-                        (thread->task_priority < MINPRI_KERNEL                 &&
-                         thread->task_priority >= BASEPRI_BACKGROUND   &&
-                         priority > thread->task_priority)                                     )       )
-               thread->sched_mode |= TH_MODE_PREEMPT;
-       else
-               thread->sched_mode &= ~TH_MODE_PREEMPT;
-
        thread->sched_pri = priority;
        if (removed)
                thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
@@ -2630,9 +2641,9 @@ run_queue_remove(
                         */
                        remqueue(&rq->queues[0], (queue_entry_t)thread);
                        rq->count--;
-                       if (thread->sched_mode & TH_MODE_PREEMPT)
-                               rq->urgency--;
-                       assert(rq->urgency >= 0);
+                       if (testbit(thread->sched_pri, sched_preempt_pri)) {
+                               rq->urgency--; assert(rq->urgency >= 0);
+                       }
 
                        if (queue_empty(rq->queues + thread->sched_pri)) {
                                /* update run queue status */
@@ -2741,8 +2752,9 @@ steal_thread(
 
                                thread->runq = PROCESSOR_NULL;
                                rq->count--;
-                               if (thread->sched_mode & TH_MODE_PREEMPT)
-                                       rq->urgency--;
+                               if (testbit(pri, sched_preempt_pri)) {
+                                       rq->urgency--; assert(rq->urgency >= 0);
+                               }
                                if (queue_empty(queue)) {
                                        if (pri != IDLEPRI)
                                                clrbit(MAXPRI - pri, rq->bitmap);
@@ -2807,9 +2819,6 @@ processor_idle(
                        break;
        }
 
-       KERNEL_DEBUG_CONSTANT(
-               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, 0, 0, 0, 0);
-
        timer_switch(&PROCESSOR_DATA(processor, idle_state),
                                                                        mach_absolute_time(), &PROCESSOR_DATA(processor, system_state));
        PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);
@@ -2829,8 +2838,8 @@ processor_idle(
                processor->next_thread = THREAD_NULL;
                processor->state = PROCESSOR_RUNNING;
 
-               if (    processor->runq.highq > new_thread->sched_pri   ||
-                               rt_runq.highq >= new_thread->sched_pri                  ) {
+               if (    processor->runq.highq > new_thread->sched_pri                                   ||
+                               (rt_runq.highq > 0 && rt_runq.highq >= new_thread->sched_pri)   ) {
                        processor->deadline = UINT64_MAX;
 
                        pset_unlock(pset);
@@ -2839,11 +2848,17 @@ processor_idle(
                        thread_setrun(new_thread, SCHED_HEADQ);
                        thread_unlock(new_thread);
 
+                       KERNEL_DEBUG_CONSTANT(
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
+       
                        return (THREAD_NULL);
                }
 
                pset_unlock(pset);
 
                        return (THREAD_NULL);
                }
 
                pset_unlock(pset);
 
+               KERNEL_DEBUG_CONSTANT(
+                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, (int)new_thread, 0, 0);
+
                return (new_thread);
        }
        else
                return (new_thread);
        }
        else
@@ -2870,12 +2885,18 @@ processor_idle(
                        thread_setrun(new_thread, SCHED_HEADQ);
                        thread_unlock(new_thread);
 
                        thread_setrun(new_thread, SCHED_HEADQ);
                        thread_unlock(new_thread);
 
+                       KERNEL_DEBUG_CONSTANT(
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
+
                        return (THREAD_NULL);
                }
        }
 
        pset_unlock(pset);
 
                        return (THREAD_NULL);
                }
        }
 
        pset_unlock(pset);
 
+       KERNEL_DEBUG_CONSTANT(
+               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
+
        return (THREAD_NULL);
 }
 
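
Throughout this file the per-thread TH_MODE_PREEMPT bit gives way to sched_preempt_pri, a bitmap indexed by priority: preempt_pri_init() marks the bands whose runnable threads count as urgent, and every testbit() call above consults it when adjusting rq->urgency. A minimal sketch of such a bitmap; the size is an illustrative stand-in for the scheduler's NRQS/NRQBM constants:

    #include <stdint.h>

    #define NRQS_SKETCH  128

    static uint32_t preempt_pri_sketch[NRQS_SKETCH / 32];

    static void set_preempt_pri_sketch(int pri)
    {
        preempt_pri_sketch[pri / 32] |= 1u << (pri % 32);
    }

    static int test_preempt_pri_sketch(int pri)
    {
        /* Nonzero if threads at this priority preempt urgently. */
        return (preempt_pri_sketch[pri / 32] >> (pri % 32)) & 1;
    }
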
index f7855e11422dadc35fa4d7e1aa113d4d75c7231e..311e96c7dad5ebe338ebb03312c87574385ab9c1 100644 (file)
@@ -331,7 +331,6 @@ thread_depress_abstime(
 
                self->sched_pri = DEPRESSPRI;
                myprocessor->current_pri = self->sched_pri;
-               self->sched_mode &= ~TH_MODE_PREEMPT;
                self->sched_mode |= TH_MODE_DEPRESS;
 
                if (interval != 0) {
@@ -427,7 +426,6 @@ thread_poll_yield(
                        if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) {
                                self->sched_pri = DEPRESSPRI;
                                myprocessor->current_pri = self->sched_pri;
-                               self->sched_mode &= ~TH_MODE_PREEMPT;
                        }
                        self->computation_epoch = abstime;
                        self->computation_metered = 0;
index 8387019b8cf9db2c7baa8fe79c871b849c1b91cb..4cca246563896080ec1687ad217c8a1d44899c98 100644 (file)
@@ -185,14 +185,13 @@ struct thread {
        integer_t                       sched_mode;                     /* scheduling mode bits */
 #define TH_MODE_REALTIME               0x0001          /* time constraints supplied */
 #define TH_MODE_TIMESHARE              0x0002          /* use timesharing algorithm */
-#define TH_MODE_PREEMPT                        0x0004          /* can preempt kernel contexts */
-#define TH_MODE_FAILSAFE               0x0008          /* fail-safe has tripped */
-#define        TH_MODE_PROMOTED                0x0010          /* sched pri has been promoted */
-#define TH_MODE_ABORT                  0x0020          /* abort interruptible waits */
-#define TH_MODE_ABORTSAFELY            0x0040          /* ... but only those at safe point */
+#define TH_MODE_FAILSAFE               0x0004          /* fail-safe has tripped */
+#define        TH_MODE_PROMOTED                0x0008          /* sched pri has been promoted */
+#define TH_MODE_ABORT                  0x0010          /* abort interruptible waits */
+#define TH_MODE_ABORTSAFELY            0x0020          /* ... but only those at safe point */
 #define TH_MODE_ISABORTED              (TH_MODE_ABORT | TH_MODE_ABORTSAFELY)
-#define        TH_MODE_DEPRESS                 0x0080          /* normal depress yield */
-#define TH_MODE_POLLDEPRESS            0x0100          /* polled depress yield */
+#define        TH_MODE_DEPRESS                 0x0040          /* normal depress yield */
+#define TH_MODE_POLLDEPRESS            0x0080          /* polled depress yield */
 #define TH_MODE_ISDEPRESSED            (TH_MODE_DEPRESS | TH_MODE_POLLDEPRESS)
 
        integer_t                       sched_pri;                      /* scheduled (current) priority */
index da65103821e0c6b4bf544fdbad053dfcf4e1b03c..4fcb5f9570a6291d0bf2f301a223e5c92bbef664 100644 (file)
@@ -759,7 +759,6 @@ special_handler_continue(void)
 
                        thread->sched_pri = DEPRESSPRI;
                        myprocessor->current_pri = thread->sched_pri;
-                       thread->sched_mode &= ~TH_MODE_PREEMPT;
                }
                thread_unlock(thread);
                splx(s);
index d8c38843f2781c487bc4918acd9642765fc54ed5..173e79a8b104d6f1091aa583af3dfbaec82064f0 100644 (file)
@@ -361,7 +361,7 @@ struct x86_saved_state32_tagged {
 typedef struct x86_saved_state32_tagged x86_saved_state32_tagged_t;
 
 struct x86_sframe32 {
-/*
+       /*
         * in case we throw a fault reloading
         * segment registers on a return out of
         * the kernel... the 'slf' state is only kept
index 8a73ba9d892dc96b38b5c82188e46c1365a14aa2..e28a2c5371b7893e0f0b102e8f0487e27db3d469 100644 (file)
@@ -345,6 +345,8 @@ __END_DECLS
 #define CPU_SUBTYPE_ARM_ALL             ((cpu_subtype_t) 0)
 #define CPU_SUBTYPE_ARM_V4T             ((cpu_subtype_t) 5)
 #define CPU_SUBTYPE_ARM_V6              ((cpu_subtype_t) 6)
+#define CPU_SUBTYPE_ARM_V5TEJ           ((cpu_subtype_t) 7)
+#define CPU_SUBTYPE_ARM_XSCALE         ((cpu_subtype_t) 8)
 
 /*
  *     CPU families (sysctl hw.cpufamily)
@@ -368,6 +370,7 @@ __END_DECLS
 #define CPUFAMILY_INTEL_6_26 0x6b5a4cd2  /* Nehalem */
 #define CPUFAMILY_ARM_9      0xe73283ae
 #define CPUFAMILY_ARM_11     0x8ff620d8
+#define CPUFAMILY_ARM_XSCALE 0x53b005f5
 
 #define CPUFAMILY_INTEL_YONAH  CPUFAMILY_INTEL_6_14
 #define CPUFAMILY_INTEL_MEROM  CPUFAMILY_INTEL_6_15
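
As the comment notes, the CPUFAMILY_* values surface to user space through the hw.cpufamily sysctl. A minimal reader, assuming a Darwin userland:

    #include <stdio.h>
    #include <stdint.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    int main(void)
    {
        uint32_t family = 0;
        size_t len = sizeof(family);

        /* Prints one of the CPUFAMILY_* constants defined above. */
        if (sysctlbyname("hw.cpufamily", &family, &len, NULL, 0) == 0)
            printf("hw.cpufamily = 0x%08x\n", family);
        return 0;
    }
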
index 6721d47bf8e69f4decd24afc871b676fc9eacd47..b0280482533a6bce82bf33e3e15206942bdbe663 100644 (file)
@@ -321,6 +321,8 @@ trigger_name_to_port(
 extern int     uiomove64(addr64_t, int, void *);
 #define        MAX_RUN 32
 
+unsigned long vm_cs_tainted_forces = 0;
+
 int
 memory_object_control_uiomove(
        memory_object_control_t control,
@@ -396,8 +398,18 @@ memory_object_control_uiomove(
                         */
                        assert(!dst_page->encrypted);
 
-                       if (mark_dirty)
+                       if (mark_dirty) {
                                dst_page->dirty = TRUE;
+                               if (dst_page->cs_validated) {
+                                       /*
+                                        * CODE SIGNING:
+                                        * We're modifying a code-signed
+                                        * page:  assume that it is now tainted.
+                                        */
+                                       dst_page->cs_tainted = TRUE;
+                                       vm_cs_tainted_forces++;
+                               }
+                       }
                        dst_page->busy = TRUE;
 
                        page_run[cur_run++] = dst_page;
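
The hunk above makes memory_object_control_uiomove() taint any code-signed
page it dirties; vm_cs_tainted_forces counts those events.  An illustrative
helper (not in the tree) stating the invariant the cs_* bits maintain:

	/*
	 * A page covered by a code signature is trustworthy only while it
	 * is validated and untainted; a write either taints it (this path)
	 * or forces re-validation (the vm_fault path below).
	 */
	static inline boolean_t
	cs_page_trusted(vm_page_t p)
	{
		return (p->cs_validated && !p->cs_tainted);
	}
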
diff --git a/osfmk/vm/vm_fault.c b/osfmk/vm/vm_fault.c
index 87ffc3ee7c6d8b78671fcc2bfe599303a8e851ce..809d71e177940e8343a2eeb30bd8f7ce466a8418 100644 (file)
--- a/osfmk/vm/vm_fault.c
+++ b/osfmk/vm/vm_fault.c
@@ -149,6 +149,12 @@ extern void vm_fault_classify(vm_object_t  object,
 extern void vm_fault_classify_init(void);
 #endif
 
+
+unsigned long vm_cs_validates = 0;
+unsigned long vm_cs_revalidates = 0;
+unsigned long vm_cs_query_modified = 0;
+unsigned long vm_cs_validated_dirtied = 0;
+
 /*
  *     Routine:        vm_fault_init
  *     Purpose:
@@ -1988,19 +1994,21 @@ vm_fault_enter(vm_page_t m,
 
         cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
 
-       if (m->object->code_signed && !m->cs_validated &&
-           pmap != kernel_pmap) {
-               /*
-                * CODE SIGNING:
-                * This page comes from a VM object backed by a
-                * signed memory object and it hasn't been validated yet.
-                * We're about to enter it into a process address space,
-                * so we need to validate its signature now.
-                */
+       if (m->object->code_signed && pmap != kernel_pmap &&
+           (!m->cs_validated || m->wpmapped)) {
                vm_object_lock_assert_exclusive(m->object);
 
-               /* VM map still locked, so 1 ref will remain on VM object */
+               if (m->cs_validated && m->wpmapped) {
+                       vm_cs_revalidates++;
+               }
 
+               /*
+                * CODE SIGNING:
+                * This page comes from a VM object backed by a signed
+                * memory object.  We are about to enter it into a process
+                * address space, so we need to validate its signature.
+                */
+               /* VM map is locked, so 1 ref will remain on VM object */
                vm_page_validate_cs(m);
        }
 
@@ -2087,6 +2095,10 @@ vm_fault_enter(vm_page_t m,
                 * that's needed for an AtomicCompareAndSwap
                 */
                m->pmapped = TRUE;
+               if (prot & VM_PROT_WRITE) {
+                       vm_object_lock_assert_exclusive(m->object);
+                       m->wpmapped = TRUE;
+               }
 
                PMAP_ENTER(pmap, vaddr, m, prot, cache_attr, wired);
        }
@@ -2273,7 +2285,6 @@ RetryFault:
         */
        if (wired) {
                fault_type = prot | VM_PROT_WRITE;
-       
                /*
                 * since we're treating this fault as a 'write'
                 * we must hold the top object lock exclusively
@@ -2500,9 +2511,10 @@ RetryFault:
                        }
                        ASSERT_PAGE_DECRYPTED(m);
 
-                       if (m->object->code_signed && !m->cs_validated) {
+                       if (m->object->code_signed && map != kernel_map &&
+                           (!m->cs_validated || m->wpmapped)) {
                                /*
-                                * We will need to validate this page
+                                * We might need to validate this page
                                 * against its code signature, so we
                                 * want to hold the VM object exclusively.
                                 */
@@ -2547,8 +2559,23 @@ RetryFault:
                         *              --> must disallow write.
                         */
 
-                       if (object == cur_object && object->copy == VM_OBJECT_NULL)
+                       if (object == cur_object && object->copy == VM_OBJECT_NULL) {
+                               if ((fault_type & VM_PROT_WRITE) == 0) {
+                                       /*
+                                        * This is not a "write" fault, so we
+                                        * might not have taken the object lock
+                                        * exclusively and we might not be able
+                                        * to update the "wpmapped" bit in
+                                        * vm_fault_enter().
+                                        * Let's just grant read access to
+                                        * the page for now and we'll
+                                        * soft-fault again if we need write
+                                        * access later...
+                                        */
+                                       prot &= ~VM_PROT_WRITE;
+                               }
                                goto FastPmapEnter;
+                       }
 
                        if ((fault_type & VM_PROT_WRITE) == 0) {
 
@@ -4117,13 +4144,51 @@ vm_page_validate_cs(
        boolean_t               validated, tainted;
        boolean_t               busy_page;
 
-       vm_object_lock_assert_exclusive(page->object);
-       assert(!page->cs_validated);
+       vm_object_lock_assert_held(page->object);
 
        if (!cs_validation) {
                return;
        }
 
+       if (page->cs_validated && !page->cs_tainted && page->wpmapped) {
+               vm_object_lock_assert_exclusive(page->object);
+
+               /*
+                * This page has already been validated and found to
+                * be valid.  However, it was mapped for "write" access
+                * sometime in the past, so we have to check if it was
+                * modified.  If so, it needs to be revalidated.
+                * If the page was already found to be "tainted", no
+                * need to re-validate.
+                */
+               if (!page->dirty) {
+                       vm_cs_query_modified++;
+                       page->dirty = pmap_is_modified(page->phys_page);
+               }
+               if (page->dirty) {
+                       /*
+                        * The page is dirty, so let's clear its
+                        * "validated" bit and re-validate it.
+                        */
+                       if (cs_debug) {
+                               printf("CODESIGNING: vm_page_validate_cs: "
+                                      "page %p obj %p off 0x%llx "
+                                      "was modified\n",
+                                      page, page->object, page->offset);
+                       }
+                       page->cs_validated = FALSE;
+                       vm_cs_validated_dirtied++;
+               }
+       }
+
+       if (page->cs_validated) {
+               return;
+       }
+
+       vm_object_lock_assert_exclusive(page->object);
+
+       vm_cs_validates++;
+
        object = page->object;
        assert(object->code_signed);
        offset = page->offset;
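
Taken together, the vm_fault.c changes let a validated page be re-checked
once it has ever been mapped writable.  A condensed sketch of the new
control flow in vm_page_validate_cs() (illustrative, error paths omitted):

	if (page->cs_validated && !page->cs_tainted && page->wpmapped) {
		/* was mapped writable at some point: ask the pmap layer */
		if (!page->dirty)
			page->dirty = pmap_is_modified(page->phys_page);
		if (page->dirty)
			page->cs_validated = FALSE;	/* force re-validation */
	}
	if (page->cs_validated)
		return;			/* cached verdict is still good */
	/* ...fall through to the full signature check... */
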
diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c
index f20b587c16205fa8f6b6aaf2c94ec32260ed8300..74e805b790c01cf8ae35b965e8a0c2a30f85c696 100644 (file)
--- a/osfmk/vm/vm_map.c
+++ b/osfmk/vm/vm_map.c
@@ -1749,10 +1749,13 @@ StartAgain: ;
                        }
                        for (; entry->vme_start < end;
                             entry = entry->vme_next) {
+                               /*
+                                * Check if the mapping's attributes
+                                * match the existing map entry.
+                                */
                                if (entry == vm_map_to_entry(map) ||
                                    entry->vme_start != tmp_start ||
                                    entry->is_sub_map != is_submap ||
-                                   entry->object.vm_object != object ||
                                    entry->offset != tmp_offset ||
                                    entry->needs_copy != needs_copy ||
                                    entry->protection != cur_protection ||
@@ -1762,6 +1765,36 @@ StartAgain: ;
                                        /* not the same mapping ! */
                                        RETURN(KERN_NO_SPACE);
                                }
+                               /*
+                                * Check if the same object is being mapped.
+                                */
+                               if (is_submap) {
+                                       if (entry->object.sub_map !=
+                                           (vm_map_t) object) {
+                                               /* not the same submap */
+                                               RETURN(KERN_NO_SPACE);
+                                       }
+                               } else {
+                                       if (entry->object.vm_object != object) {
+                                               /* not the same VM object... */
+                                               vm_object_t obj2;
+
+                                               obj2 = entry->object.vm_object;
+                                               if ((obj2 == VM_OBJECT_NULL ||
+                                                    obj2->internal) &&
+                                                   (object == VM_OBJECT_NULL ||
+                                                    object->internal)) {
+                                                       /*
+                                                        * ... but both are
+                                                        * anonymous memory,
+                                                        * so equivalent.
+                                                        */
+                                               } else {
+                                                       RETURN(KERN_NO_SPACE);
+                                               }
+                                       }
+                               }
+
                                tmp_offset += entry->vme_end - entry->vme_start;
                                tmp_start += entry->vme_end - entry->vme_start;
                                if (entry->vme_end >= end) {
@@ -7978,8 +8011,8 @@ submap_recurse:
 
 
                        if(submap_entry->wired_count != 0 ||
-                          (sub_object->copy_strategy !=
-                           MEMORY_OBJECT_COPY_SYMMETRIC)) {
+                          (sub_object->copy_strategy ==
+                           MEMORY_OBJECT_COPY_NONE)) {
                                vm_object_lock(sub_object);
                                vm_object_copy_slowly(sub_object,
                                                      submap_entry->offset,
@@ -8086,7 +8119,7 @@ submap_recurse:
                        entry->max_protection |= submap_entry->max_protection;
 
                        if(copied_slowly) {
-                               entry->offset = 0;
+                               entry->offset = local_start - old_start;
                                entry->needs_copy = FALSE;
                                entry->is_shared = FALSE;
                        } else {
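
The VM_FLAGS_ALREADY path in vm_map_enter() now accepts two entries whose
vm_object pointers differ, provided both sides are anonymous memory.  An
illustrative predicate (not in the tree) equivalent to the test added above:

	static boolean_t
	same_backing_store(vm_object_t a, vm_object_t b)
	{
		if (a == b)
			return TRUE;
		/* null or internal objects are anonymous, hence equivalent */
		return ((a == VM_OBJECT_NULL || a->internal) &&
		        (b == VM_OBJECT_NULL || b->internal));
	}
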
diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c
index 60a80d38a1f000b1b196b464a9c66d291f072bcc..218a491572ee4721b76a12765d6501861c83f53f 100644 (file)
--- a/osfmk/vm/vm_object.c
+++ b/osfmk/vm/vm_object.c
@@ -1205,7 +1205,7 @@ vm_object_terminate(
                        panic("vm_object_terminate.4 %p %p", object, p);
                }
 
-               if (!p->dirty && p->pmapped)
+               if (!p->dirty && p->wpmapped)
                        p->dirty = pmap_is_modified(p->phys_page);
 
                if ((p->dirty || p->precious) && !p->error && object->alive) {
diff --git a/osfmk/vm/vm_page.h b/osfmk/vm/vm_page.h
index 53d13765482b06eef02e20d816b03fda8d68f05b..4052f9673b84a6cc31a8a29f4d8074ec045f1a89 100644 (file)
--- a/osfmk/vm/vm_page.h
+++ b/osfmk/vm/vm_page.h
@@ -196,6 +196,8 @@ struct vm_page {
                        fictitious:1,   /* Physical page doesn't exist (O) */
                        pmapped:1,      /* page has been entered at some
                                         * point into a pmap (O) */
+                       wpmapped:1,     /* page has been entered at some
+                                        * point into a pmap for write (O) */
                        absent:1,       /* Data has been requested, but is
                                         *  not yet available (O) */
                        error:1,        /* Data manager was unable to provide
@@ -230,7 +232,7 @@ struct vm_page {
                                           /* other pages                  */
                        deactivated:1,
                        zero_fill:1,
-                       __unused_object_bits:9;  /* 9 bits available here */
+                       __unused_object_bits:8;  /* 8 bits available here */
 
        ppnum_t         phys_page;      /* Physical address of page, passed
                                         *  to pmap_enter (read-only) */
@@ -484,6 +486,12 @@ extern void                vm_page_insert(
                                        vm_object_t             object,
                                        vm_object_offset_t      offset);
 
+extern void            vm_page_insert_internal(
+                                       vm_page_t               page,
+                                       vm_object_t             object,
+                                       vm_object_offset_t      offset,
+                                       boolean_t               queues_lock_held);
+
 extern void            vm_page_replace(
                                        vm_page_t               mem,
                                        vm_object_t             object,
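
Like pmapped, the new wpmapped bit is a sticky hint: it records that the
page has at some point been entered writable into a pmap.  Pages never
mapped writable cannot have been dirtied behind the VM's back, so the pmap
probe can be skipped for them, as in this pattern from vm_object.c above:

	/* only a write-mapped page can carry unseen modifications */
	if (!p->dirty && p->wpmapped)
		p->dirty = pmap_is_modified(p->phys_page);
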
diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c
index 7eeace1d0bd183914c5ae76680f8b9c67c48dd6c..0f3e790a689bb5a55ac1c3df21c89711e36621ef 100644 (file)
--- a/osfmk/vm/vm_pageout.c
+++ b/osfmk/vm/vm_pageout.c
@@ -370,6 +370,7 @@ unsigned int vm_page_speculative_target = 0;
 
 vm_object_t    vm_pageout_scan_wants_object = VM_OBJECT_NULL;
 
+unsigned long vm_cs_validated_resets = 0;
 
 /*
  *     Routine:        vm_backing_store_disable
@@ -1632,12 +1633,30 @@ consider_inactive:
                                vm_purgeable_q_advance_all(1);
                }
 
-               if (object->copy == VM_OBJECT_NULL && 
-                   (object->purgable == VM_PURGABLE_EMPTY ||
-                    object->purgable == VM_PURGABLE_VOLATILE)) {
-                       assert(m->wire_count == 0);     /* if it's wired, we can't put it on our queue */
-                       /* just stick it back on! */
-                       goto reactivate_page;
+               /* If the object is empty, the page must be reclaimed even if dirty or used. */
+               /* If the page belongs to a volatile object, we stick it back on. */
+               if (object->copy == VM_OBJECT_NULL) {
+                       if(object->purgable == VM_PURGABLE_EMPTY && !m->cleaning) {
+                               m->busy = TRUE;
+                               if (m->pmapped == TRUE) {
+                                       /* unmap the page */
+                                       refmod_state = pmap_disconnect(m->phys_page);
+                                       if (refmod_state & VM_MEM_MODIFIED) {
+                                               m->dirty = TRUE;
+                                       }
+                               }
+                               if (m->dirty || m->precious) {
+                                       /* we saved the cost of cleaning this page ! */
+                                       vm_page_purged_count++;
+                               }
+                               goto reclaim_page;
+                       }
+                       if (object->purgable == VM_PURGABLE_VOLATILE) {
+                               /* if it's wired, we can't put it on our queue */
+                               assert(m->wire_count == 0);
+                               /* just stick it back on! */
+                               goto reactivate_page;
+                       }
                }
                m->pageq.next = NULL;
                m->pageq.prev = NULL;
@@ -2578,6 +2597,7 @@ vm_object_upl_request(
        wpl_array_t             lite_list = NULL;
        vm_object_t             last_copy_object;
        int                     delayed_unlock = 0;
+       int                     j;
 
        if (cntrl_flags & ~UPL_VALID_FLAGS) {
                /*
@@ -2711,11 +2731,34 @@ vm_object_upl_request(
                        }
                        vm_object_unlock(object);
                        VM_PAGE_GRAB_FICTITIOUS(alias_page);
-                       vm_object_lock(object);
+                       goto relock;
                }
-               if (delayed_unlock == 0)
-                       vm_page_lock_queues();
+               if (delayed_unlock == 0) {
+                       /*
+                        * pageout_scan takes the vm_page_lock_queues first
+                        * then tries for the object lock... to avoid what
+                        * is effectively a lock inversion, we'll go to the
+                        * trouble of taking them in that same order... otherwise
+                        * if this object contains the majority of the pages resident
+                        * in the UBC (or a small set of large objects actively being
+                        * worked on contain the majority of the pages), we could
+                        * cause the pageout_scan thread to 'starve' in its attempt
+                        * to find pages to move to the free queue, since it has to
+                        * successfully acquire the object lock of any candidate page
+                        * before it can steal/clean it.
+                        */
+                       vm_object_unlock(object);
+relock:
+                       for (j = 0; ; j++) {
+                               vm_page_lock_queues();
 
+                               if (vm_object_lock_try(object))
+                                       break;
+                               vm_page_unlock_queues();
+                               mutex_pause(j);
+                       }
+                       delayed_unlock = 1;
+               }
                if (cntrl_flags & UPL_COPYOUT_FROM) {
                        upl->flags |= UPL_PAGE_SYNC_DONE;
 
@@ -2848,6 +2891,7 @@ check_busy:
                                dst_page->busy = was_busy;
 
                                vm_page_lock_queues();
+                               delayed_unlock = 1;
                        }
                        if (dst_page->pageout_queue == TRUE)
                                /*
@@ -3001,6 +3045,7 @@ check_busy:
                                        upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
 
                                        vm_page_lock_queues();
+                                       delayed_unlock = 1;
                                }
                                /*
                                 * remember the copy object we synced with
@@ -3070,14 +3115,8 @@ check_busy:
                                }
                                /*
                                 * need to allocate a page
-                                * vm_page_alloc may grab the
-                                * queues lock for a purgeable object
-                                * so drop it
                                 */
-                               delayed_unlock = 0;
-                               vm_page_unlock_queues();
-
-                               dst_page = vm_page_alloc(object, dst_offset);
+                               dst_page = vm_page_grab();
 
                                if (dst_page == VM_PAGE_NULL) {
                                        if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
@@ -3096,14 +3135,41 @@ check_busy:
                                         * then try again for the same
                                         * offset...
                                         */
+                                       delayed_unlock = 0;
+                                       vm_page_unlock_queues();
+
                                        vm_object_unlock(object);
                                        VM_PAGE_WAIT();
-                                       vm_object_lock(object);
+
+                                       /*
+                                        * pageout_scan takes the vm_page_lock_queues first
+                                        * then tries for the object lock... to avoid what
+                                        * is effectively a lock inversion, we'll go to the
+                                        * trouble of taking them in that same order... otherwise
+                                        * if this object contains the majority of the pages resident
+                                        * in the UBC (or a small set of large objects actively being
+                                        * worked on contain the majority of the pages), we could
+                                        * cause the pageout_scan thread to 'starve' in its attempt
+                                        * to find pages to move to the free queue, since it has to
+                                        * successfully acquire the object lock of any candidate page
+                                        * before it can steal/clean it.
+                                        */
+                                       for (j = 0; ; j++) {
+                                               vm_page_lock_queues();
+
+                                               if (vm_object_lock_try(object))
+                                                       break;
+                                               vm_page_unlock_queues();
+                                               mutex_pause(j);
+                                       }
+                                       delayed_unlock = 1;
 
                                        continue;
                                }
-                               dst_page->busy = FALSE;
+                               vm_page_insert_internal(dst_page, object, dst_offset, TRUE);
+
                                dst_page->absent = TRUE;
+                               dst_page->busy = FALSE;
 
                                if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
                                        /*
@@ -3116,7 +3182,6 @@ check_busy:
                                         */
                                        dst_page->clustered = TRUE;
                                }
-                               vm_page_lock_queues();
                        }
                        /*
                         * ENCRYPTED SWAP:
@@ -3268,7 +3333,29 @@ check_busy:
                }
 delay_unlock_queues:
                if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
+                       /*
+                        * pageout_scan takes the vm_page_lock_queues first
+                        * then tries for the object lock... to avoid what
+                        * is effectively a lock inversion, we'll go to the
+                        * trouble of taking them in that same order... otherwise
+                        * if this object contains the majority of the pages resident
+                        * in the UBC (or a small set of large objects actively being
+                        * worked on contain the majority of the pages), we could
+                        * cause the pageout_scan thread to 'starve' in its attempt
+                        * to find pages to move to the free queue, since it has to
+                        * successfully acquire the object lock of any candidate page
+                        * before it can steal/clean it.
+                        */
+                       vm_object_unlock(object);
                        mutex_yield(&vm_page_queue_lock);
+
+                       for (j = 0; ; j++) {
+                               if (vm_object_lock_try(object))
+                                       break;
+                               vm_page_unlock_queues();
+                               mutex_pause(j);
+                               vm_page_lock_queues();
+                       }
                        delayed_unlock = 1;
                }
 try_next_page:
@@ -3279,7 +3366,7 @@ try_next_page:
        if (alias_page != NULL) {
                if (delayed_unlock == 0) {
                        vm_page_lock_queues();
-                       delayed_unlock++;
+                       delayed_unlock = 1;
                }
                vm_page_free(alias_page);
        }
@@ -3760,6 +3847,7 @@ vm_map_enter_upl(
                        cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
 
                        m->pmapped = TRUE;
+                       m->wpmapped = TRUE;
        
                        PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, cache_attr, TRUE);
                }
@@ -3844,6 +3932,7 @@ upl_commit_range(
        int                     delayed_unlock = 0;
        int                     clear_refmod = 0;
        int                     pgpgout_count = 0;
+       int                     j;
 
        *empty = FALSE;
 
@@ -3887,17 +3976,35 @@ upl_commit_range(
        } else {
                shadow_object = object;
        }
-       vm_object_lock(shadow_object);
-
        entry = offset/PAGE_SIZE;
        target_offset = (vm_object_offset_t)offset;
 
+       /*
+        * pageout_scan takes the vm_page_lock_queues first
+        * then tries for the object lock... to avoid what
+        * is effectively a lock inversion, we'll go to the
+        * trouble of taking them in that same order... otherwise
+        * if this object contains the majority of the pages resident
+        * in the UBC (or a small set of large objects actively being
+        * worked on contain the majority of the pages), we could
+        * cause the pageout_scan thread to 'starve' in its attempt
+        * to find pages to move to the free queue, since it has to
+        * successfully acquire the object lock of any candidate page
+        * before it can steal/clean it.
+        */
+       for (j = 0; ; j++) {
+               vm_page_lock_queues();
+
+               if (vm_object_lock_try(shadow_object))
+                       break;
+               vm_page_unlock_queues();
+               mutex_pause(j);
+       }
+       delayed_unlock = 1;
+
        while (xfer_size) {
                vm_page_t       t, m;
 
-               if (delayed_unlock == 0)
-                       vm_page_lock_queues();
-
                m = VM_PAGE_NULL;
 
                if (upl->flags & UPL_LITE) {
@@ -3937,6 +4044,17 @@ upl_commit_range(
                                        m->dirty = TRUE;
                                else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
                                        m->dirty = FALSE;
+                                       if (m->cs_validated && !m->cs_tainted) {
+                                               /*
+                                                * CODE SIGNING:
+                                                * This page is no longer dirty
+                                                * but could have been modified,
+                                                * so it will need to be
+                                                * re-validated.
+                                                */
+                                               m->cs_validated = FALSE;
+                                               vm_cs_validated_resets++;
+                                       }
                                        clear_refmod |= VM_MEM_MODIFIED;
                                }
                                if (flags & UPL_COMMIT_INACTIVATE)
@@ -3964,6 +4082,17 @@ upl_commit_range(
                         */
                        if (flags & UPL_COMMIT_CLEAR_DIRTY) {
                                m->dirty = FALSE;
+                               if (m->cs_validated && !m->cs_tainted) {
+                                       /*
+                                        * CODE SIGNING:
+                                        * This page is no longer dirty
+                                        * but could have been modified,
+                                        * so it will need to be
+                                        * re-validated.
+                                        */
+                                       m->cs_validated = FALSE;
+                                       vm_cs_validated_resets++;
+                               }
                                clear_refmod |= VM_MEM_MODIFIED;
                        }
                        if (clear_refmod)
@@ -4003,6 +4132,17 @@ upl_commit_range(
                                if (m->wanted) vm_pageout_target_collisions++;
 #endif
                                m->dirty = FALSE;
+                               if (m->cs_validated && !m->cs_tainted) {
+                                       /*
+                                        * CODE SIGNING:
+                                        * This page is no longer dirty
+                                        * but could have been modified,
+                                        * so it will need to be
+                                        * re-validated.
+                                        */
+                                       m->cs_validated = FALSE;
+                                       vm_cs_validated_resets++;
+                               }
 
                                if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))
                                        m->dirty = TRUE;
@@ -4049,7 +4189,7 @@ upl_commit_range(
                                goto commit_next_page;
                        }
 #if MACH_CLUSTER_STATS
-                       if (m->pmapped)
+                       if (m->wpmapped)
                                m->dirty = pmap_is_modified(m->phys_page);
 
                        if (m->dirty)   vm_pageout_cluster_dirtied++;
@@ -4057,6 +4197,17 @@ upl_commit_range(
                        if (m->wanted)  vm_pageout_cluster_collisions++;
 #endif
                        m->dirty = FALSE;
+                       if (m->cs_validated && !m->cs_tainted) {
+                               /*
+                                * CODE SIGNING:
+                                * This page is no longer dirty
+                                * but could have been modified,
+                                * so it will need to be
+                                * re-validated.
+                                */
+                               m->cs_validated = FALSE;
+                               vm_cs_validated_resets++;
+                       }
 
                        if ((m->busy) && (m->cleaning)) {
                                /*
@@ -4122,7 +4273,29 @@ commit_next_page:
                entry++;
 
                if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
+                       /*
+                        * pageout_scan takes the vm_page_lock_queues first
+                        * then tries for the object lock... to avoid what
+                        * is effectively a lock inversion, we'll go to the
+                        * trouble of taking them in that same order... otherwise
+                        * if this object contains the majority of the pages resident
+                        * in the UBC (or a small set of large objects actively being
+                        * worked on contain the majority of the pages), we could
+                        * cause the pageout_scan thread to 'starve' in its attempt
+                        * to find pages to move to the free queue, since it has to
+                        * successfully acquire the object lock of any candidate page
+                        * before it can steal/clean it.
+                        */
+                       vm_object_unlock(shadow_object);
                        mutex_yield(&vm_page_queue_lock);
+
+                       for (j = 0; ; j++) {
+                               if (vm_object_lock_try(shadow_object))
+                                       break;
+                               vm_page_unlock_queues();
+                               mutex_pause(j);
+                               vm_page_lock_queues();
+                       }
                        delayed_unlock = 1;
                }
        }
@@ -4199,6 +4372,7 @@ upl_abort_range(
        wpl_array_t             lite_list;
        int                     occupied;
        int                     delayed_unlock = 0;
+       int                     j;
 
        *empty = FALSE;
 
@@ -4233,17 +4407,35 @@ upl_abort_range(
        } else
                shadow_object = object;
 
-       vm_object_lock(shadow_object);
-
        entry = offset/PAGE_SIZE;
        target_offset = (vm_object_offset_t)offset;
 
+       /*
+        * pageout_scan takes the vm_page_lock_queues first
+        * then tries for the object lock... to avoid what
+        * is effectively a lock inversion, we'll go to the
+        * trouble of taking them in that same order... otherwise
+        * if this object contains the majority of the pages resident
+        * in the UBC (or a small set of large objects actively being
+        * worked on contain the majority of the pages), we could
+        * cause the pageout_scan thread to 'starve' in its attempt
+        * to find pages to move to the free queue, since it has to
+        * successfully acquire the object lock of any candidate page
+        * before it can steal/clean it.
+        */
+       for (j = 0; ; j++) {
+               vm_page_lock_queues();
+
+               if (vm_object_lock_try(shadow_object))
+                       break;
+               vm_page_unlock_queues();
+               mutex_pause(j);
+       }
+       delayed_unlock = 1;
+
        while (xfer_size) {
                vm_page_t       t, m;
 
-               if (delayed_unlock == 0)
-                       vm_page_lock_queues();
-
                m = VM_PAGE_NULL;
 
                if (upl->flags & UPL_LITE) {
@@ -4352,7 +4544,29 @@ upl_abort_range(
                        }
                }
                if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) {
+                       /*
+                        * pageout_scan takes the vm_page_lock_queues first
+                        * then tries for the object lock... to avoid what
+                        * is effectively a lock inversion, we'll go to the
+                        * trouble of taking them in that same order... otherwise
+                        * if this object contains the majority of the pages resident
+                        * in the UBC (or a small set of large objects actively being
+                        * worked on contain the majority of the pages), we could
+                        * cause the pageout_scan thread to 'starve' in its attempt
+                        * to find pages to move to the free queue, since it has to
+                        * successfully acquire the object lock of any candidate page
+                        * before it can steal/clean it.
+                        */
+                       vm_object_unlock(shadow_object);
                        mutex_yield(&vm_page_queue_lock);
+
+                       for (j = 0; ; j++) {
+                               if (vm_object_lock_try(shadow_object))
+                                       break;
+                               vm_page_unlock_queues();
+                               mutex_pause(j);
+                               vm_page_lock_queues();
+                       }
                        delayed_unlock = 1;
                }
                target_offset += PAGE_SIZE_64;
@@ -5230,6 +5444,7 @@ vm_paging_map_object(
                        pmap_sync_page_data_phys(page->phys_page);
                }
                page->pmapped = TRUE;
+               page->wpmapped = TRUE;
                cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
 
                //assert(pmap_verify_free(page->phys_page));
@@ -5656,6 +5871,17 @@ vm_page_decrypt(
         * and the decryption doesn't count.
         */
        page->dirty = FALSE;
+       if (page->cs_validated && !page->cs_tainted) {
+               /*
+                * CODE SIGNING:
+                * This page is no longer dirty
+                * but could have been modified,
+                * so it will need to be
+                * re-validated.
+                */
+               page->cs_validated = FALSE;
+               vm_cs_validated_resets++;
+       }
        pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
 
        page->encrypted = FALSE;
@@ -5676,6 +5902,7 @@ vm_page_decrypt(
         */
        assert(pmap_verify_free(page->phys_page));
        page->pmapped = FALSE;
+       page->wpmapped = FALSE;
 
        vm_object_paging_end(page->object);
 }
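
The block comment repeated throughout vm_pageout.c describes one idiom,
shown here in isolation (a sketch using the names from the hunks above):
take vm_page_queue_lock first to match pageout_scan's ordering, then
try-lock the object, backing off with mutex_pause() on failure.

	int j;

	for (j = 0; ; j++) {
		vm_page_lock_queues();
		if (vm_object_lock_try(object))
			break;		/* both locks held, in scan order */
		vm_page_unlock_queues();
		mutex_pause(j);		/* back off, then retry */
	}
	delayed_unlock = 1;
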
diff --git a/osfmk/vm/vm_purgeable.c b/osfmk/vm/vm_purgeable.c
index df1f3f810b7bcf307937fa66a97875672e5c5713..dfd80266f956420d3e8592214e335f8297a4e4f6 100644 (file)
--- a/osfmk/vm/vm_purgeable.c
+++ b/osfmk/vm/vm_purgeable.c
@@ -33,9 +33,9 @@ struct token {
 
 struct token    tokens[MAX_VOLATILE];
 
-token_idx_t     token_free_idx = 0;    /* head of free queue */
-token_cnt_t     token_init_count = 1;  /* token 0 is reserved!! */
-token_cnt_t     token_new_pagecount = 0;       /* count of pages that will
+token_idx_t     token_free_idx = 0;            /* head of free queue */
+token_idx_t     token_init_idx = 1;            /* token 0 is reserved!! */
+int32_t                token_new_pagecount = 0;        /* count of pages that will
                                                 * be added onto token queue */
 
 int             available_for_purge = 0;       /* increase when ripe token
@@ -96,9 +96,9 @@ vm_purgeable_token_add(purgeable_q_t queue)
        token_idx_t     token;
        enum purgeable_q_type i;
 
-       if (token_init_count < MAX_VOLATILE) {  /* lazy token array init */
-               token = token_init_count;
-               token_init_count++;
+       if (token_init_idx < MAX_VOLATILE) {    /* lazy token array init */
+               token = token_init_idx;
+               token_init_idx++;
        } else if (token_free_idx) {
                token = token_free_idx;
                token_free_idx = tokens[token_free_idx].next;
@@ -111,9 +111,10 @@ vm_purgeable_token_add(purgeable_q_t queue)
         * obsolete
         */
        for (i = PURGEABLE_Q_TYPE_FIFO; i < PURGEABLE_Q_TYPE_MAX; i++) {
-               purgeable_queues[i].new_pages += token_new_pagecount;
-               assert(purgeable_queues[i].new_pages >= 0);
-               assert((uint64_t) (purgeable_queues[i].new_pages) <= TOKEN_COUNT_MAX);
+               int64_t pages = purgeable_queues[i].new_pages += token_new_pagecount;
+               assert(pages >= 0);
+               assert(pages <= TOKEN_COUNT_MAX);
+               purgeable_queues[i].new_pages=pages;
        }
        token_new_pagecount = 0;
 
@@ -235,6 +236,20 @@ vm_purgeable_token_delete_first(purgeable_q_t queue)
 void
 vm_purgeable_q_advance_all(uint32_t num_pages)
 {
+       /* check queue counters - if they get really large, scale them back.
+        * They tend to get that large when there is no purgeable queue action */
+       int i;
+       if(token_new_pagecount > (INT32_MAX >> 1))      /* a system idling years might get there */
+       {
+               for (i = PURGEABLE_Q_TYPE_FIFO; i < PURGEABLE_Q_TYPE_MAX; i++) {
+                       int64_t pages = purgeable_queues[i].new_pages += token_new_pagecount;
+                       assert(pages >= 0);
+                       assert(pages <= TOKEN_COUNT_MAX);
+                       purgeable_queues[i].new_pages=pages;
+               }
+               token_new_pagecount = 0;
+       }
+       
        /*
         * don't need to advance obsolete queue - all items are ripe there,
         * always
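
The widening of token_new_pagecount to int32_t, plus the rescale once it
exceeds INT32_MAX >> 1, is what keeps the signed arithmetic above in range.
A sketch of the wraparound hazard being avoided (hypothetical values; under
CONFIG_TOKEN_QUEUE_SMALL the old token_cnt_t was only uint16_t):

	uint16_t small = 65535;
	small += 1;		/* wraps to 0: new_pages could go negative */

	int32_t wide = 65535;
	wide += 1;		/* 65536, as intended; and the rescale above
				 * folds the count long before INT32_MAX */
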
diff --git a/osfmk/vm/vm_purgeable_internal.h b/osfmk/vm/vm_purgeable_internal.h
index ab2db597e68f6e4d47468bcca6b454476002dc39..e225da651463a5770d8b5d3f398db77afc9f04b9 100644 (file)
--- a/osfmk/vm/vm_purgeable_internal.h
+++ b/osfmk/vm/vm_purgeable_internal.h
@@ -46,12 +46,7 @@ enum purgeable_q_type {
        PURGEABLE_Q_TYPE_MAX
 };
 
-/* 
- * It appears there's a 16 vs 32 size mismatch when using
- * CONFIG_TOKEN_QUEUE_SMALL and the resulting math can lead to a large 
- * negative value for new_pages in vm_purgeable.c.
- */
-#if (CONFIG_TOKEN_QUEUE_SMALL == 1) && 0
+#if (CONFIG_TOKEN_QUEUE_SMALL == 1)
 typedef uint16_t token_idx_t;
 typedef uint16_t token_cnt_t;
 #define MAX_VOLATILE 0x01000
@@ -80,7 +75,7 @@ struct purgeable_q {
 typedef struct purgeable_q * purgeable_q_t;
 
 extern struct purgeable_q purgeable_queues[PURGEABLE_Q_TYPE_MAX];
-extern token_cnt_t token_new_pagecount;
+extern int32_t token_new_pagecount;
 extern int available_for_purge;
 
 
diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c
index 5d4d80b47e38fffd67d0665dc649e037ccc1575c..f50356d0db2ed33cabd628ba2b394e03d3715883 100644 (file)
--- a/osfmk/vm/vm_resident.c
+++ b/osfmk/vm/vm_resident.c
@@ -100,8 +100,6 @@ int                 speculative_steal_index = 0;
 
 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
 
-static void vm_page_insert_internal(vm_page_t, vm_object_t, vm_object_offset_t, boolean_t);
-
 
 /*
  *     Associated with page of user-allocatable memory is a
@@ -406,6 +404,7 @@ vm_page_bootstrap(
        m->laundry = FALSE;
        m->free = FALSE;
        m->pmapped = FALSE;
+       m->wpmapped = FALSE;
        m->reference = FALSE;
        m->pageout = FALSE;
        m->dump_cleaning = FALSE;
@@ -889,7 +888,7 @@ vm_page_insert(
 }
 
 
-static void
+void
 vm_page_insert_internal(
        vm_page_t               mem,
        vm_object_t             object,
@@ -1546,6 +1545,7 @@ vm_page_grablo(void)
                assert(mem->free);
                assert(mem->busy);
                assert(!mem->pmapped);
+               assert(!mem->wpmapped);
 
                mem->pageq.next = NULL;
                mem->pageq.prev = NULL;
@@ -1613,6 +1613,7 @@ return_page_from_cpu_list:
                assert(mem->busy);
                assert(!mem->encrypted);
                assert(!mem->pmapped);
+               assert(!mem->wpmapped);
 
                return mem;
        }
@@ -1723,6 +1724,7 @@ return_page_from_cpu_list:
                        assert(!mem->free);
                        assert(!mem->encrypted);
                        assert(!mem->pmapped);
+                       assert(!mem->wpmapped);
                }
                PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
                PROCESSOR_DATA(current_processor(), start_color) = color;
@@ -2090,6 +2092,7 @@ vm_page_free_prepare(
        mem->encrypted_cleaning = FALSE;
        mem->deactivated = FALSE;
        mem->pmapped = FALSE;
+       mem->wpmapped = FALSE;
 
        if (mem->private) {
                mem->private = FALSE;
@@ -2805,11 +2808,9 @@ vm_page_copy(
        dest_m->encrypted = FALSE;
 
        if (src_m->object != VM_OBJECT_NULL &&
-           src_m->object->code_signed &&
-           !src_m->cs_validated) {
+           src_m->object->code_signed) {
                /*
-                * We're copying a not-yet-validated page from a
-                * code-signed object.
+                * We're copying a page from a code-signed object.
                 * Whoever ends up mapping the copy page might care about
                 * the original page's integrity, so let's validate the
                 * source page now.
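
The two vm_resident.c changes above work together: the new "wpmapped" bit appears to record that a page has at some point been entered into a pmap with write permission, and vm_page_copy() now validates every page belonging to a code-signed object rather than only never-validated pages, since a page validated earlier may have been modified through such a writable mapping. A minimal user-space sketch of the revised check follows; the struct and function names are illustrative stand-ins, not the kernel's own:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative model only; mirrors the diff, not the real vm_page. */
struct page {
	bool object_code_signed;	/* src_m->object->code_signed */
	bool cs_validated;		/* src_m->cs_validated */
	bool wpmapped;			/* ever entered in a pmap writable */
};

/* Old rule: validate only pages never validated before. */
static bool old_must_validate(const struct page *p)
{
	return p->object_code_signed && !p->cs_validated;
}

/* New rule: always validate when the object is code-signed, since a
 * writable mapping may have tainted the page after validation. */
static bool new_must_validate(const struct page *p)
{
	return p->object_code_signed;
}

int main(void)
{
	struct page p = { true, true, true };
	printf("old: %d  new: %d\n", old_must_validate(&p), new_must_validate(&p));
	return 0;
}
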
diff --git a/osfmk/vm/vm_shared_region.c b/osfmk/vm/vm_shared_region.c
index 50632a9a0ed30c01eac2deaa958f24d5d82e834c..f6975e1c1dd9a81086035701cf0fef56bd6e59de 100644 (file)
--- a/osfmk/vm/vm_shared_region.c
+++ b/osfmk/vm/vm_shared_region.c
 #include <kern/ipc_tt.h>
 #include <kern/kalloc.h>
 
+#include <mach/mach_vm.h>
+
 #include <vm/vm_map.h>
 #include <vm/vm_shared_region.h>
 
@@ -770,6 +772,9 @@ vm_shared_region_map_file(
        unsigned int            i;
        mach_port_t             map_port;
        mach_vm_offset_t        target_address;
+       vm_object_t             object;
+       vm_object_size_t        obj_size;
+
 
        kr = KERN_SUCCESS;
 
@@ -844,51 +849,143 @@ vm_shared_region_map_file(
                target_address =
                        mappings[i].sfm_address - sr_base_address;
 
-               /* establish that mapping, OK if it's to "already" there */
-               kr = vm_map_enter_mem_object(
-                       sr_map,
-                       &target_address,
-                       vm_map_round_page(mappings[i].sfm_size),
-                       0,
-                       VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
-                       map_port,
-                       mappings[i].sfm_file_offset,
-                       TRUE,
-                       mappings[i].sfm_init_prot & VM_PROT_ALL,
-                       mappings[i].sfm_max_prot & VM_PROT_ALL,
-                       VM_INHERIT_DEFAULT);
-               if (kr == KERN_MEMORY_PRESENT) {
-                       /* this exact mapping was already there: that's fine */
-                       SHARED_REGION_TRACE_INFO(
-                               ("shared_region: mapping[%d]: "
-                                "address:0x%016llx size:0x%016llx "
-                                "offset:0x%016llx "
-                                "maxprot:0x%x prot:0x%x already mapped...\n",
-                                i,
-                                (long long)mappings[i].sfm_address,
-                                (long long)mappings[i].sfm_size,
-                                (long long)mappings[i].sfm_file_offset,
-                                mappings[i].sfm_max_prot,
-                                mappings[i].sfm_init_prot));
-                       kr = KERN_SUCCESS;
-               } else if (kr != KERN_SUCCESS) {
-                       /* this mapping failed ! */
-                       SHARED_REGION_TRACE_ERROR(
-                               ("shared_region: mapping[%d]: "
-                                "address:0x%016llx size:0x%016llx "
-                                "offset:0x%016llx "
-                                "maxprot:0x%x prot:0x%x failed 0x%x\n",
-                                i,
-                                (long long)mappings[i].sfm_address,
-                                (long long)mappings[i].sfm_size,
-                                (long long)mappings[i].sfm_file_offset,
-                                mappings[i].sfm_max_prot,
-                                mappings[i].sfm_init_prot,
-                                kr));
-                       break;
+               /* establish that mapping, OK if it's "already" there */
+               if (map_port == MACH_PORT_NULL) {
+                       /*
+                        * We want to map some anonymous memory in a
+                        * shared region.
+                        * We have to create the VM object now, so that it
+                        * can be mapped "copy-on-write".
+                        */
+                       obj_size = vm_map_round_page(mappings[i].sfm_size);
+                       object = vm_object_allocate(obj_size);
+                       if (object == VM_OBJECT_NULL) {
+                               kr = KERN_RESOURCE_SHORTAGE;
+                       } else {
+                               kr = vm_map_enter(
+                                       sr_map,
+                                       &target_address,
+                                       vm_map_round_page(mappings[i].sfm_size),
+                                       0,
+                                       VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
+                                       object,
+                                       0,
+                                       TRUE,
+                                       mappings[i].sfm_init_prot & VM_PROT_ALL,
+                                       mappings[i].sfm_max_prot & VM_PROT_ALL,
+                                       VM_INHERIT_DEFAULT);
+                       }
+               } else {
+                       object = VM_OBJECT_NULL; /* no anonymous memory here */
+                       kr = vm_map_enter_mem_object(
+                               sr_map,
+                               &target_address,
+                               vm_map_round_page(mappings[i].sfm_size),
+                               0,
+                               VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
+                               map_port,
+                               mappings[i].sfm_file_offset,
+                               TRUE,
+                               mappings[i].sfm_init_prot & VM_PROT_ALL,
+                               mappings[i].sfm_max_prot & VM_PROT_ALL,
+                               VM_INHERIT_DEFAULT);
                }
 
-               /* we're protected by "sr_mapping_in_progress" */
+               if (kr != KERN_SUCCESS) {
+                       if (map_port == MACH_PORT_NULL) {
+                               /*
+                                * Get rid of the VM object we just created
+                                * but failed to map.
+                                */
+                               vm_object_deallocate(object);
+                               object = VM_OBJECT_NULL;
+                       }
+                       if (kr == KERN_MEMORY_PRESENT) {
+                               /*
+                                * This exact mapping was already there:
+                                * that's fine.
+                                */
+                               SHARED_REGION_TRACE_INFO(
+                                       ("shared_region: mapping[%d]: "
+                                        "address:0x%016llx size:0x%016llx "
+                                        "offset:0x%016llx "
+                                        "maxprot:0x%x prot:0x%x "
+                                        "already mapped...\n",
+                                        i,
+                                        (long long)mappings[i].sfm_address,
+                                        (long long)mappings[i].sfm_size,
+                                        (long long)mappings[i].sfm_file_offset,
+                                        mappings[i].sfm_max_prot,
+                                        mappings[i].sfm_init_prot));
+                               /*
+                                * We didn't establish this mapping ourselves;
+                                * let's reset its size, so that we do not
+                                * attempt to undo it if an error occurs later.
+                                */
+                               mappings[i].sfm_size = 0;
+                               kr = KERN_SUCCESS;
+                       } else {
+                               unsigned int j;
+
+                               /* this mapping failed ! */
+                               SHARED_REGION_TRACE_ERROR(
+                                       ("shared_region: mapping[%d]: "
+                                        "address:0x%016llx size:0x%016llx "
+                                        "offset:0x%016llx "
+                                        "maxprot:0x%x prot:0x%x failed 0x%x\n",
+                                        i,
+                                        (long long)mappings[i].sfm_address,
+                                        (long long)mappings[i].sfm_size,
+                                        (long long)mappings[i].sfm_file_offset,
+                                        mappings[i].sfm_max_prot,
+                                        mappings[i].sfm_init_prot,
+                                        kr));
+
+                               /*
+                                * Undo the mappings we've established so far.
+                                */
+                               for (j = 0; j < i; j++) {
+                                       kern_return_t kr2;
+
+                                       if (mappings[j].sfm_size == 0) {
+                                               /*
+                                                * We didn't establish this
+                                                * mapping, so nothing to undo.
+                                                */
+                                               continue;
+                                       }
+                                       SHARED_REGION_TRACE_INFO(
+                                               ("shared_region: mapping[%d]: "
+                                                "address:0x%016llx "
+                                                "size:0x%016llx "
+                                                "offset:0x%016llx "
+                                                "maxprot:0x%x prot:0x%x: "
+                                                "undoing...\n",
+                                                j,
+                                                (long long)mappings[j].sfm_address,
+                                                (long long)mappings[j].sfm_size,
+                                                (long long)mappings[j].sfm_file_offset,
+                                                mappings[j].sfm_max_prot,
+                                                mappings[j].sfm_init_prot));
+                                       kr2 = mach_vm_deallocate(
+                                               sr_map,
+                                               (mappings[j].sfm_address -
+                                                sr_base_address),
+                                               mappings[j].sfm_size);
+                                       assert(kr2 == KERN_SUCCESS);
+                               }
+
+                               break;
+                       }
+
+               }
+
+               /*
+                * Record the first (chronologically) mapping in
+                * this shared region.
+                * We're protected by "sr_mapping_in_progress" here,
+                * so no need to lock "shared_region".
+                */
                if (shared_region->sr_first_mapping == (mach_vm_offset_t) -1) {
                        shared_region->sr_first_mapping = target_address;
                }
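
The rewritten error path above is a textbook rollback pattern: mappings that turned out to be "already there" get their sfm_size reset to 0 so the undo loop can tell mappings this call established from ones it merely observed, and on failure every mapping established so far is torn down in order. A generic, self-contained sketch of the pattern (establish/undo/map_all are illustrative stand-ins, not kernel APIs):

#include <stdbool.h>
#include <stddef.h>

struct mapping {
	size_t size;	/* 0 means "pre-existing, not ours to undo" */
};

static bool establish(struct mapping *m)
{
	/* Stand-in for vm_map_enter()/vm_map_enter_mem_object();
	 * a real version could fail. */
	(void)m;
	return true;
}

static void undo(struct mapping *m)
{
	/* Stand-in for mach_vm_deallocate(). */
	m->size = 0;
}

/* Establish all mappings, or roll back everything this call created. */
static bool map_all(struct mapping *v, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		if (!establish(&v[i])) {
			for (size_t j = 0; j < i; j++) {
				if (v[j].size == 0)
					continue;	/* not ours; leave it */
				undo(&v[j]);
			}
			return false;
		}
	}
	return true;
}

int main(void)
{
	struct mapping v[3] = { { 4096 }, { 0 }, { 8192 } };
	return map_all(v, 3) ? 0 : 1;
}
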
diff --git a/security/conf/MASTER b/security/conf/MASTER
index d3fcf6d1f53e94a74044e7ab4eee01320142ed45..d692d5ae572c3c6608bfb1574d517c684b5f72ae 100644 (file)
--- a/security/conf/MASTER
+++ b/security/conf/MASTER
@@ -55,7 +55,7 @@ ident         SECURITY
 # Note: MAC options must be set in both bsd/conf and security/conf MASTER files
 #
 options                KDEBUG          # kernel tracing                # <kdebug>
-options                AUDIT           # Security event auditing
+options                AUDIT           # Security event auditing       # <audit>
 options                CONFIG_LCTX     # Login Context
 
 options                CONFIG_DTRACE   # dtrace support        # <config_dtrace>
diff --git a/security/conf/MASTER.i386 b/security/conf/MASTER.i386
index 01b3a55d28ff511350ad398402247f563ca245f6..1bd4637655e1b47a9629fdd76fcee23fb09c4558 100644 (file)
--- a/security/conf/MASTER.i386
+++ b/security/conf/MASTER.i386
@@ -1,16 +1,17 @@
 ######################################################################
 #
-#  RELEASE     = [ intel mach libkerncpp config_dtrace ]
+#  RELEASE     = [ intel mach libkerncpp config_dtrace audit ]
 #  PROFILE     = [ RELEASE profile ]
 #  DEBUG       = [ RELEASE debug ]
 #
-#  EMBEDDED    = [ intel mach libkerncpp ]
+#  EMBEDDED    = [ intel mach libkerncpp audit ]
 #  DEVELOPMENT = [ EMBEDDED config_dtrace ]
 #
 ######################################################################
 
 #
-# Note: MAC options must be set in both bsd/conf and security/conf MASTER files
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
 #
 options                CONFIG_MACF                     # Mandatory Access Control Framework
 options                CONFIG_MACF_SOCKET_SUBSET       # MACF subset of socket support
diff --git a/security/conf/MASTER.ppc b/security/conf/MASTER.ppc
index 177301b38eec06a1e63b2533873a6e7eb26773b3..534e8d2fc5b4fc86cf8b0405203f2e4aa2946ffd 100644 (file)
--- a/security/conf/MASTER.ppc
+++ b/security/conf/MASTER.ppc
@@ -4,7 +4,7 @@
 #  Standard Apple MacOS X Configurations:
 #  -------- ---- -------- ---------------
 #
-#  RELEASE = [ppc mach libkerncpp config_dtrace]
+#  RELEASE = [ppc mach libkerncpp config_dtrace audit]
 #  DEVELOPMENT = [RELEASE]
 #  PROFILE = [RELEASE]
 #  DEBUG = [RELEASE debug]
@@ -14,8 +14,8 @@
 ######################################################################
 
 #
-# Note: corresponding MACF options must be set in both security/conf
-#       bsd/conf and/or osfmk/conf MASTER files (depending upon the option)
+# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
 #
 options                CONFIG_MACF                     # Mandatory Access Control Framework
 options                CONFIG_MACF_SOCKET_SUBSET       # MACF subset of socket support
diff --git a/security/conf/Makefile.template b/security/conf/Makefile.template
index b5a57e1580a12059316f3caf36064d8423f61ea5..f697e624e3c42fac682141742739158cf2fea0ac 100644 (file)
--- a/security/conf/Makefile.template
+++ b/security/conf/Makefile.template
@@ -26,7 +26,7 @@ include $(MakeInc_def)
 #
 # XXX: CFLAGS
 #
-CFLAGS+= -DKERNEL -DBSD_KERNEL_PRIVATE \
+CFLAGS+= -I. -imacros meta_features.h -DKERNEL -DBSD_KERNEL_PRIVATE \
        -Wall -Wno-four-char-constants -fno-common
 
 #
diff --git a/security/conf/files b/security/conf/files
index c0565103dd237692f9a6415c0b346330eb2bd106..bea378a45e20eb81387faa9ad816edac5d728197 100644 (file)
--- a/security/conf/files
+++ b/security/conf/files
@@ -1,6 +1,12 @@
 # options
 
 # OPTIONS/kdebug                        optional kdebug
+OPTIONS/audit                          optional audit
+OPTIONS/config_macf                    optional config_macf
+OPTIONS/config_macf_socket_subset      optional config_macf_socket_subset
+OPTIONS/config_macf_socket             optional config_macf_socket
+OPTIONS/config_macf_net                        optional config_macf_net
+
 # security
 
 security/mac_alloc.c                                   optional config_macf
diff --git a/security/mac_audit.c b/security/mac_audit.c
index cb61c1912c228dc2e6be11c8acff2f7a2114bc05..286b6ad5a1344fc022cb4105c44a28ee7ee7d442 100644 (file)
--- a/security/mac_audit.c
+++ b/security/mac_audit.c
@@ -74,7 +74,7 @@
 #include <kern/kalloc.h>
 #include <kern/zalloc.h>
 
-#ifdef AUDIT
+#if AUDIT
 
 /* The zone allocator is initialized in mac_base.c. */
 zone_t mac_audit_data_zone;
@@ -395,4 +395,10 @@ mac_audit(int len, u_char *data)
 
        return (0);
 }
+
+int
+mac_audit_text(__unused char *text, __unused mac_policy_handle_t handle)
+{
+       return (0);
+}
 #endif /* !AUDIT */
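
The one-character change from "#ifdef AUDIT" to "#if AUDIT" is the heart of this file's diff. With the "audit" tag now attached to the option in the MASTER files and wired up through the OPTIONS/audit entry and the -imacros meta_features.h flag added above, the macro is (as I understand config(8)'s scheme) always defined, to 1 or 0 depending on the configuration, so #ifdef would compile the audit code in even when auditing is disabled. A stand-alone illustration; the "#define AUDIT 0" stands in for what the config machinery would generate:

#include <stdio.h>

/* Pretend this is a configuration with auditing disabled: the macro
 * is still defined, just with value 0 (assumed config(8) behavior). */
#define AUDIT 0

int main(void)
{
#ifdef AUDIT
	printf("#ifdef AUDIT is taken even though AUDIT == 0\n");
#endif
#if AUDIT
	printf("#if AUDIT: audit code would be compiled in\n");
#else
	printf("#if AUDIT correctly skips audit code when AUDIT == 0\n");
#endif
	return 0;
}
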
diff --git a/security/mac_base.c b/security/mac_base.c
index 37c9d05af09e5d26b66dd5d3967d30a31e2e3c8a..b659481314d7eec4bee46c912af821c88152fb0f 100644 (file)
--- a/security/mac_base.c
+++ b/security/mac_base.c
@@ -248,12 +248,14 @@ SYSCTL_UINT(_security_mac, OID_AUTO, label_mbufs, CTLFLAG_RW,
        &mac_label_mbufs, 0, "Label all MBUFs");
 #endif
 
+#if AUDIT
 /*
  * mac_audit_data_zone is the zone used for data pushed into the audit
  * record by policies. Using a zone simplifies memory management of this
  * data, and allows tracking of the amount of data in flight.
  */
 extern zone_t mac_audit_data_zone;
+#endif
 
 /*
  * mac_policy_list holds the list of policy modules.  Modules with a
@@ -540,9 +542,11 @@ mac_policy_initbsd(void)
        struct mac_policy_conf *mpc;
        u_int i;
 
+#if AUDIT
        mac_audit_data_zone = zinit(MAC_AUDIT_DATA_LIMIT,
                                    AQ_HIWATER * MAC_AUDIT_DATA_LIMIT,
                                    8192, "mac_audit_data_zone");
+#endif
 
        printf("MAC Framework successfully initialized\n");
 
diff --git a/tools/tests/xnu_quick_test/tests.c b/tools/tests/xnu_quick_test/tests.c
index ad0db0e630b162d1a39e5bcbf789a63b79161d73..4bd3f0e4ab9092d2955d3a5f2fe61d662d7d5e13 100644 (file)
--- a/tools/tests/xnu_quick_test/tests.c
+++ b/tools/tests/xnu_quick_test/tests.c
@@ -4577,7 +4577,7 @@ int aio_tests( void * the_argp )
        
        my_aiocbp = &my_aiocbs[ 0 ];
     my_aiocbp->aio_fildes = my_fd_list[ 0 ];
-       my_aiocbp->aio_offset = 0;
+       my_aiocbp->aio_offset = 4096;
        my_aiocbp->aio_buf = my_buffers[ 0 ];
     my_aiocbp->aio_nbytes = AIO_TESTS_BUFFER_SIZE;
     my_aiocbp->aio_reqprio = 0;
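
For context on this last tweak: each queued aiocb carries its own explicit file offset, so moving the first request's aio_offset from 0 to 4096 presumably keeps it from overlapping another request's region of the test file. A minimal stand-alone use of the same aiocb fields; the path and buffer size are made up for illustration:

#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define BUF_SIZE 4096	/* stands in for AIO_TESTS_BUFFER_SIZE */

int main(void)
{
	static char buffer[BUF_SIZE];
	struct aiocb cb;
	int fd = open("/tmp/aio_demo", O_CREAT | O_RDWR, 0600);

	if (fd < 0)
		return 1;
	memset(buffer, 'x', sizeof(buffer));
	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_offset = 4096;		/* write at offset 4096, as in the test */
	cb.aio_buf = buffer;
	cb.aio_nbytes = sizeof(buffer);
	cb.aio_reqprio = 0;

	if (aio_write(&cb) != 0)
		return 1;
	while (aio_error(&cb) == EINPROGRESS)	/* poll until the write lands */
		usleep(1000);
	printf("wrote %zd bytes\n", aio_return(&cb));
	close(fd);
	return 0;
}
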