git.saurik.com Git - apple/xnu.git/commitdiff
tarball: xnu-1699.26.8.tar.gz    tags: mac-os-x-1074, v1699.26.8
author     Apple <opensource@apple.com>  Thu, 10 May 2012 15:24:53 +0000 (15:24 +0000)
committer  Apple <opensource@apple.com>  Thu, 10 May 2012 15:24:53 +0000 (15:24 +0000)
92 files changed:
bsd/dev/i386/sysctl.c
bsd/hfs/hfs_readwrite.c
bsd/hfs/hfscommon/Misc/FileExtentMapping.c
bsd/kern/kern_fork.c
bsd/kern/kern_panicinfo.c
bsd/kern/kern_proc.c
bsd/kern/kern_symfile.c
bsd/kern/netboot.c
bsd/kern/pthread_support.c
bsd/kern/trace.codes
bsd/miscfs/specfs/spec_vnops.c
bsd/miscfs/specfs/specdev.h
bsd/net/dlil.c
bsd/net/if.c
bsd/net/if_var.h
bsd/nfs/nfs_vfsops.c
bsd/sys/buf.h
bsd/sys/buf_internal.h
bsd/sys/disk.h
bsd/sys/kernel_types.h
bsd/sys/sysctl.h
bsd/vfs/vfs_bio.c
bsd/vfs/vfs_fsevents.c
bsd/vfs/vfs_subr.c
config/MasterVersion
config/Private.exports
config/Private.i386.exports
config/Private.x86_64.exports
config/Unsupported.i386.exports
config/Unsupported.x86_64.exports
iokit/IOKit/IOHibernatePrivate.h
iokit/IOKit/IOService.h
iokit/IOKit/pwr_mgt/IOPM.h
iokit/IOKit/pwr_mgt/IOPMLibDefs.h
iokit/IOKit/pwr_mgt/IOPMPrivate.h
iokit/IOKit/pwr_mgt/RootDomain.h
iokit/Kernel/IOHibernateIO.cpp
iokit/Kernel/IOHibernateRestoreKernel.c
iokit/Kernel/IOLib.cpp
iokit/Kernel/IOMemoryDescriptor.cpp
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IOServicePM.cpp
iokit/Kernel/IOServicePMPrivate.h
iokit/Kernel/RootDomainUserClient.cpp
libkern/c++/OSKext.cpp
libkern/libkern/OSAtomic.h
osfmk/console/video_console.c
osfmk/i386/AT386/model_dep.c
osfmk/i386/commpage/commpage.c
osfmk/i386/cpu_capabilities.h
osfmk/i386/cpu_threads.c
osfmk/i386/cpu_threads.h
osfmk/i386/cpu_topology.c
osfmk/i386/cpuid.c
osfmk/i386/cpuid.h
osfmk/i386/fpu.c
osfmk/i386/hibernate_restore.c
osfmk/i386/i386_init.c
osfmk/i386/i386_vm_init.c
osfmk/i386/locks_i386.c
osfmk/i386/misc_protos.h
osfmk/i386/mp.c
osfmk/i386/pal_hibernate.h
osfmk/i386/phys.c
osfmk/i386/pmCPU.c
osfmk/i386/pmap.c
osfmk/i386/pmap.h
osfmk/i386/pmap_common.c
osfmk/i386/pmap_x86_common.c
osfmk/i386/proc_reg.h
osfmk/i386/trap.c
osfmk/ipc/ipc_init.c
osfmk/kdp/kdp.h
osfmk/kdp/kdp_udp.c
osfmk/kdp/ml/i386/kdp_x86_common.c
osfmk/kern/debug.c
osfmk/kern/startup.c
osfmk/kern/thread.c
osfmk/kern/zalloc.c
osfmk/kern/zalloc.h
osfmk/mach/i386/thread_state.h
osfmk/vm/vm_init.h
osfmk/vm/vm_map.c
osfmk/vm/vm_map.h
osfmk/vm/vm_pageout.c
osfmk/vm/vm_resident.c
osfmk/x86_64/idt64.s
osfmk/x86_64/loose_ends.c
osfmk/x86_64/pmap.c
pexpert/i386/pe_init.c
pexpert/i386/pe_kprintf.c
pexpert/pexpert/i386/boot.h

diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c
index ba3bfc1eeecc34612410a08a1d2810a8f3c954b6..cb15eb632e49521e4ecf3b1f326afcfed347ea3e 100644
@@ -440,7 +440,6 @@ SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, energy_policy,
            sizeof(boolean_t),
            cpu_thermal, "I", "Energy Efficient Policy Support");
 
-
 SYSCTL_NODE(_machdep_cpu, OID_AUTO, xsave, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
        "xsave");
 
diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c
index 7bf65093cabbb49ff18559d50f427af06b392df6..63acbac05944fc5edbf0d3f27a2d6b9974393477 100644
@@ -259,6 +259,7 @@ hfs_vnop_write(struct vnop_write_args *ap)
        int do_snapshot = 1;
        time_t orig_ctime=VTOC(vp)->c_ctime;
        int took_truncate_lock = 0;
+       struct rl_entry *invalid_range;
 
 #if HFS_COMPRESSION
        if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
@@ -328,7 +329,14 @@ hfs_vnop_write(struct vnop_write_args *ap)
 
 again:
        /* Protect against a size change. */
-       if (ioflag & IO_APPEND) {
+       /*
+        * Protect against a size change.
+        *
+        * Note: If took_truncate_lock is true, then we previously got the lock shared
+        * but needed to upgrade to exclusive.  So try getting it exclusive from the
+        * start.
+        */
+       if (ioflag & IO_APPEND || took_truncate_lock) {
                hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
        }       
        else {
@@ -350,17 +358,42 @@ again:
        writelimit = offset + resid;
        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
 
-       /* If the truncate lock is shared, and if we either have virtual 
-        * blocks or will need to extend the file, upgrade the truncate 
-        * to exclusive lock.  If upgrade fails, we lose the lock and 
-        * have to get exclusive lock again.  Note that we want to
-        * grab the truncate lock exclusive even if we're not allocating new blocks
-        * because we could still be growing past the LEOF.
+       /*
+        * We may need an exclusive truncate lock for several reasons, all
+        * of which are because we may be writing to a (portion of a) block
+        * for the first time, and we need to make sure no readers see the
+        * prior, uninitialized contents of the block.  The cases are:
+        *
+        * 1. We have unallocated (delayed allocation) blocks.  We may be
+        *    allocating new blocks to the file and writing to them.
+        *    (A more precise check would be whether the range we're writing
+        *    to contains delayed allocation blocks.)
+        * 2. We need to extend the file.  The bytes between the old EOF
+        *    and the new EOF are not yet initialized.  This is important
+        *    even if we're not allocating new blocks to the file.  If the
+        *    old EOF and new EOF are in the same block, we still need to
+        *    protect that range of bytes until they are written for the
+        *    first time.
+        * 3. The write overlaps some invalid ranges (delayed zero fill; that
+        *    part of the file has been allocated, but not yet written).
+        *
+        * If we had a shared lock with the above cases, we need to try to upgrade
+        * to an exclusive lock.  If the upgrade fails, we will lose the shared
+        * lock, and will need to take the truncate lock again; the took_truncate_lock
+        * flag will still be set, causing us to try for an exclusive lock next time.
+        *
+        * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
+        * lock is held, since it protects the range lists.
         */
        if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
-           ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
-               /* Lock upgrade failed and we lost our shared lock, try again */
+           ((fp->ff_unallocblocks != 0) ||
+            (writelimit > origFileSize))) {
                if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
+                       /*
+                        * Lock upgrade failed and we lost our shared lock, try again.
+                        * Note: we do not set took_truncate_lock=0 here.  Leaving it
+                        * set to 1 will cause us to try to get the lock exclusive.
+                        */
                        goto again;
                } 
                else {
@@ -374,11 +407,28 @@ again:
        }
        cnode_locked = 1;
        
-       if (cp->c_truncatelockowner == HFS_SHARED_OWNER) {
-               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
-                            (int)offset, uio_resid(uio), (int)fp->ff_size,
-                            (int)filebytes, 0);
+       /*
+        * Now that we have the cnode lock, see if there are delayed zero fill ranges
+        * overlapping our write.  If so, we need the truncate lock exclusive (see above).
+        */
+       if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
+           (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
+               /*
+                * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
+                * a deadlock, rather than simply returning failure.  (That is, it apparently does
+                * not behave like a "try_lock").  Since this condition is rare, just drop the
+                * cnode lock and try again.  Since took_truncate_lock is set, we will
+                * automatically take the truncate lock exclusive.
+                */
+               hfs_unlock(cp);
+               cnode_locked = 0;
+               hfs_unlock_truncate(cp, 0);
+               goto again;
        }
+       
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
+                    (int)offset, uio_resid(uio), (int)fp->ff_size,
+                    (int)filebytes, 0);
 
        /* Check if we do not need to extend the file */
        if (writelimit <= filebytes) {
@@ -452,7 +502,6 @@ sizeok:
                off_t inval_end;
                off_t io_start;
                int lflag;
-               struct rl_entry *invalid_range;
 
                if (writelimit > fp->ff_size)
                        filesize = writelimit;
@@ -1966,85 +2015,7 @@ fail_change_next_allocation:
 
        case F_READBOOTSTRAP:
        case F_WRITEBOOTSTRAP:
-       {
-           struct vnode *devvp = NULL;
-           user_fbootstraptransfer_t *user_bootstrapp;
-           int devBlockSize;
-           int error;
-           uio_t auio;
-           daddr64_t blockNumber;
-           u_int32_t blockOffset;
-           u_int32_t xfersize;
-           struct buf *bp;
-           user_fbootstraptransfer_t user_bootstrap;
-
-               if (!vnode_isvroot(vp))
-                       return (EINVAL);
-               /* LP64 - when caller is a 64 bit process then we are passed a pointer 
-                * to a user_fbootstraptransfer_t else we get a pointer to a 
-                * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
-                */
-               if ((hfsmp->hfs_flags & HFS_READ_ONLY)
-                       && (ap->a_command == F_WRITEBOOTSTRAP)) {
-                       return (EROFS);
-               }
-               if (is64bit) {
-                       user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
-               }
-               else {
-                       user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
-                       user_bootstrapp = &user_bootstrap;
-                       user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
-                       user_bootstrap.fbt_length = bootstrapp->fbt_length;
-                       user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
-               }
-
-               if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) || 
-                               (user_bootstrapp->fbt_length > 1024)) {
-                       return EINVAL;
-               }
-
-               if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024) 
-                       return EINVAL;
-           
-               devvp = VTOHFS(vp)->hfs_devvp;
-               auio = uio_create(1, user_bootstrapp->fbt_offset, 
-                                                 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
-                                                 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
-               uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
-
-           devBlockSize = vfs_devblocksize(vnode_mount(vp));
-
-           while (uio_resid(auio) > 0) {
-                       blockNumber = uio_offset(auio) / devBlockSize;
-                       error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
-                       if (error) {
-                               if (bp) buf_brelse(bp);
-                               uio_free(auio);
-                               return error;
-                       };
-
-                       blockOffset = uio_offset(auio) % devBlockSize;
-                       xfersize = devBlockSize - blockOffset;
-                       error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
-                       if (error) {
-                               buf_brelse(bp);
-                               uio_free(auio);
-                               return error;
-                       };
-                       if (uio_rw(auio) == UIO_WRITE) {
-                               error = VNOP_BWRITE(bp);
-                               if (error) {
-                                       uio_free(auio);
-                       return error;
-                               }
-                       } else {
-                               buf_brelse(bp);
-                       };
-               };
-               uio_free(auio);
-       };
-       return 0;
+               return 0;
 
        case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
        {
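The locking dance above relies on one property of lck_rw_lock_shared_to_exclusive(): on failure it does not merely decline the upgrade, it drops the shared lock entirely, so the only safe recovery is to loop back and take the lock exclusive from the start (hence took_truncate_lock staying set). A minimal user-space sketch of that retry pattern, using POSIX rwlocks, which cannot upgrade in place at all and therefore force the same drop-and-retry shape; the predicate is a hypothetical stand-in for the three cases enumerated in the block comment above:

    /*
     * Sketch only, not XNU code. Mirrors the took_truncate_lock retry in
     * hfs_vnop_write(): once a shared lock has been dropped for an upgrade,
     * re-take the lock exclusive from the top rather than re-trying shared.
     */
    #include <pthread.h>
    #include <stdbool.h>

    static pthread_rwlock_t truncate_lock = PTHREAD_RWLOCK_INITIALIZER;

    /* Hypothetical: "delayed blocks, growing file, or invalid-range overlap" */
    static bool write_needs_exclusive(void) { return true; }

    static void locked_write(void)
    {
        bool want_exclusive = false;    /* plays the took_truncate_lock role */

    again:
        if (want_exclusive)
            pthread_rwlock_wrlock(&truncate_lock);
        else
            pthread_rwlock_rdlock(&truncate_lock);

        if (!want_exclusive && write_needs_exclusive()) {
            /* "Upgrade": drop shared, loop, take exclusive next time. */
            pthread_rwlock_unlock(&truncate_lock);
            want_exclusive = true;
            goto again;
        }

        /* ... perform the write under the (possibly exclusive) lock ... */
        pthread_rwlock_unlock(&truncate_lock);
    }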
diff --git a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c
index ec9881da8883bc81a091151d9851db71659d8298..998f97fa930b04bf808cd011b091c16ec0b80988 100644
@@ -497,6 +497,7 @@ OSErr MapFileBlockC (
        //
        //      Determine the end of the available space.  It will either be the end of the extent,
        //      or the file's PEOF, whichever is smaller.
+       
        //
        dataEnd = (off_t)((off_t)(nextFABN) * (off_t)(allocBlockSize));   // Assume valid data through end of this extent
        if (((off_t)fcb->ff_blocks * (off_t)allocBlockSize) < dataEnd)    // Is PEOF shorter?
@@ -529,6 +530,12 @@ OSErr MapFileBlockC (
        if (availableBytes)
        {
                tmpOff = dataEnd - offset;
+               /*
+                * Disallow negative runs.
+                */
+               if (tmpOff <= 0) {
+                       return EINVAL;
+               }
                if (tmpOff > (off_t)(numberOfBytes))
                        *availableBytes = numberOfBytes;  // more there than they asked for, so pin the output
                else
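The guard closes the case where a stale or inconsistent extent record leaves dataEnd at or before the requested offset, which previously would have produced a zero or negative *availableBytes. The clamp plus the new check, isolated as a sketch (the parameter packaging is illustrative, not MapFileBlockC's real signature):

    /* Sketch: clamping a mapped run, with the new negative-run guard. */
    #include <errno.h>
    #include <sys/types.h>

    static int clamp_available(off_t dataEnd, off_t offset,
                               off_t numberOfBytes, off_t *availableBytes)
    {
        off_t tmpOff = dataEnd - offset;

        if (tmpOff <= 0)        /* offset at or past the run's end: reject */
            return EINVAL;
        *availableBytes = (tmpOff > numberOfBytes) ? numberOfBytes : tmpOff;
        return 0;
    }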
diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c
index 7746398bf7ec742b7d380318037451eeb0b61645..76c1fbae69d1d970f223040c9bddb289959f4a33 100644
@@ -1373,8 +1373,6 @@ uthread_zone_init(void)
                                        THREAD_CHUNK * sizeof(struct uthread),
                                        "uthreads");
                uthread_zone_inited = 1;
-
-               zone_change(uthread_zone, Z_NOENCRYPT, TRUE);
        }
 }
 
diff --git a/bsd/kern/kern_panicinfo.c b/bsd/kern/kern_panicinfo.c
index 1a949de7b6baf08cfec977e407b06241f3dec4fb..eb5c5bfbd912d75efacc888b1d419dfbde53f4a8 100644
@@ -43,6 +43,7 @@
 
 /* prototypes not exported by osfmk/console. */
 extern void panic_dialog_test( void );
+extern void noroot_icon_test(void);
 extern int  panic_dialog_set_image( const unsigned char * ptr, unsigned int size );
 extern void panic_dialog_get_image( unsigned char ** ptr, unsigned int * size );
 
@@ -51,7 +52,6 @@ static int sysctl_dopanicinfo SYSCTL_HANDLER_ARGS;
 
 
 #define PANIC_IMAGE_SIZE_LIMIT (32 * 4096)                             /* 128K - Maximum amount of memory consumed for the panic UI */
-#define KERN_PANICINFO_TEST    (KERN_PANICINFO_IMAGE+2)                /* Allow the panic UI to be tested by root without causing a panic */
 
 /* Local data */
 static int image_size_limit = PANIC_IMAGE_SIZE_LIMIT;
@@ -92,6 +92,12 @@ sysctl_dopanicinfo SYSCTL_HANDLER_ARGS
                panic_dialog_test();
                break;
 
+       case KERN_PANICINFO_NOROOT_TEST:
+               printf("Testing noroot icon \n");
+
+               noroot_icon_test();
+               break;
+
        case KERN_PANICINFO_MAXSIZE:
 
                /* return the image size limits */
diff --git a/bsd/kern/kern_proc.c b/bsd/kern/kern_proc.c
index 042a3a8641146c13dd37394a578e26251a4a71e7..f352a55bf34beaf2561d88e1b3e2c8239ff4daad 100644
@@ -2679,9 +2679,11 @@ cs_invalid_page(
        if (p->p_csflags & CS_KILL) {
                p->p_csflags |= CS_KILLED;
                proc_unlock(p);
-               printf("CODE SIGNING: cs_invalid_page(0x%llx): "
-                      "p=%d[%s] honoring CS_KILL, final status 0x%x\n",
-                      vaddr, p->p_pid, p->p_comm, p->p_csflags);
+               if (cs_debug) {
+                       printf("CODE SIGNING: cs_invalid_page(0x%llx): "
+                              "p=%d[%s] honoring CS_KILL, final status 0x%x\n",
+                              vaddr, p->p_pid, p->p_comm, p->p_csflags);
+               }
                cs_procs_killed++;
                psignal(p, SIGKILL);
                proc_lock(p);
@@ -2690,9 +2692,11 @@ cs_invalid_page(
        /* CS_HARD means fail the mapping operation so the process stays valid. */
        if (p->p_csflags & CS_HARD) {
                proc_unlock(p);
-               printf("CODE SIGNING: cs_invalid_page(0x%llx): "
-                      "p=%d[%s] honoring CS_HARD\n",
-                      vaddr, p->p_pid, p->p_comm);
+               if (cs_debug) {
+                       printf("CODE SIGNING: cs_invalid_page(0x%llx): "
+                              "p=%d[%s] honoring CS_HARD\n",
+                              vaddr, p->p_pid, p->p_comm);
+               }
                retval = 1;
        } else {
                if (p->p_csflags & CS_VALID) {
diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c
index dc6531b4295df1b5ecbbcd89d7a69664af8d42ae..b1db73f0c270fefaed3a5699654f32dd11de048a 100644
@@ -65,6 +65,8 @@
  * export, as there are no internal consumers.
  */
 int
+get_kernel_symfile(__unused proc_t p, __unused char const **symfile);
+int
 get_kernel_symfile(__unused proc_t p, __unused char const **symfile)
 {
     return KERN_FAILURE;
@@ -75,6 +77,8 @@ struct kern_direct_file_io_ref_t
     vfs_context_t  ctx;
     struct vnode * vp;
     dev_t          device;
+    uint32_t      blksize;
+    off_t                 filelength;
 };
 
 
@@ -91,6 +95,11 @@ static int device_ioctl(void * p1, __unused void * p2, u_long theIoctl, caddr_t
     return (VNOP_IOCTL(p1, theIoctl, result, 0, p2));
 }
 
+void
+kern_unmap_file(struct kern_direct_file_io_ref_t * ref, off_t f_offset, off_t end);
+int
+kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len);
+
 struct kern_direct_file_io_ref_t *
 kern_open_file_for_direct_io(const char * name, 
                             kern_get_file_extents_callback_t callback, 
@@ -110,7 +119,6 @@ kern_open_file_for_direct_io(const char * name,
     struct vnode_attr          va;
     int                                error;
     off_t                      f_offset;
-    off_t                      filelength;
     uint64_t                    fileblk;
     size_t                      filechunk;
     uint64_t                    physoffset;
@@ -198,22 +206,22 @@ kern_open_file_for_direct_io(const char * name,
 
     // get block size
 
-    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
+    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize);
     if (error)
         goto out;
 
     if (ref->vp->v_type == VREG)
-        filelength = va.va_data_size;
+        ref->filelength = va.va_data_size;
     else
     {
         error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk);
         if (error)
             goto out;
-       filelength = fileblk * blksize;    
+       ref->filelength = fileblk * ref->blksize;    
     }
 
     f_offset = 0;
-    while (f_offset < filelength) 
+    while (f_offset < ref->filelength) 
     {
         if (ref->vp->v_type == VREG)
         {
@@ -224,12 +232,12 @@ kern_open_file_for_direct_io(const char * name,
             if (error)
                 goto out;
 
-            fileblk = blkno * blksize;
+            fileblk = blkno * ref->blksize;
         }
         else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
         {
             fileblk = f_offset;
-            filechunk = f_offset ? 0 : filelength;
+            filechunk = f_offset ? 0 : ref->filelength;
         }
 
         physoffset = 0;
@@ -362,9 +370,65 @@ kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t ad
                        vfs_context_proc(ref->ctx)));
 }
 
+void
+kern_unmap_file(struct kern_direct_file_io_ref_t * ref, off_t offset, off_t end)
+{
+    int error;
+       int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
+       void * p1;
+       void * p2;
+       dk_extent_t extent;
+       dk_unmap_t  unmap;
+    uint64_t    fileblk;
+    size_t      filechunk;
+
+       bzero(&extent, sizeof(dk_extent_t));
+       bzero(&unmap, sizeof(dk_unmap_t));
+       if (ref->vp->v_type == VREG)
+       {
+               p1 = &ref->device;
+               p2 = kernproc;
+               do_ioctl = &file_ioctl;
+       }
+       else
+       {
+               /* Partition. */
+               p1 = ref->vp;
+               p2 = ref->ctx;
+               do_ioctl = &device_ioctl;
+       }
+    while (offset < end) 
+    {
+        if (ref->vp->v_type == VREG)
+        {
+            daddr64_t blkno;
+                       filechunk = 1*1024*1024*1024;
+                       if (filechunk > (size_t)(end - offset))
+                               filechunk = (size_t)(end - offset);
+            error = VNOP_BLOCKMAP(ref->vp, offset, filechunk, &blkno, &filechunk, NULL, 0, NULL);
+                       if (error) break;
+            fileblk = blkno * ref->blksize;
+        }
+        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
+        {
+            fileblk = offset;
+            filechunk = ref->filelength;
+        }
+               extent.offset = fileblk;
+               extent.length = filechunk;
+               unmap.extents = &extent;
+               unmap.extentsCount = 1;
+        error = do_ioctl(p1, p2, DKIOCUNMAP, (caddr_t)&unmap);
+//             kprintf("DKIOCUNMAP(%d) 0x%qx, 0x%qx\n", error, extent.offset, extent.length);
+               if (error) break;
+        offset += filechunk;
+    }
+}
+
 void
 kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
-                             off_t offset, caddr_t addr, vm_size_t len)
+                             off_t write_offset, caddr_t addr, vm_size_t write_length,
+                             off_t discard_offset, off_t discard_end)
 {
     int error;
     kprintf("kern_close_file_for_direct_io\n");
@@ -392,9 +456,13 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
         }
         (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
         
-        if (addr && len)
+        if (addr && write_length)
+        {
+            (void) kern_write_file(ref, write_offset, addr, write_length);
+        }
+        if (discard_offset && discard_end)
         {
-            (void) kern_write_file(ref, offset, addr, len);
+            (void) kern_unmap_file(ref, discard_offset, discard_end);
         }
 
         error = vnode_close(ref->vp, FWRITE, ref->ctx);
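kern_unmap_file() walks the range in chunks of up to 1 GB, maps each chunk to a device byte extent with VNOP_BLOCKMAP(), and issues one DKIOCUNMAP per extent so the storage driver can discard (TRIM) the underlying blocks; kern_close_file_for_direct_io() now takes a discard range for exactly this purpose. The per-extent request assembly, pulled out as a sketch (the helper name is hypothetical; the field usage mirrors the code above):

    /* Sketch: one single-extent DKIOCUNMAP request, as kern_unmap_file()
     * builds per chunk. Offsets and lengths are device-relative bytes. */
    #include <stdint.h>
    #include <string.h>
    #include <sys/types.h>
    #include <sys/disk.h>

    static int unmap_one_extent(int (*do_ioctl)(void *, void *, u_long, caddr_t),
                                void *p1, void *p2,
                                uint64_t byte_offset, uint64_t byte_length)
    {
        dk_extent_t extent;
        dk_unmap_t  unmap;

        memset(&extent, 0, sizeof(extent));
        memset(&unmap,  0, sizeof(unmap));

        extent.offset      = byte_offset;
        extent.length      = byte_length;
        unmap.extents      = &extent;
        unmap.extentsCount = 1;

        return do_ioctl(p1, p2, DKIOCUNMAP, (caddr_t)&unmap);
    }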
diff --git a/bsd/kern/netboot.c b/bsd/kern/netboot.c
index 664f03ef7f2a033cd34eb1af3f7355a0d8adba7f..1eb975ed27c003794cfa84bf603a0c8a0959d6be 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2001-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -591,6 +591,7 @@ find_interface(void)
 {
     struct ifnet *             ifp = NULL;
 
+    dlil_if_lock();
     if (rootdevice[0]) {
                ifp = ifunit((char *)rootdevice);
     }
@@ -601,6 +602,7 @@ find_interface(void)
                                break;
                ifnet_head_done();
     }
+    dlil_if_unlock();
     return (ifp);
 }
 
diff --git a/bsd/kern/pthread_support.c b/bsd/kern/pthread_support.c
index e5626dfa2dc0cfd8d339d2f62239e4a430cf2191..bcb0b0997cce9e43d42a541d0f6a911abdc0756a 100644
@@ -478,7 +478,7 @@ extern int ksyn_findobj(uint64_t mutex, uint64_t * object, uint64_t * offset);
 static void UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int wqtype);
 extern thread_t port_name_to_thread(mach_port_name_t port_name);
 
-int ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int log);
+kern_return_t ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int log, thread_continue_t, void * parameter);
 kern_return_t ksyn_wakeup_thread(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe);
 void ksyn_freeallkwe(ksyn_queue_t kq);
 
@@ -503,6 +503,8 @@ void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *update
 void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep, ksyn_queue_t kfreeq, int release);
 ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);
 ksyn_waitq_element_t ksyn_queue_find_threadseq(ksyn_wait_queue_t ckwq, ksyn_queue_t kq, thread_t th, uint32_t toseq);
+void psynch_cvcontinue(void *, wait_result_t);
+void psynch_mtxcontinue(void *, wait_result_t);
 
 int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, int allreaders, uint32_t updatebits, int * wokenp);
 int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int * type, uint32_t lowest[]);
@@ -762,6 +764,7 @@ psynch_mutexwait(__unused proc_t p, struct psynch_mutexwait_args * uap, uint32_t
        int firstfit = flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT;
        uint32_t lockseq, updatebits=0;
        ksyn_waitq_element_t kwe;
+       kern_return_t kret;
 
 #if _PSYNCH_TRACE_
        __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_START, (uint32_t)mutex, mgen, ugen, flags, 0);
@@ -862,14 +865,50 @@ psynch_mutexwait(__unused proc_t p, struct psynch_mutexwait_args * uap, uint32_t
                goto out;
        }
        
-       error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
-               /* drops the wq lock */
+       kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, psynch_mtxcontinue, (void *)kwq);
+
+       psynch_mtxcontinue((void *)kwq, kret);
+
+       /* not expected to return from unix_syscall_return */
+       panic("psynch_mtxcontinue returned from unix_syscall_return");
+
+out:
+       ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX)); 
+#if _PSYNCH_TRACE_
+       __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 0xeeeeeeed, updatebits, error, 0);
+#endif /* _PSYNCH_TRACE_ */
+
+       return(error);
+}
+
+void 
+psynch_mtxcontinue(void * parameter, wait_result_t result)
+{
+       int error = 0;
+       uint32_t updatebits = 0;
+       uthread_t uth = current_uthread();
+       ksyn_wait_queue_t kwq = (ksyn_wait_queue_t)parameter;
+       ksyn_waitq_element_t kwe;
+
+       kwe = &uth->uu_kwe;
+
+       switch (result) {
+               case THREAD_TIMED_OUT:
+                       error  = ETIMEDOUT;
+                       break;
+               case THREAD_INTERRUPTED:
+                       error  = EINTR;
+                       break;
+               default:
+                       error = 0;
+                       break;
+       }
 
        if (error != 0) {
                ksyn_wqlock(kwq);
                
 #if _PSYNCH_TRACE_
-               __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)mutex, 3, 0xdeadbeef, error, 0);
+               __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 3, 0xdeadbeef, error, 0);
 #endif /* _PSYNCH_TRACE_ */
                if (kwe->kwe_kwqqueue != NULL)
                        ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
@@ -877,18 +916,17 @@ psynch_mutexwait(__unused proc_t p, struct psynch_mutexwait_args * uap, uint32_t
        } else {
                updatebits = kwe->kwe_psynchretval;
                updatebits &= ~PTH_RWL_MTX_WAIT;
-               *retval = updatebits;
+               uth->uu_rval[0] = updatebits;
 
                if (updatebits == 0)
                        __FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq  in mutexwait with no EBIT \n");
        }
-out:
        ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX)); 
 #if _PSYNCH_TRACE_
-       __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 0xeeeeeeed, updatebits, error, 0);
+       __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)kwq->kw_addr, 0xeeeeeeed, updatebits, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 
-       return(error);
+       unix_syscall_return(error);
 }
 
 /*
@@ -1205,10 +1243,7 @@ psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * ret
        uthread_t uth;
        ksyn_waitq_element_t kwe, nkwe = NULL;
        struct ksyn_queue  *kq, kfreeq;
-#if __TESTPANICS__
-       //int timeoutval = 3;           /* 3 secs */
-       //u_int64_t ntime = 0;
-#endif /* __TESTPANICS__ */
+       kern_return_t kret;
        
        /* for conformance reasons */
        __pthread_testcancel(0);
@@ -1243,9 +1278,6 @@ psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * ret
                return(error);
        }
        
-#if __TESTPANICS__
-       //clock_interval_to_deadline(timeoutval, NSEC_PER_SEC, &ntime);
-#endif /* __TESTPANICS__ */
 
        if (mutex != (user_addr_t)0) {
                error = ksyn_wqfind(mutex, mgen, ugen, 0, 0, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX), &kwq);
@@ -1367,20 +1399,53 @@ psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * ret
                goto out;
        }
 
-#if 0 /* __TESTPANICS__ */
-       /* if no timeout  is passed, set 5 secs timeout to catch hangs */
-       error = ksyn_block_thread_locked(ckwq, (abstime == 0) ? ntime : abstime, kwe, 1);
-#else
-       error = ksyn_block_thread_locked(ckwq, abstime, kwe, 1);
-#endif /* __TESTPANICS__ */
+       kret = ksyn_block_thread_locked(ckwq, abstime, kwe, 1, psynch_cvcontinue, (void *)ckwq);
        /* lock dropped */
 
-       
+       psynch_cvcontinue(ckwq, kret);  
+       /* not expected to return from unix_syscall_return */
+       panic("psynch_cvcontinue returned from unix_syscall_return");
+
+out:
+#if _PSYNCH_TRACE_
+       __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, (uint32_t)*retval, local_error, 0);
+#endif /* _PSYNCH_TRACE_ */
+       ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
+       return(local_error);
+}
+
+
+void 
+psynch_cvcontinue(void * parameter, wait_result_t result)
+{
+       int error = 0, local_error = 0;
+       uthread_t uth = current_uthread();
+       ksyn_wait_queue_t ckwq = (ksyn_wait_queue_t)parameter;
+       ksyn_waitq_element_t kwe;
+       struct ksyn_queue  kfreeq;
+
+       switch (result) {
+               case THREAD_TIMED_OUT:
+                       error  = ETIMEDOUT;
+                       break;
+               case THREAD_INTERRUPTED:
+                       error  = EINTR;
+                       break;
+               default:
+                       error = 0;
+                       break;
+       }
+#if _PSYNCH_TRACE_
+               __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xf4f3f2f1, (uintptr_t)uth, result, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+
        local_error = error;
+       kwe = &uth->uu_kwe;
+
        if (error != 0) {
                ksyn_wqlock(ckwq);
                /* just in case it got woken up as we were granting */
-               *retval = kwe->kwe_psynchretval;
+               uth->uu_rval[0] = kwe->kwe_psynchretval;
 
 #if __TESTPANICS__
                if ((kwe->kwe_kwqqueue != NULL) && (kwe->kwe_kwqqueue != ckwq))
@@ -1394,31 +1459,28 @@ psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * ret
                        kwe->kwe_kwqqueue = NULL;
                }
                if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
-               /* the condition var granted.
+                       /* the condition var granted.
                         * reset the error so that the thread returns back.
                         */
                        local_error = 0;
                        /* no need to set any bits just return as cvsig/broad covers this */
                        ksyn_wqunlock(ckwq);
-                       *retval = 0;
                        goto out;
                }
 
                ckwq->kw_sword += PTHRW_INC;
        
-               /* set C and P bits, in the local error as well as updatebits */
+               /* set C and P bits, in the local error */
                if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
-                       updatebits |= PTH_RWS_CV_CBIT;
                        local_error |= ECVCERORR;
                        if (ckwq->kw_inqueue != 0) {
-                               (void)ksyn_queue_move_tofree(ckwq, kq, (ckwq->kw_lword & PTHRW_COUNT_MASK), &kfreeq, 1, 1);
+                               (void)ksyn_queue_move_tofree(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], (ckwq->kw_lword & PTHRW_COUNT_MASK), &kfreeq, 1, 1);
                        }
                        ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
                        ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
                } else {
                        /* everythig in the queue is a fake entry ? */
                        if ((ckwq->kw_inqueue != 0) && (ckwq->kw_fakecount == ckwq->kw_inqueue)) {
-                               updatebits |= PTH_RWS_CV_PBIT; 
                                local_error |= ECVPERORR;
                        }
                }
@@ -1427,17 +1489,19 @@ psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * ret
        } else  {
                /* PTH_RWL_MTX_WAIT is removed */
                if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT)  != 0)
-                       *retval = PTHRW_INC | PTH_RWS_CV_CBIT;
+                       uth->uu_rval[0] = PTHRW_INC | PTH_RWS_CV_CBIT;
                else
-                       *retval = 0;
+                       uth->uu_rval[0] = 0;
                local_error = 0;
        }
 out:
 #if _PSYNCH_TRACE_
-       __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, (uint32_t)*retval, local_error, 0);
+       __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)ckwq->kw_addr, 0xeeeeeeed, uth->uu_rval[0], local_error, 0);
 #endif /* _PSYNCH_TRACE_ */
        ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
-       return(local_error);
+
+       unix_syscall_return(local_error);
+
 }
 
 /*
@@ -1524,6 +1588,7 @@ psynch_rw_rdlock(__unused proc_t p, struct psynch_rw_rdlock_args * uap, uint32_t
        int isinit = lgen & PTHRW_RWL_INIT;
        uint32_t returnbits  = 0;
        ksyn_waitq_element_t kwe;
+       kern_return_t kret;
 
 #if _PSYNCH_TRACE_
        __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
@@ -1635,8 +1700,19 @@ psynch_rw_rdlock(__unused proc_t p, struct psynch_rw_rdlock_args * uap, uint32_t
        if (error != 0)
                panic("psynch_rw_rdlock: failed to enqueue\n");
 #endif /* __TESTPANICS__ */
-       error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
+       kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
        /* drops the kwq lock */
+       switch (kret) {
+               case THREAD_TIMED_OUT:
+                       error  = ETIMEDOUT;
+                       break;
+               case THREAD_INTERRUPTED:
+                       error  = EINTR;
+                       break;
+               default:
+                       error = 0;
+                       break;
+       }
        
 out:
        if (error != 0) {
@@ -1674,6 +1750,7 @@ psynch_rw_longrdlock(__unused proc_t p, __unused struct psynch_rw_longrdlock_arg
        int isinit = lgen & PTHRW_RWL_INIT;
        uint32_t returnbits=0;
        ksyn_waitq_element_t kwe;
+       kern_return_t kret;
 
        ksyn_wait_queue_t kwq;
        int error=0, block = 0 ;
@@ -1764,8 +1841,19 @@ psynch_rw_longrdlock(__unused proc_t p, __unused struct psynch_rw_longrdlock_arg
                panic("psynch_rw_longrdlock: failed to enqueue\n");
 #endif /* __TESTPANICS__ */
 
-       error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
+       kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
        /* drops the kwq lock */
+       switch (kret) {
+               case THREAD_TIMED_OUT:
+                       error  = ETIMEDOUT;
+                       break;
+               case THREAD_INTERRUPTED:
+                       error  = EINTR;
+                       break;
+               default:
+                       error = 0;
+                       break;
+       }
 out:
        if (error != 0) {
 #if _PSYNCH_TRACE_
@@ -1809,6 +1897,7 @@ psynch_rw_wrlock(__unused proc_t p, struct psynch_rw_wrlock_args * uap, uint32_t
        int isinit = lgen & PTHRW_RWL_INIT;
        uint32_t returnbits  = 0;
        ksyn_waitq_element_t kwe;
+       kern_return_t kret;
 
 #if _PSYNCH_TRACE_
        __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
@@ -1899,8 +1988,19 @@ psynch_rw_wrlock(__unused proc_t p, struct psynch_rw_wrlock_args * uap, uint32_t
                panic("psynch_rw_wrlock: failed to enqueue\n");
 #endif /* __TESTPANICS__ */
 
-       error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
+       kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
        /* drops the wq lock */
+       switch (kret) {
+               case THREAD_TIMED_OUT:
+                       error  = ETIMEDOUT;
+                       break;
+               case THREAD_INTERRUPTED:
+                       error  = EINTR;
+                       break;
+               default:
+                       error = 0;
+                       break;
+       }
 
 out:
        if (error != 0) {
@@ -1944,6 +2044,7 @@ psynch_rw_yieldwrlock(__unused proc_t p, __unused struct  psynch_rw_yieldwrlock_
        uthread_t uth;
        uint32_t returnbits=0;
        ksyn_waitq_element_t kwe;
+       kern_return_t kret;
 
 #if _PSYNCH_TRACE_
        __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
@@ -2031,7 +2132,18 @@ psynch_rw_yieldwrlock(__unused proc_t p, __unused struct  psynch_rw_yieldwrlock_
                panic("psynch_rw_yieldwrlock: failed to enqueue\n");
 #endif /* __TESTPANICS__ */
 
-       error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
+       kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
+       switch (kret) {
+               case THREAD_TIMED_OUT:
+                       error  = ETIMEDOUT;
+                       break;
+               case THREAD_INTERRUPTED:
+                       error  = EINTR;
+                       break;
+               default:
+                       error = 0;
+                       break;
+       }
 
 out:
        if (error != 0) {
@@ -2190,6 +2302,7 @@ psynch_rw_upgrade(__unused proc_t p, struct psynch_rw_upgrade_args * uap, uint32
        uint32_t lockseq = 0, updatebits = 0, preseq = 0;
        int isinit = lgen & PTHRW_RWL_INIT;
        ksyn_waitq_element_t kwe;
+       kern_return_t kret;
 
 #if _PSYNCH_TRACE_
        __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
@@ -2276,8 +2389,19 @@ psynch_rw_upgrade(__unused proc_t p, struct psynch_rw_upgrade_args * uap, uint32
 #endif /* __TESTPANICS__ */
 
 
-       error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
+       kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
        /* drops the lock */
+       switch (kret) {
+               case THREAD_TIMED_OUT:
+                       error  = ETIMEDOUT;
+                       break;
+               case THREAD_INTERRUPTED:
+                       error  = EINTR;
+                       break;
+               default:
+                       error = 0;
+                       break;
+       }
        
 out:
        if (error != 0) {
@@ -2934,8 +3058,12 @@ psynch_wq_cleanup(__unused void *  param, __unused void * param1)
 }
 
 
-int
-ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int mylog)
+kern_return_t
+#if _PSYNCH_TRACE_
+ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int mylog, thread_continue_t continuation, void * parameter)
+#else
+ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, __unused int mylog, thread_continue_t continuation, void * parameter)
+#endif
 {
        kern_return_t kret;
        int error = 0;
@@ -2947,7 +3075,12 @@ ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_ele
        assert_wait_deadline(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, abstime);
        ksyn_wqunlock(kwq);
 
-       kret = thread_block(NULL);
+       if (continuation == THREAD_CONTINUE_NULL)
+               kret = thread_block(NULL);
+       else
+               kret = thread_block_parameter(continuation, parameter);
+               
+#if _PSYNCH_TRACE_
        switch (kret) {
                case THREAD_TIMED_OUT:
                        error  = ETIMEDOUT;
@@ -2956,7 +3089,6 @@ ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_ele
                        error  = EINTR;
                        break;
        }
-#if _PSYNCH_TRACE_
        uth = current_uthread();
 #if defined(__i386__)
        if (mylog != 0)
@@ -2967,7 +3099,7 @@ ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_ele
 #endif
 #endif /* _PSYNCH_TRACE_ */
                
-       return(error);
+       return(kret);
 }
 
 kern_return_t
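The psynch changes above convert the blocking paths to Mach continuations: ksyn_block_thread_locked() now ends in thread_block_parameter(continuation, parameter), and a thread woken from the wait resumes in the continuation (psynch_mtxcontinue / psynch_cvcontinue) on a fresh kernel stack rather than returning through the original call chain; that is why the continuations finish with unix_syscall_return() and why the code after each call site panics if ever reached. The wait_result_t-to-errno mapping that each blocking site now repeats can be read as one helper (hypothetical; the constants are given their real Mach values so the sketch stands alone):

    /* Sketch: the wait_result_t -> errno translation repeated at every
     * psynch blocking site in this commit. */
    #include <errno.h>

    typedef int wait_result_t;       /* stand-in for the Mach typedef */
    #define THREAD_AWAKENED     0    /* real Mach values */
    #define THREAD_TIMED_OUT    1
    #define THREAD_INTERRUPTED  2

    static int wait_result_to_errno(wait_result_t kret)
    {
        switch (kret) {
        case THREAD_TIMED_OUT:   return ETIMEDOUT;
        case THREAD_INTERRUPTED: return EINTR;
        default:                 return 0;   /* THREAD_AWAKENED et al. */
        }
    }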
diff --git a/bsd/kern/trace.codes b/bsd/kern/trace.codes
index 3792f3d37779fb0c4d64de0f67c42196e0c187c2..fbc026fb235ee58c1ae60b2a589adf4dc3b8b97f 100644
 0x53101a0      CPUPM_TEST_INFO
 0x53101a4      CPUPM_TEST_RUN_INFO
 0x53101a8      CPUPM_TEST_SLAVE_INFO
+0x53101ac      CPUPM_FORCED_IDLE
 0x5330000      HIBERNATE
 0x5330004      HIBERNATE_WRITE_IMAGE
 0x5330008      HIBERNATE_MACHINE_INIT
diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c
index 8050679f8eb274131f773fb43ea3b3161011304a..52cf1c806285bbf346e18db20a8fe4f3561dc9c1 100644
@@ -154,6 +154,20 @@ struct vnodeopv_desc spec_vnodeop_opv_desc =
 static void set_blocksize(vnode_t, dev_t);
 
 
+struct _throttle_io_info_t {
+       struct timeval  last_normal_IO_timestamp;
+       struct timeval  last_IO_timestamp;
+       SInt32 numthreads_throttling;
+       SInt32 refcnt;
+       SInt32 alloc;
+};
+
+struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV];
+
+static void throttle_info_update_internal(void *throttle_info, int flags, boolean_t isssd);
+
+
+
 /*
  * Trivial lookup routine that always fails.
  */
@@ -259,6 +273,38 @@ spec_open(struct vnop_open_args *ap)
                }
 
                devsw_unlock(dev, S_IFCHR);
+
+               if (error == 0 && cdevsw[maj].d_type == D_DISK && !vp->v_un.vu_specinfo->si_initted) {
+                       int     isssd = 0;
+                       uint64_t throttle_mask = 0;
+                       uint32_t devbsdunit = 0;
+
+                       if (VNOP_IOCTL(vp, DKIOCGETTHROTTLEMASK, (caddr_t)&throttle_mask, 0, NULL) == 0) {
+                       
+                               if (VNOP_IOCTL(vp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, ap->a_context) == 0) {
+                                       /*
+                                        * as a reasonable approximation, only use the lowest bit of the mask
+                                        * to generate a disk unit number
+                                        */
+                                       devbsdunit = num_trailing_0(throttle_mask);
+
+                                       vnode_lock(vp);
+                                       
+                                       vp->v_un.vu_specinfo->si_isssd = isssd;
+                                       vp->v_un.vu_specinfo->si_devbsdunit = devbsdunit;
+                                       vp->v_un.vu_specinfo->si_throttle_mask = throttle_mask;
+                                       vp->v_un.vu_specinfo->si_throttleable = 1;
+                                       vp->v_un.vu_specinfo->si_initted = 1;
+
+                                       vnode_unlock(vp);
+                               }
+                       }
+                       if (vp->v_un.vu_specinfo->si_initted == 0) {
+                               vnode_lock(vp);
+                               vp->v_un.vu_specinfo->si_initted = 1;
+                               vnode_unlock(vp);
+                       }
+               }
                return (error);
 
        case VBLK:
@@ -357,8 +403,17 @@ spec_read(struct vnop_read_args *ap)
        switch (vp->v_type) {
 
        case VCHR:
+                if (cdevsw[major(vp->v_rdev)].d_type == D_DISK && vp->v_un.vu_specinfo->si_throttleable) {
+                       struct _throttle_io_info_t *throttle_info;
+
+                       throttle_info = &_throttle_io_info[vp->v_un.vu_specinfo->si_devbsdunit];
+
+                       throttle_info_update_internal(throttle_info, 0, vp->v_un.vu_specinfo->si_isssd);
+                }
+
                error = (*cdevsw[major(vp->v_rdev)].d_read)
                        (vp->v_rdev, uio, ap->a_ioflag);
+
                return (error);
 
        case VBLK:
@@ -442,8 +497,19 @@ spec_write(struct vnop_write_args *ap)
        switch (vp->v_type) {
 
        case VCHR:
+                if (cdevsw[major(vp->v_rdev)].d_type == D_DISK && vp->v_un.vu_specinfo->si_throttleable) {
+                       struct _throttle_io_info_t *throttle_info;
+
+                       throttle_info = &_throttle_io_info[vp->v_un.vu_specinfo->si_devbsdunit];
+
+                       throttle_info_update_internal(throttle_info, 0, vp->v_un.vu_specinfo->si_isssd);
+
+                       microuptime(&throttle_info->last_IO_timestamp);
+                }
+
                error = (*cdevsw[major(vp->v_rdev)].d_write)
                        (vp->v_rdev, uio, ap->a_ioflag);
+
                return (error);
 
        case VBLK:
@@ -645,15 +711,6 @@ void IOSleep(int);
 #define LOWPRI_SLEEP_INTERVAL 2
 #endif
 
-struct _throttle_io_info_t {
-       struct timeval  last_normal_IO_timestamp;
-       struct timeval  last_IO_timestamp;
-       SInt32 numthreads_throttling;
-       SInt32 refcnt;
-       SInt32 alloc;
-};
-
-struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV];
 int    lowpri_IO_initial_window_msecs  = LOWPRI_INITIAL_WINDOW_MSECS;
 int    lowpri_IO_window_msecs_inc  = LOWPRI_WINDOW_MSECS_INC;
 int    lowpri_max_window_msecs  = LOWPRI_MAX_WINDOW_MSECS;
@@ -1210,6 +1267,7 @@ spec_strategy(struct vnop_strategy_args *ap)
 
        if (policy == IOPOL_THROTTLE) {
                bp->b_flags |= B_THROTTLED_IO;
+               bp->b_attr.ba_flags |= BA_THROTTLED_IO;
                bp->b_flags &= ~B_PASSIVE;
        } else if (policy == IOPOL_PASSIVE)
                bp->b_flags |= B_PASSIVE;
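spec_open() above collapses the device's throttle mask into a single disk unit index by taking the position of its lowest set bit ("only use the lowest bit of the mask", per the comment). num_trailing_0() itself is not shown in this diff; an illustrative implementation of what the name and usage imply:

    /* Sketch: index of the lowest set bit, as used to derive si_devbsdunit
     * from the DKIOCGETTHROTTLEMASK result. Illustrative, not XNU's code. */
    #include <stdint.h>

    static uint32_t num_trailing_0(uint64_t mask)
    {
        uint32_t count = 0;

        if (mask == 0)
            return 64;              /* no bits set */
        while ((mask & 1) == 0) {
            count++;
            mask >>= 1;
        }
        return count;               /* e.g. mask 0x8 -> unit 3 */
    }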
diff --git a/bsd/miscfs/specfs/specdev.h b/bsd/miscfs/specfs/specdev.h
index 3394fedbf17cfd0d2d527b318da8ab0b9e8816e1..7b44d40e37ba5bb10cc8ac4fc30f3cae6106b517 100644
@@ -83,6 +83,12 @@ struct specinfo {
        daddr_t si_size;                /* device block size in bytes */
        daddr64_t       si_lastr;       /* last read blkno (read-ahead) */
        u_int64_t       si_devsize;     /* actual device size in bytes */
+
+       u_int8_t        si_initted;
+       u_int8_t        si_throttleable;
+       u_int16_t       si_isssd;
+       u_int32_t       si_devbsdunit;
+       u_int64_t       si_throttle_mask;
 };
 /*
  * Exported shorthand
diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c
index 272388f02b392b0f81ee85ae33f10c1abc759bec..9cca104a1b7aa9f06fbd453a599b894f337f4ff2 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -235,8 +235,8 @@ static lck_grp_t *dlil_lock_group;
 lck_grp_t *ifnet_lock_group;
 static lck_grp_t *ifnet_head_lock_group;
 lck_attr_t *ifnet_lock_attr;
-decl_lck_rw_data(, ifnet_head_lock);
-decl_lck_mtx_data(, dlil_ifnet_lock);
+decl_lck_rw_data(static, ifnet_head_lock);
+decl_lck_mtx_data(static, dlil_ifnet_lock);
 u_int32_t dlil_filter_count = 0;
 extern u_int32_t       ipv4_ll_arp_aware;
 
@@ -2771,11 +2771,19 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
        if (ifp == NULL)
                return (EINVAL);
 
+       /*
+        * Serialize ifnet attach using dlil_ifnet_lock, in order to
+        * prevent the interface from being configured while it is
+        * embryonic, as ifnet_head_lock is dropped and reacquired
+        * below prior to marking the ifnet with IFRF_ATTACHED.
+        */
+       dlil_if_lock();
        ifnet_head_lock_exclusive();
        /* Verify we aren't already on the list */
        TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
                if (tmp_if == ifp) {
                        ifnet_head_done();
+                       dlil_if_unlock();
                        return (EEXIST);
                }
        }
@@ -2800,6 +2808,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
                } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
                        ifnet_lock_done(ifp);
                        ifnet_head_done();
+                       dlil_if_unlock();
                        return (EINVAL);
                }
        }
@@ -2813,6 +2822,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
                    "family module - %d\n", __func__, ifp->if_family);
                ifnet_lock_done(ifp);
                ifnet_head_done();
+               dlil_if_unlock();
                return (ENODEV);
        }
 
@@ -2822,6 +2832,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
        if (ifp->if_proto_hash == NULL) {
                ifnet_lock_done(ifp);
                ifnet_head_done();
+               dlil_if_unlock();
                return (ENOBUFS);
        }
        bzero(ifp->if_proto_hash, dlif_phash_size);
@@ -2855,6 +2866,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
                        ifp->if_index = 0;
                        ifnet_lock_done(ifp);
                        ifnet_head_done();
+                       dlil_if_unlock();
                        return (ENOBUFS);
                }
                ifp->if_index = idx;
@@ -2868,6 +2880,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
        if (ifa == NULL) {
                ifnet_lock_done(ifp);
                ifnet_head_done();
+               dlil_if_unlock();
                return (ENOBUFS);
        }
 
@@ -3026,6 +3039,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
        }
        ifnet_lock_done(ifp);
        lck_mtx_unlock(rnh_lock);
+       dlil_if_unlock();
 
 #if PF
        /*
@@ -3252,9 +3266,9 @@ ifnet_detach(ifnet_t ifp)
        dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
 
        /* Let worker thread take care of the rest, to avoid reentrancy */
-       lck_mtx_lock(&dlil_ifnet_lock);
+       dlil_if_lock();
        ifnet_detaching_enqueue(ifp);
-       lck_mtx_unlock(&dlil_ifnet_lock);
+       dlil_if_unlock();
 
        return (0);
 }
@@ -3262,7 +3276,7 @@ ifnet_detach(ifnet_t ifp)
 static void
 ifnet_detaching_enqueue(struct ifnet *ifp)
 {
-       lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
+       dlil_if_lock_assert();
 
        ++ifnet_detaching_cnt;
        VERIFY(ifnet_detaching_cnt != 0);
@@ -3275,7 +3289,7 @@ ifnet_detaching_dequeue(void)
 {
        struct ifnet *ifp;
 
-       lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
+       dlil_if_lock_assert();
 
        ifp = TAILQ_FIRST(&ifnet_detaching_head);
        VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
@@ -3295,7 +3309,7 @@ ifnet_delayed_thread_func(void)
        struct ifnet *ifp;
 
        for (;;) {
-               lck_mtx_lock(&dlil_ifnet_lock);
+               dlil_if_lock();
                while (ifnet_detaching_cnt == 0) {
                        (void) msleep(&ifnet_delayed_run, &dlil_ifnet_lock,
                            (PZERO - 1), "ifnet_delayed_thread", NULL);
@@ -3305,12 +3319,9 @@ ifnet_delayed_thread_func(void)
 
                /* Take care of detaching ifnet */
                ifp = ifnet_detaching_dequeue();
-               if (ifp != NULL) {
-                       lck_mtx_unlock(&dlil_ifnet_lock);
+               dlil_if_unlock();
+               if (ifp != NULL)
                        ifnet_detach_final(ifp);
-               } else {
-                       lck_mtx_unlock(&dlil_ifnet_lock);
-               }
        }
 }
 
@@ -3618,7 +3629,7 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid,
        void *buf, *base, **pbuf;
        int ret = 0;
 
-       lck_mtx_lock(&dlil_ifnet_lock);
+       dlil_if_lock();
        TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
                ifp1 = (struct ifnet *)dlifp1;
 
@@ -3705,7 +3716,7 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid,
        *ifp = ifp1;
 
 end:
-       lck_mtx_unlock(&dlil_ifnet_lock);
+       dlil_if_unlock();
 
        VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
            IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
@@ -3736,6 +3747,24 @@ dlil_if_release(ifnet_t  ifp)
        ifnet_lock_done(ifp);
 }
 
+__private_extern__ void
+dlil_if_lock(void)
+{
+       lck_mtx_lock(&dlil_ifnet_lock);
+}
+
+__private_extern__ void
+dlil_if_unlock(void)
+{
+       lck_mtx_unlock(&dlil_ifnet_lock);
+}
+
+__private_extern__ void
+dlil_if_lock_assert(void)
+{
+       lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
+}
+
 __private_extern__ void
 dlil_proto_unplumb_all(struct ifnet *ifp)
 {
diff --git a/bsd/net/if.c b/bsd/net/if.c
index 26314b948f2ef3dd83bfe5e83855eb6b556502e5..595fcaea994c6467ab3ad9585a3a9b411135ba9c 100644
@@ -1452,7 +1452,13 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 #endif /* IF_CLONE_LIST */
        }
 
+       /*
+        * ioctls which require ifp.  Note that we acquire dlil_ifnet_lock
+        * here to ensure that the ifnet, if found, has been fully attached.
+        */
+       dlil_if_lock();
        ifp = ifunit(ifr->ifr_name);
+       dlil_if_unlock();
        if (ifp == NULL)
                return (ENXIO);
 
diff --git a/bsd/net/if_var.h b/bsd/net/if_var.h
index a76aa7dbbbfbbebab8525cc5620e947d353264db..f3e64b0e34ea6b5abe71a63a0d3d6ee7119984a4 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -807,6 +807,10 @@ __private_extern__ void if_attach_link_ifa(struct ifnet *, struct ifaddr *);
 __private_extern__ void if_detach_ifa(struct ifnet *, struct ifaddr *);
 __private_extern__ void if_detach_link_ifa(struct ifnet *, struct ifaddr *);
 
+__private_extern__ void dlil_if_lock(void);
+__private_extern__ void dlil_if_unlock(void);
+__private_extern__ void dlil_if_lock_assert(void);
+
 extern struct ifaddr *ifa_ifwithaddr(const struct sockaddr *);
 extern struct ifaddr *ifa_ifwithaddr_scoped(const struct sockaddr *, unsigned int);
 extern struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *);
diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c
index 484e47c2b156e2445d49efd1097e9cc60f6acc97..7a0323fde8191f3d361c2874dc657bf4f17ab7ee 100644
@@ -1575,12 +1575,8 @@ nfs_convert_old_nfs_args(mount_t mp, user_addr_t data, vfs_context_t ctx, int ar
        /* copy socket address */
        if (inkernel)
                bcopy(CAST_DOWN(void *, args.addr), &ss, args.addrlen);
-       else {
-               if ((size_t)args.addrlen > sizeof (struct sockaddr_storage))
-                       error = EINVAL;
-               else
-                       error = copyin(args.addr, &ss, args.addrlen);
-       }
+       else
+               error = copyin(args.addr, &ss, args.addrlen);
        nfsmout_if(error);
        ss.ss_len = args.addrlen;
 
diff --git a/bsd/sys/buf.h b/bsd/sys/buf.h
index f1d7f924b474b119f38b88d392d51d427aea92a1..08216cac0e1e898b9eb60eb073a8d4222019eeed 100644
@@ -1025,6 +1025,14 @@ uint32_t buf_redundancy_flags(buf_t);
  */
 void   buf_set_redundancy_flags(buf_t, uint32_t);
 
+/*!
+ @function buf_attr
+ @abstract Gets the attributes for this buf.
+ @param bp Buffer whose attributes to get.
+ @return bufattr_t.
+ */
+bufattr_t buf_attr(buf_t);
+
 #ifdef KERNEL_PRIVATE
 void   buf_setfilter(buf_t, void (*)(buf_t, void *), void *, void (**)(buf_t, void *), void **);
 
@@ -1044,6 +1052,14 @@ void buf_setcpaddr(buf_t, void *);
  @return int.
  */
 void *buf_getcpaddr(buf_t);
+
+/*!
+ @function buf_throttled
+ @abstract Check if a buffer is throttled.
+ @param bap Buffer attribute to test.
+ @return Nonzero if the buffer is throttled, 0 otherwise.
+ */
+int bufattr_throttled(bufattr_t bap);
 #endif /* KERNEL_PRIVATE */
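Taken together, buf_attr() and bufattr_throttled() let a lower layer test the throttle tag without reaching into struct buf (whose b_attr field is private to buf_internal.h). A kernel-side sketch of the intended call pattern; the strategy routine is hypothetical:

    /* Sketch (hypothetical consumer): checking the throttle tag through the
     * new accessors rather than dereferencing struct buf directly. */
    #include <sys/buf.h>

    static void my_strategy(buf_t bp)
    {
        bufattr_t bap = buf_attr(bp);

        if (bufattr_throttled(bap)) {
            /* The originating task was throttled (low-priority I/O):
             * a driver could deprioritize or batch this request. */
        }
        /* ... hand the buf to the real strategy path ... */
    }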
 
 
diff --git a/bsd/sys/buf_internal.h b/bsd/sys/buf_internal.h
index d80eb21c865e35a6159acb921df021339dc33eee..2d259ac2a20f4aff239e2fa52ea1bab0e83a1247 100644
 
 #define NOLIST ((struct buf *)0x87654321)
 
+/*
+ * Attributes of an I/O to be used by lower layers
+ */
+struct bufattr {
+       uint64_t ba_flags;      /* flags. Some are only in-use on embedded devices */
+};
+
 /*
  * The buffer header describes an I/O operation in the kernel.
  */
@@ -128,6 +135,7 @@ struct buf {
 #if CONFIG_PROTECT
        struct cprotect *b_cpentry;     /* address of cp_entry, to be passed further down  */
 #endif /* CONFIG_PROTECT */
+       struct bufattr b_attr;
 #ifdef JOE_DEBUG
         void * b_owner;
         int     b_tag;
@@ -218,6 +226,12 @@ struct buf {
 #define B_SYNC         0x02    /* Do all allocations synchronously. */
 #define B_NOBUFF       0x04    /* Do not allocate struct buf */
 
+/*
+ * ba_flags (Buffer Attribute flags)
+ * Some of these may be in-use only on embedded devices.
+ */
+#define BA_THROTTLED_IO         0x00000002
+
 
 extern int niobuf_headers;             /* The number of IO buffer headers for cluster IO */
 extern int nbuf_headers;               /* The number of buffer headers */
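
BA_THROTTLED_IO mirrors the existing B_THROTTLED_IO buffer flag at the attribute level, so the state survives into layers that only see the bufattr. Presumably the throttling code sets the bit when it classifies an I/O and buf_biodone() clears it (see the vfs_bio.c hunk below); sketched with the SET/CLR macros used elsewhere in vfs_bio.c:

    SET(bp->b_attr.ba_flags, BA_THROTTLED_IO);      /* when the I/O is throttled */
    /* ... later, on completion ... */
    CLR(bp->b_attr.ba_flags, (BA_THROTTLED_IO));    /* done in buf_biodone() */
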
index 745aa6710f78d113c217478f965d8610af27b74a..2bdd79a55337be0b3601c07719a95025b5cac7a4 100644 (file)
@@ -182,6 +182,10 @@ typedef struct
 #define DKIOCLOCKPHYSICALEXTENTS              _IO('d', 81)
 #define DKIOCGETPHYSICALEXTENT                _IOWR('d', 82, dk_physical_extent_t)
 #define DKIOCUNLOCKPHYSICALEXTENTS            _IO('d', 83)
+
+#ifdef PRIVATE
+#define _DKIOCGETMIGRATIONUNITBYTESIZE        _IOR('d', 85, uint32_t)
+#endif /* PRIVATE */
 #endif /* KERNEL */
 
 #endif /* _SYS_DISK_H_ */
index 155b57524b686314fd6f6ccd5e2664cfb62fe500..20a0bafb3d19623dd26759fb61b054498ff10f08 100644 (file)
@@ -84,6 +84,7 @@ struct __ifmultiaddr;
 struct __ifnet_filter;
 struct __rtentry;
 struct __if_clone;
+struct __bufattr;
 
 typedef        struct __ifnet*                 ifnet_t;
 typedef        struct __mbuf*                  mbuf_t;
@@ -95,6 +96,7 @@ typedef struct __ifmultiaddr* ifmultiaddr_t;
 typedef struct __ifnet_filter* interface_filter_t;
 typedef struct __rtentry*              route_t;
 typedef struct __if_clone*             if_clone_t;
+typedef struct __bufattr*              bufattr_t;
 
 #else /* BSD_BUILD */
 
@@ -123,6 +125,7 @@ typedef struct ifmultiaddr* ifmultiaddr_t;
 typedef struct ifnet_filter*   interface_filter_t;
 typedef struct rtentry*                route_t;
 typedef struct if_clone*       if_clone_t;
+typedef struct bufattr*                bufattr_t;
 #endif /* KERNEL_PRIVATE */
 
 #endif /* !BSD_BUILD */
index 1da032f48ede931317aa219122e4612eea37145a..a1f06467a47c1723cc8251f2ccb91a413ac9e155 100644 (file)
@@ -549,6 +549,8 @@ SYSCTL_DECL(_user);
 /* KERN_PANICINFO types */
 #define        KERN_PANICINFO_MAXSIZE  1       /* quad: panic UI image size limit */
 #define        KERN_PANICINFO_IMAGE    2       /* panic UI in 8-bit kraw format */
+#define KERN_PANICINFO_TEST    4       /* Allow the panic UI to be tested by root without causing a panic */
+#define KERN_PANICINFO_NOROOT_TEST     5       /* Allow the noroot UI to be tested by root */
 
 #define CTL_KERN_NAMES { \
        { 0, 0 }, \
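
The two new selectors hang off the existing KERN_PANICINFO node so that root can exercise the panic UI without bringing the machine down. A minimal user-space sketch, assuming the test selectors take no input or output data:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
            int mib[3] = { CTL_KERN, KERN_PANICINFO, KERN_PANICINFO_TEST };

            /* root only: renders the panic UI without actually panicking */
            if (sysctl(mib, 3, NULL, NULL, NULL, 0) == -1)
                    perror("sysctl KERN_PANICINFO_TEST");
            return 0;
    }
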
index 0d474ed28733707be4778394b33babb91d0539a6..53f4f5576ba5474cedb90bf6188049aeb7efc85b 100644 (file)
@@ -405,6 +405,18 @@ buf_setcpaddr(buf_t bp __unused, void *cp_entry_addr __unused) {
 }
 #endif /* CONFIG_PROTECT */
 
+int
+bufattr_throttled(bufattr_t bap) {
+       if ( (bap->ba_flags & BA_THROTTLED_IO) )
+               return 1;
+       return 0;
+}
+
+bufattr_t
+buf_attr(buf_t bp) {
+       return &bp->b_attr;
+}
+
 errno_t
 buf_error(buf_t bp) {
         
@@ -3400,6 +3412,7 @@ bcleanbuf(buf_t bp, boolean_t discard)
 #ifdef CONFIG_PROTECT
                bp->b_cpentry = 0;
 #endif
+               bzero(&bp->b_attr, sizeof(struct bufattr));
 
                lck_mtx_lock_spin(buf_mtxp);
        }
@@ -3659,6 +3672,7 @@ buf_biodone(buf_t bp)
         * indicators
         */
        CLR(bp->b_flags, (B_WASDIRTY | B_THROTTLED_IO | B_PASSIVE));
+       CLR(bp->b_attr.ba_flags, (BA_THROTTLED_IO));
        DTRACE_IO1(done, buf_t, bp);
 
        if (!ISSET(bp->b_flags, B_READ) && !ISSET(bp->b_flags, B_RAW))
@@ -3853,6 +3867,7 @@ alloc_io_buf(vnode_t vp, int priv)
 #ifdef CONFIG_PROTECT
        bp->b_cpentry = 0;
 #endif
+       bzero(&bp->b_attr, sizeof(struct bufattr));
 
        if (vp && (vp->v_type == VBLK || vp->v_type == VCHR))
                bp->b_dev = vp->v_rdev;
index 0132a60dd8da5aff254d9da717c38bd8b4bcefee..b92b69a28415757af8abd9cb14c282606fa031ab 100644 (file)
@@ -198,15 +198,16 @@ fsevents_internal_init(void)
        printf("fsevents: failed to initialize the event zone.\n");
     }
 
-    if (zfill(event_zone, MAX_KFS_EVENTS) != MAX_KFS_EVENTS) {
-       printf("fsevents: failed to pre-fill the event zone.\n");       
-    }
-    
     // mark the zone as exhaustible so that it will not
     // ever grow beyond what we initially filled it with
     zone_change(event_zone, Z_EXHAUST, TRUE);
     zone_change(event_zone, Z_COLLECT, FALSE);
     zone_change(event_zone, Z_CALLERACCT, FALSE);
+
+    if (zfill(event_zone, MAX_KFS_EVENTS) < MAX_KFS_EVENTS) {
+       printf("fsevents: failed to pre-fill the event zone.\n");       
+    }
+    
 }
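
The reorder pre-fills the zone only after its final flags are in place, and the check is relaxed from != to <, treating a partial pre-fill as the failure case. The general pattern, as a sketch:

    zone_change(zone, Z_EXHAUST, TRUE);     /* never grow past the prefill */
    zone_change(zone, Z_COLLECT, FALSE);
    zone_change(zone, Z_CALLERACCT, FALSE);

    if (zfill(zone, nelem) < nelem)         /* zfill returns elements actually filled */
            printf("prefill fell short\n");
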
 
 static void
index 4280f3bfd6990dff312f25b9f9ea1f1f1d1eadc8..43352545d1e931171fd2cf9420ee85d70401b074 100644 (file)
@@ -1394,6 +1394,8 @@ found_alias:
                nvp->v_specflags = 0;
                nvp->v_speclastr = -1;
                nvp->v_specinfo->si_opencount = 0;
+               nvp->v_specinfo->si_initted = 0;
+               nvp->v_specinfo->si_throttleable = 0;
 
                SPECHASH_LOCK();
                
index b5a6d2aac369f7c4e766864f691f252c38339f55..23ba4083b7403d431e0fb3648328ab3029078d92 100644 (file)
@@ -1,4 +1,4 @@
-11.3.0
+11.4.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
index 299cabf8e8d28318f386be1e8a79ac753c8b0827..364d8406958fa76bf11e1eb8451b157847987eee 100644 (file)
@@ -15,11 +15,13 @@ _bdevsw
 _boot
 _bsd_hostname
 _bsd_set_dependency_capable
+_buf_attr
 _buf_create_shadow
 _buf_getcpaddr
 _buf_setcpaddr
 _buf_setfilter
 _buf_shadow
+_bufattr_throttled
 _cdevsw
 _cdevsw_setkqueueok
 _clalloc
index b6b05d10339329b8907addb774bbf792a937957e..acb5515de1ee24cd3b0785af934503f9252e18b3 100644 (file)
@@ -33,3 +33,4 @@ _xts_decrypt
 _xts_done
 _xts_encrypt
 _xts_start
+_aes_decrypt
index a19ab484b72662163475dac9d2749cbe3cc75e95..9432c31a1c727157309edc0329e63e6777eb4744 100644 (file)
@@ -35,3 +35,4 @@ _xts_decrypt
 _xts_done
 _xts_encrypt
 _xts_start
+_aes_decrypt
index 38b70f0ff6cd93fc470cc54ea0fe0d41181d180e..99112a1615afb74a074c66af3a708ce841b78034 100644 (file)
@@ -57,7 +57,6 @@ _pffinddomain
 _pffindproto
 _pmCPUControl
 _pmKextRegister
-_pm_init_lock
 _pru_abort_notsupp
 _pru_accept_notsupp
 _pru_bind_notsupp
@@ -124,3 +123,4 @@ _udbinfo
 _hibernate_vm_lock
 _hibernate_vm_unlock
 _clock_get_system_value
+_PE_state
index 9413c7decfb68a212314925872ea440b3823de08..2e7f007d1f72a180a9e185b3b3cbba8e0c24dde8 100644 (file)
@@ -19,7 +19,6 @@ _mp_rendezvous
 _mp_rendezvous_no_intrs
 _pmCPUControl
 _pmKextRegister
-_pm_init_lock
 _rdmsr_carefully
 _real_ncpus
 _rtc_clock_napped
@@ -32,4 +31,4 @@ _tmrCvt
 _tsc_get_info
 _hibernate_vm_lock
 _hibernate_vm_unlock
-
+_PE_state
index 0fb3c53f3b2d88df8f2cddebbe40f00373e6335a..0cc86a55ca0f86701ec9e2b23f6947044c9ddfc2 100644 (file)
@@ -263,14 +263,10 @@ kern_open_file_for_direct_io(const char * name,
                              off_t offset,
                              caddr_t addr,
                              vm_size_t len);
-
-
 void
 kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
-                             off_t offset, caddr_t addr, vm_size_t len);
-int
-kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len);
-int get_kernel_symfile(struct proc *p, char const **symfile);
+                             off_t write_offset, caddr_t addr, vm_size_t write_length,
+                             off_t discard_offset, off_t discard_end);
 #endif /* _SYS_CONF_H_ */
 
 hibernate_page_list_t *
@@ -354,9 +350,9 @@ uint32_t
 hibernate_write_image(void);
 
 long
-hibernate_machine_entrypoint(IOHibernateImageHeader * header, void * p2, void * p3, void * p4);
+hibernate_machine_entrypoint(uint32_t p1, uint32_t p2, uint32_t p3, uint32_t p4);
 long
-hibernate_kernel_entrypoint(IOHibernateImageHeader * header, void * p2, void * p3, void * p4);
+hibernate_kernel_entrypoint(uint32_t p1, uint32_t p2, uint32_t p3, uint32_t p4);
 void
 hibernate_newruntime_map(void * map, vm_size_t map_size, 
                            uint32_t system_table_offset);
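
kern_close_file_for_direct_io() now takes a write-back range plus a discard range, letting the caller invalidate the image header and discard the stale payload in one call; IOHibernateSystemPostWake() below uses it exactly that way. Sketch of the widened call:

    /* write the invalidated header back, then discard the image payload */
    kern_close_file_for_direct_io(ref,
            0, (caddr_t) header, sizeof(IOHibernateImageHeader),
            sizeof(IOHibernateImageHeader), header->imageSize);
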
index c3282f8ead52dbd5b0b2eee3564fb009ae66ff5c..99c30699bcb86c0a59d2f7fb0bbd767fd8b2c959 100644 (file)
@@ -1721,6 +1721,7 @@ public:
     void deassertPMDriverCall( IOPMDriverCallEntry * callEntry );
     IOReturn changePowerStateWithOverrideTo( unsigned long ordinal );
     static const char * getIOMessageString( uint32_t msg );
+    IOReturn setIgnoreIdleTimer( bool ignore );
 
 #ifdef __LP64__
     static IOWorkLoop * getPMworkloop( void );
index f0002d5d6dcc211e65ada5ceeeaee71f94a90476..4bdddb751ebe3b03c48ac49f7e3d464757bbdf83 100644 (file)
@@ -245,7 +245,7 @@ enum {
  *  false       == Retain FV key when going to standby mode
  *  not present == Retain FV key when going to standby mode
  */
-#define kIOPMDestroyFVKeyOnStandbyKey            "DestroyFVKeyOnStandby"
+#define kIOPMDestroyFVKeyOnStandbyKey       "DestroyFVKeyOnStandby"
 
 /*******************************************************************************
  *
@@ -288,7 +288,15 @@ enum {
      */
     kIOPMDriverAssertionPreventDisplaySleepBit      = 0x40,
 
-    kIOPMDriverAssertionReservedBit7                = 0x80
+    /*! kIOPMDriverAssertionReservedBit7
+     * Reserved for storage family.
+     */
+    kIOPMDriverAssertionReservedBit7                = 0x80,
+
+    /*! kIOPMDriverAssertionReservedBit8
+     * Reserved for networking family.
+     */
+    kIOPMDriverAssertionReservedBit8                = 0x100
 };
 
  /* kIOPMAssertionsDriverKey
@@ -665,6 +673,7 @@ enum {
 // Maintenance wake calendar.
 #define kIOPMSettingMaintenanceWakeCalendarKey      "MaintenanceWakeCalendarDate"
 
+
 struct IOPMCalendarStruct {
     UInt32      year;
     UInt8       month;
@@ -672,6 +681,7 @@ struct IOPMCalendarStruct {
     UInt8       hour;
     UInt8       minute;
     UInt8       second;
+    UInt8       selector;
 };
 typedef struct IOPMCalendarStruct IOPMCalendarStruct;
 
@@ -760,6 +770,7 @@ struct stateChangeNote {
 };
 typedef struct stateChangeNote stateChangeNote;
 
+#endif /* KERNEL && __cplusplus */
 struct IOPowerStateChangeNotification {
     void *        powerRef;
     unsigned long    returnValue;
@@ -768,7 +779,6 @@ struct IOPowerStateChangeNotification {
 };
 typedef struct IOPowerStateChangeNotification IOPowerStateChangeNotification;
 typedef IOPowerStateChangeNotification sleepWakeNote;
-#endif /* KERNEL && __cplusplus */
 
 /*! @struct IOPMSystemCapabilityChangeParameters
     @abstract A structure describing a system capability change.
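
The new selector byte distinguishes what kind of wake a calendar entry schedules; its valid values are the kPMCalendarType enums added to IOPMPrivate.h below. Sketch of filling in a sleep-service wake date (the date values are hypothetical):

    IOPMCalendarStruct wake = { 0 };

    wake.year     = 2012;
    wake.month    = 5;
    wake.day      = 10;
    wake.hour     = 15;
    wake.selector = kPMCalendarTypeSleepService;    /* from IOPMPrivate.h */
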
index 7e49682adac39c67dd234c08fbb4e800d7ccbef8..117732a715ea9003920ced303a334a4918df0d00 100644 (file)
@@ -37,5 +37,6 @@
 #define kPMSleepSystemOptions           7
 #define kPMSetMaintenanceWakeCalendar   8
 #define kPMSetUserAssertionLevels       9
+#define kPMActivityTickle               10
 
-#define kNumPMMethods                   10
+#define kNumPMMethods                   11
index 3e61d81e0a4bb6da4d22bb305ac9c91c24bbab55..09fdb19e82a7c694f8eda289f517efc46eb66c63 100644 (file)
 /* @constant kIOPMTimelineDictionaryKey
  * @abstract RootDomain key for dictionary describing Timeline's info
  */
-#define     kIOPMTimelineDictionaryKey                  "PMTimelineLogging"
+#define kIOPMTimelineDictionaryKey              "PMTimelineLogging"
 
 /* @constant kIOPMTimelineEnabledKey
  * @abstract Boolean value indicating whether the system is recording PM events.
  * @discussion Key may be found in the dictionary at IOPMrootDomain's property 
  * kIOPMTimelineDictionaryKey. uint32_t value; may be 0.
  */
-#define     kIOPMTimelineEnabledKey                     "TimelineEnabled"
+#define kIOPMTimelineEnabledKey                 "TimelineEnabled"
 
 /* @constant kIOPMTimelineSystemNumberTrackedKey
  * @abstract The maximum number of system power events the system may record.
  * @discussion Key may be found in the dictionary at IOPMrootDomain's property 
  * kIOPMTimelineDictionaryKey. uint32_t value; may be 0.
  */
-#define     kIOPMTimelineSystemNumberTrackedKey         "TimelineSystemEventsTracked"
+#define kIOPMTimelineSystemNumberTrackedKey     "TimelineSystemEventsTracked"
 
 /* @constant kIOPMTimelineSystemBufferSizeKey
  * @abstract Size in bytes  of buffer recording system PM events
  * @discussion Key may be found in the dictionary at IOPMrootDomain's property 
  * kIOPMTimelineDictionaryKey. uint32_t value; may be 0.
  */
-#define     kIOPMTimelineSystemBufferSizeKey            "TimelineSystemBufferSize"
+#define kIOPMTimelineSystemBufferSizeKey        "TimelineSystemBufferSize"
 
 
 
@@ -130,7 +130,8 @@ enum {
     kIOPMSleepReasonIdle                        = 105,
     kIOPMSleepReasonLowPower                    = 106,
     kIOPMSleepReasonThermalEmergency            = 107,
-    kIOPMSleepReasonMaintenance                 = 108
+    kIOPMSleepReasonMaintenance                 = 108,
+    kIOPMSleepReasonSleepServiceExit            = 109
 };
 
 /*
@@ -143,6 +144,7 @@ enum {
 #define kIOPMIdleSleepKey                           "Idle Sleep"
 #define kIOPMLowPowerSleepKey                       "Low Power Sleep"
 #define kIOPMThermalEmergencySleepKey               "Thermal Emergency Sleep"
+#define kIOPMSleepServiceExitKey                    "Sleep Service Back to Sleep"
 
 
 enum {
@@ -239,7 +241,32 @@ enum {
  * PM notification types
  */
 
-/* @constant kIOPMStateConsoleUserShutdown
+/*! @constant kIOPMSleepServiceScheduleImmediate
+ *
+ * Setting type used in calls to IOPMrootDomain::registerPMSettingController
+ * Use this type between powerd and IOKit.framework
+ *
+ */
+#define kIOPMSleepServiceScheduleImmediate     "SleepServiceImmediate"
+
+/*! @constant kIOPMSettingSleepServiceWakeCalendarKey
+ *
+ * Setting type used in calls to IOPMrootDomain::registerPMSettingController
+ * Use this type between xnu and AppleRTC
+ */
+#define kIOPMSettingSleepServiceWakeCalendarKey     "SleepServiceWakeCalendarKey"
+
+/*! @constant kIOPMCalendarWakeTypes 
+ *
+ * These are valid values for IOPM.h:IOPMCalendarStruct->selector
+ */
+enum {
+    kPMCalendarTypeMaintenance = 1,
+    kPMCalendarTypeSleepService = 2
+};
+
+
+/* @constant kIOPMStateConsoleShutdown
  * @abstract Notification of GUI shutdown state available to kexts.
  * @discussion This type can be passed as arguments to registerPMSettingController()
  * to receive callbacks.
@@ -247,7 +274,7 @@ enum {
 #define kIOPMStateConsoleShutdown   "ConsoleShutdown"
 
 /* @enum ShutdownValues
- * @abstract Potential values shared with key kIOPMStateConsoleUserShutdown
+ * @abstract Potential values shared with key kIOPMStateConsoleShutdown
  */
 enum {
 /* @constant kIOPMStateConsoleShutdownNone
@@ -272,6 +299,22 @@ enum {
     kIOPMStateConsoleShutdownCertain = 4
 };
 
+/* @constant kIOPMSettingSilentRunningKey
+ * @abstract Notification of silent running mode changes to kexts.
+ * @discussion This key can be passed as an argument to registerPMSettingController()
+ * and also identifies the type of PMSetting notification callback.
+ */
+#define kIOPMSettingSilentRunningKey    "SilentRunning"
+#define kIOPMFeatureSilentRunningKey    kIOPMSettingSilentRunningKey
+
+/* @enum SilentRunningFlags
+ * @abstract The kIOPMSettingSilentRunningKey notification provides an OSNumber
+ * object with a value described by the following flags.
+ */
+enum {
+    kIOPMSilentRunningModeOn = 0x00000001
+};
+
 /*****************************************************************************/
 /*****************************************************************************/
 
@@ -311,14 +354,14 @@ enum {
 #define kIOPMStatsResponseCancel        "ResponseCancel"
 #define kIOPMStatsResponseSlow          "ResponseSlow"
 
+struct PMStatsBounds{
+    uint64_t start;
+    uint64_t stop;
+};
 typedef struct {
-    struct bounds{
-        uint64_t start;
-        uint64_t stop;
-    };
     
-    struct bounds    hibWrite;
-    struct bounds    hibRead;
+    struct PMStatsBounds    hibWrite;
+    struct PMStatsBounds    hibRead;
 //    bounds    driverNotifySleep;
 //    bounds    driverNotifyWake;
 //    bounds    appNotifySleep;
@@ -575,31 +618,162 @@ enum {
 #define kIOPMSleepWakeFailureUUIDKey        "UUID"
 #define kIOPMSleepWakeFailureDateKey        "Date"
 
-/******************************************************************************/
-/* System sleep policy
- * Shared between PM root domain and platform driver.
+/*****************************************************************************
+ *
+ * Root Domain private property keys
+ *
+ *****************************************************************************/
+
+/* kIOPMAutoPowerOffEnabledKey
+ * Indicates if Auto Power Off is enabled.
+ * It has a boolean value.
+ *  true        == Auto Power Off is enabled
+ *  false       == Auto Power Off is disabled
+ *  not present == Auto Power Off is not supported on this hardware
  */
+#define kIOPMAutoPowerOffEnabledKey         "AutoPowerOff Enabled"
 
-// Platform specific property added by the platform driver.
-// An OSData that describes the system sleep policy.
-#define kIOPlatformSystemSleepPolicyKey     "IOPlatformSystemSleepPolicy"
+/* kIOPMAutoPowerOffDelayKey
+ * Key refers to a CFNumberRef that represents the delay in seconds before
+ * entering the Auto Power Off state.  The property is not present if Auto
+ * Power Off is unsupported.
+ */
+#define kIOPMAutoPowerOffDelayKey           "AutoPowerOff Delay"
 
-// Root domain property updated before platform sleep.
-// An OSData that describes the system sleep parameters.
-#define kIOPMSystemSleepParametersKey       "IOPMSystemSleepParameters"
+/*****************************************************************************
+ *
+ * System Sleep Policy
+ *
+ *****************************************************************************/
 
-struct IOPMSystemSleepParameters
+#define kIOPMSystemSleepPolicySignature     0x54504c53
+#define kIOPMSystemSleepPolicyVersion       2
+
+/*!
+ * @defined kIOPMSystemSleepTypeKey
+ * @abstract Indicates the type of system sleep.
+ * @discussion An OSNumber property of root domain that describes the type
+ * of system sleep. This property is set after notifying priority sleep/wake
+ * clients, but before informing interested drivers and shutting down power
+ * plane drivers.
+ */
+#define kIOPMSystemSleepTypeKey             "IOPMSystemSleepType"
+
+struct IOPMSystemSleepPolicyVariables
 {
-    uint32_t    version;
-    uint32_t    sleepFlags;
-    uint32_t    sleepTimer;
-    uint32_t    wakeEvents;
+    uint32_t    signature;                  // kIOPMSystemSleepPolicySignature
+    uint32_t    version;                    // kIOPMSystemSleepPolicyVersion
+
+    uint64_t    currentCapability;          // current system capability bits
+    uint64_t    highestCapability;          // highest system capability bits
+
+    uint64_t    sleepFactors;               // sleep factor bits
+    uint32_t    sleepReason;                // kIOPMSleepReason*
+    uint32_t    sleepPhase;                 // identify the sleep phase
+    uint32_t    hibernateMode;              // current hibernate mode
+
+    uint32_t    standbyDelay;               // standby delay in seconds
+    uint32_t    poweroffDelay;              // auto-poweroff delay in seconds
+
+    uint32_t    reserved[51];               // pad struct size to 256 bytes
+};
+
+enum {
+    kIOPMSleepPhase1 = 1,
+    kIOPMSleepPhase2
+};
+
+// Sleep Factor Mask / Bits
+enum {
+    kIOPMSleepFactorSleepTimerWake          = 0x00000001ULL,
+    kIOPMSleepFactorLidOpen                 = 0x00000002ULL,
+    kIOPMSleepFactorACPower                 = 0x00000004ULL,
+    kIOPMSleepFactorBatteryLow              = 0x00000008ULL,
+    kIOPMSleepFactorStandbyNoDelay          = 0x00000010ULL,
+    kIOPMSleepFactorStandbyForced           = 0x00000020ULL,
+    kIOPMSleepFactorStandbyDisabled         = 0x00000040ULL,
+    kIOPMSleepFactorUSBExternalDevice       = 0x00000080ULL,
+    kIOPMSleepFactorBluetoothHIDDevice      = 0x00000100ULL,
+    kIOPMSleepFactorExternalMediaMounted    = 0x00000200ULL,
+    kIOPMSleepFactorThunderboltDevice       = 0x00000400ULL,
+    kIOPMSleepFactorRTCAlarmScheduled       = 0x00000800ULL,
+    kIOPMSleepFactorMagicPacketWakeEnabled  = 0x00001000ULL,
+    kIOPMSleepFactorHibernateForced         = 0x00010000ULL,
+    kIOPMSleepFactorAutoPowerOffDisabled    = 0x00020000ULL,
+    kIOPMSleepFactorAutoPowerOffForced      = 0x00040000ULL
+};
+
+// System Sleep Types
+enum {
+    kIOPMSleepTypeInvalid                   = 0,
+    kIOPMSleepTypeAbortedSleep              = 1,
+    kIOPMSleepTypeNormalSleep               = 2,
+    kIOPMSleepTypeSafeSleep                 = 3,
+    kIOPMSleepTypeHibernate                 = 4,
+    kIOPMSleepTypeStandby                   = 5,
+    kIOPMSleepTypePowerOff                  = 6,
+    kIOPMSleepTypeLast                      = 7
+};
+
+// System Sleep Flags
+enum {
+    kIOPMSleepFlagDisableHibernateAbort     = 0x00000001,
+    kIOPMSleepFlagDisableUSBWakeEvents      = 0x00000002,
+    kIOPMSleepFlagDisableBatlowAssertion    = 0x00000004
 };
 
-// Sleep flags
+// System Wake Events
 enum {
-    kIOPMSleepFlagHibernate         = 0x00000001,
-    kIOPMSleepFlagSleepTimerEnable  = 0x00000002
+    kIOPMWakeEventLidOpen                   = 0x00000001,
+    kIOPMWakeEventLidClose                  = 0x00000002,
+    kIOPMWakeEventACAttach                  = 0x00000004,
+    kIOPMWakeEventACDetach                  = 0x00000008,
+    kIOPMWakeEventPowerButton               = 0x00000100,
+    kIOPMWakeEventUserPME                   = 0x00000400,
+    kIOPMWakeEventSleepTimer                = 0x00000800,
+    kIOPMWakeEventBatteryLow                = 0x00001000,
+    kIOPMWakeEventDarkPME                   = 0x00002000
 };
 
+/*!
+ * @defined kIOPMSystemSleepParametersKey
+ * @abstract Sleep parameters describing the upcoming sleep
+ * @discussion Root domain updates this OSData property before system sleep
+ * to pass sleep parameters to the platform driver.  Some of the parameters
+ * are based on the chosen entry in the system sleep policy table.
+ */
+#define kIOPMSystemSleepParametersKey       "IOPMSystemSleepParameters"
+#define kIOPMSystemSleepParametersVersion   2
+
+struct IOPMSystemSleepParameters
+{
+    uint16_t    version;
+    uint16_t    reserved1;
+    uint32_t    sleepType;
+    uint32_t    sleepFlags;
+    uint32_t    ecWakeEvents;
+    uint32_t    ecWakeTimer;
+    uint32_t    ecPoweroffTimer;
+    uint32_t    reserved2[10];
+} __attribute__((packed));
+
+#ifdef KERNEL
+
+/*!
+ * @defined kIOPMInstallSystemSleepPolicyHandlerKey
+ * @abstract Name of the platform function to install a sleep policy handler.
+ * @discussion Pass to IOPMrootDomain::callPlatformFunction(), with a pointer
+ * to the C-function handler at param1, and an optional target at param2, to
+ * register a sleep policy handler. Only a single sleep policy handler can
+ * be installed.
+ */
+#define kIOPMInstallSystemSleepPolicyHandlerKey        \
+        "IOPMInstallSystemSleepPolicyHandler"
+
+typedef IOReturn (*IOPMSystemSleepPolicyHandler)(
+        void * target, const IOPMSystemSleepPolicyVariables * vars,
+        IOPMSystemSleepParameters * params );
+
+#endif /* KERNEL */
+
 #endif /* ! _IOKIT_IOPMPRIVATE_H */
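
A platform driver installs at most one sleep policy handler through callPlatformFunction() with the key above; the handler reads the policy variables and fills in the outgoing sleep parameters. A minimal sketch, assuming a policy where AC power selects normal sleep over standby:

    static IOReturn
    example_sleep_policy_handler(void * target,
            const IOPMSystemSleepPolicyVariables * vars,
            IOPMSystemSleepParameters * params)
    {
            if ((vars->signature != kIOPMSystemSleepPolicySignature) ||
                (vars->version != kIOPMSystemSleepPolicyVersion))
                    return kIOReturnBadArgument;

            params->version   = kIOPMSystemSleepParametersVersion;
            params->sleepType = (vars->sleepFactors & kIOPMSleepFactorACPower)
                                ? kIOPMSleepTypeNormalSleep : kIOPMSleepTypeStandby;
            return kIOReturnSuccess;
    }
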
index 760e7d6741304ee8575f099b4ab4d2749b25dfa1..55f4ebe945f3f8050067e12c6626588239887a4a 100644 (file)
@@ -381,8 +381,7 @@ public:
     void        handleQueueSleepWakeUUID(
                     OSObject *obj);
 
-    IOReturn    setMaintenanceWakeCalendar(
-                    const IOPMCalendarStruct * calendar );
+    IOReturn    setMaintenanceWakeCalendar(const IOPMCalendarStruct * calendar );
 
     // Handle callbacks from IOService::systemWillShutdown()
        void        acknowledgeSystemWillShutdown( IOService * from );
@@ -407,6 +406,9 @@ public:
     bool        systemMessageFilter(
                     void * object, void * arg1, void * arg2, void * arg3 );
 
+    void        publishPMSetting(
+                    const OSSymbol * feature, uint32_t where, uint32_t * featureID );
+
 /*! @function recordPMEvent
     @abstract Logs IOService PM event timing.
     @discussion Should only be called from IOServicePM. Should not be exported.
@@ -467,6 +469,7 @@ private:
     IOPMPowerStateQueue     *pmPowerStateQueue;
 
     OSArray                 *allowedPMSettings;
+    OSArray                 *noPublishPMSettings;
     PMTraceWorker           *pmTracer;
     PMAssertionsTracker     *pmAssertions;
 
@@ -565,6 +568,7 @@ private:
     unsigned int            logGraphicsClamp        :1;
     unsigned int            darkWakeToSleepASAP     :1;
     unsigned int            darkWakeMaintenance     :1;
+    unsigned int            darkWakeSleepService    :1;
     unsigned int            darkWakePostTickle      :1;
 
     unsigned int            sleepTimerMaintenance   :1;
@@ -585,6 +589,7 @@ private:
 
     IOOptionBits            platformSleepSupport;
     uint32_t                _debugWakeSeconds;
+    uint32_t                _lastDebugWakeSeconds;
 
     queue_head_t            aggressivesQueue;
     thread_call_t           aggressivesThreadCall;
@@ -601,6 +606,10 @@ private:
 
     IOPMTimeline            *timeline;
 
+    IOPMSystemSleepPolicyHandler    _sleepPolicyHandler;
+    void *                          _sleepPolicyTarget;
+    IOPMSystemSleepPolicyVariables *_sleepPolicyVars;
+
        // IOPMrootDomain internal sleep call
     IOReturn    privateSleepSystem( uint32_t sleepReason );
     void        reportUserInput( void );
@@ -647,11 +656,14 @@ private:
 
     void        evaluatePolicy( int stimulus, uint32_t arg = 0 );
 
+    void evaluateAssertions(IOPMDriverAssertionType newAssertions, 
+                                IOPMDriverAssertionType oldAssertions);
+
     void        deregisterPMSettingObject( PMSettingObject * pmso );
 
 #if HIBERNATION
     bool        getSleepOption( const char * key, uint32_t * option );
-    bool        evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p );
+    bool        evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p, int phase );
     void        evaluateSystemSleepPolicyEarly( void );
     void        evaluateSystemSleepPolicyFinal( void );
 #endif /* HIBERNATION */
index a4d7dbb4d08e62029d549932f4acb54ff7c51f78..002055ff145e94ea7bab384a39a29b74541ff198 100644 (file)
@@ -792,7 +792,7 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
         HIBLOG("error 0x%x opening hibernation file\n", err);
        if (vars->fileRef)
        {
-           kern_close_file_for_direct_io(vars->fileRef, 0, 0, 0);
+           kern_close_file_for_direct_io(vars->fileRef, 0, 0, 0, 0, 0);
            gIOHibernateFileRef = vars->fileRef = NULL;
        }
     }
@@ -897,7 +897,7 @@ IOPolledFileWrite(IOPolledFileIOVars * vars,
             {
                 AbsoluteTime startTime, endTime;
 
-                uint32_t encryptLen, encryptStart;
+                uint64_t encryptLen, encryptStart;
                 encryptLen = vars->position - vars->encryptStart;
                 if (encryptLen > length)
                     encryptLen = length;
@@ -1713,7 +1713,7 @@ IOHibernateSystemWake(void)
     if (vars->ioBuffer)
        vars->ioBuffer->release();
     bzero(&gIOHibernateHandoffPages[0], gIOHibernateHandoffPageCount * sizeof(gIOHibernateHandoffPages[0]));
-    if (vars->handoffBuffer)
+    if (vars->handoffBuffer && (kIOHibernateStateWakingFromHibernate == gIOHibernateState))
     {
        IOHibernateHandoff * handoff;
        bool done = false;
@@ -1721,7 +1721,7 @@ IOHibernateSystemWake(void)
             !done;
             handoff = (IOHibernateHandoff *) &handoff->data[handoff->bytecount])
        {
-//         HIBPRINT("handoff %p, %x, %x\n", handoff, handoff->type, handoff->bytecount);
+           HIBPRINT("handoff %p, %x, %x\n", handoff, handoff->type, handoff->bytecount);
            uint8_t * data = &handoff->data[0];
            switch (handoff->type)
            {
@@ -1772,7 +1772,9 @@ IOHibernateSystemPostWake(void)
        gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature;
        kern_close_file_for_direct_io(gIOHibernateFileRef,
                                       0, (caddr_t) gIOHibernateCurrentHeader, 
-                                      sizeof(IOHibernateImageHeader));
+                                      sizeof(IOHibernateImageHeader),
+                                      sizeof(IOHibernateImageHeader),
+                                      gIOHibernateCurrentHeader->imageSize);
         gIOHibernateFileRef = 0;
     }
     return (kIOReturnSuccess);
@@ -2198,7 +2200,7 @@ hibernate_write_image(void)
         {
             if (needEncrypt && (kEncrypt & pageType))
             {
-                vars->fileVars->encryptStart = (vars->fileVars->position & ~(AES_BLOCK_SIZE - 1));
+                vars->fileVars->encryptStart = (vars->fileVars->position & ~(((uint64_t)AES_BLOCK_SIZE) - 1));
                 vars->fileVars->encryptEnd   = UINT64_MAX;
                 HIBLOG("encryptStart %qx\n", vars->fileVars->encryptStart);
 
@@ -2338,8 +2340,7 @@ hibernate_write_image(void)
 
             if ((kEncrypt & pageType))
             {
-                vars->fileVars->encryptEnd = (vars->fileVars->position + AES_BLOCK_SIZE - 1) 
-                                              & ~(AES_BLOCK_SIZE - 1);
+                vars->fileVars->encryptEnd = ((vars->fileVars->position + 511) & ~511ULL);
                 HIBLOG("encryptEnd %qx\n", vars->fileVars->encryptEnd);
             }
 
@@ -2352,11 +2353,14 @@ hibernate_write_image(void)
             }
             if (kWiredClear == pageType)
             {
+               // enlarge wired image for test
+//              err = IOPolledFileWrite(vars->fileVars, 0, 0x60000000, cryptvars);
+
                 // end wired image
                 header->encryptStart = vars->fileVars->encryptStart;
                 header->encryptEnd   = vars->fileVars->encryptEnd;
                 image1Size = vars->fileVars->position;
-                HIBLOG("image1Size %qd, encryptStart1 %qx, End1 %qx\n",
+                HIBLOG("image1Size 0x%qx, encryptStart1 0x%qx, End1 0x%qx\n",
                         image1Size, header->encryptStart, header->encryptEnd);
             }
         }
@@ -2736,8 +2740,8 @@ hibernate_machine_init(void)
            }
        }
     }
-    if (pagesDone == gIOHibernateCurrentHeader->actualUncompressedPages)
-       err = kIOReturnLockedRead;
+    if ((kIOReturnSuccess == err) && (pagesDone == gIOHibernateCurrentHeader->actualUncompressedPages))
+       err = kIOReturnLockedRead;
 
     if (kIOReturnSuccess != err)
        panic("Hibernate restore error %x", err);
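
The encryptStart/encryptEnd arithmetic is power-of-two masking, now carried out in 64 bits: round down to an AES block (AES_BLOCK_SIZE is 16) and round up to a 512-byte sector. A worked sketch:

    uint64_t pos   = 0x12345;
    uint64_t start = pos & ~(((uint64_t) AES_BLOCK_SIZE) - 1); /* 0x12340: 16-byte aligned */
    uint64_t end   = (pos + 511) & ~511ULL;                    /* 0x12400: next sector up  */
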
index 7259ab3ecb1e415bdc593769f088ad4daa7d420d..79410326b5521cd67088b64f43e4a582e47977e6 100644 (file)
@@ -201,7 +201,12 @@ enum
     kIOHibernateRestoreCodeWakeMapSize     = 'wkms',
     kIOHibernateRestoreCodeConflictPage            = 'cfpg',
     kIOHibernateRestoreCodeConflictSource   = 'cfsr',
-    kIOHibernateRestoreCodeNoMemory         = 'nomm'
+    kIOHibernateRestoreCodeNoMemory         = 'nomm',
+    kIOHibernateRestoreCodeTag              = 'tag ',
+    kIOHibernateRestoreCodeSignature        = 'sign',
+    kIOHibernateRestoreCodeMapVirt          = 'mapV',
+    kIOHibernateRestoreCodeHandoffPages     = 'hand',
+    kIOHibernateRestoreCodeHandoffCount     = 'hndc',
 };
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -335,6 +340,8 @@ hibernate_page_bitmap_count(hibernate_bitmap_t * bitmap, uint32_t set, uint32_t
        }
     }
 
+    if ((page + count) > (bitmap->last_page + 1)) count = (bitmap->last_page + 1) - page;
+
     return (count);
 }
 
@@ -403,12 +410,15 @@ bcopy_internal(const void *src, void *dst, uint32_t len)
 #define C_ASSERT(e) typedef char    __C_ASSERT__[(e) ? 1 : -1]
 
 long 
-hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
-                            void * p2, void * p3, void * p4)
+hibernate_kernel_entrypoint(uint32_t p1,
+                            uint32_t p2, uint32_t p3, uint32_t p4)
 {
+    uint64_t headerPhys;
+    uint64_t mapPhys;
+    uint64_t srcPhys;
+    uint64_t imageReadPhys;
+    uint64_t pageIndexPhys;
     uint32_t idx;
-    uint32_t * src;
-    uint32_t * imageReadPos;
     uint32_t * pageIndexSource;
     hibernate_page_list_t * map;
     uint32_t stage;
@@ -418,8 +428,10 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
     uint32_t conflictCount;
     uint32_t compressedSize;
     uint32_t uncompressedPages;
-    uint32_t copyPageListHead;
+    uint32_t copyPageListHeadPage;
+    uint32_t pageListPage;
     uint32_t * copyPageList;
+    uint32_t * src;
     uint32_t copyPageIndex;
     uint32_t sum;
     uint32_t pageSum;
@@ -432,37 +444,43 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
 
     C_ASSERT(sizeof(IOHibernateImageHeader) == 512);
 
+    headerPhys = ptoa_64(p1);
+
     if ((kIOHibernateDebugRestoreLogs & gIOHibernateDebugFlags) && !debug_probe())
        gIOHibernateDebugFlags &= ~kIOHibernateDebugRestoreLogs;
 
-    debug_code(kIOHibernateRestoreCodeImageStart, (uintptr_t) header);
+    debug_code(kIOHibernateRestoreCodeImageStart, headerPhys);
+
+    bcopy_internal((void *) pal_hib_map(IMAGE_AREA, headerPhys), 
+                   gIOHibernateCurrentHeader, 
+                   sizeof(IOHibernateImageHeader));
 
-    bcopy_internal(header, 
-                gIOHibernateCurrentHeader, 
-                sizeof(IOHibernateImageHeader));
+    debug_code(kIOHibernateRestoreCodeSignature, gIOHibernateCurrentHeader->signature);
 
-    map = (hibernate_page_list_t *)
-                (((uintptr_t) &header->fileExtentMap[0]) 
-                            + header->fileExtentMapSize 
-                            + ptoa_32(header->restore1PageCount)
-                            + header->previewSize);
+    mapPhys = headerPhys
+             + (offsetof(IOHibernateImageHeader, fileExtentMap)
+            + gIOHibernateCurrentHeader->fileExtentMapSize 
+            + ptoa_32(gIOHibernateCurrentHeader->restore1PageCount)
+            + gIOHibernateCurrentHeader->previewSize);
 
-    lastImagePage = atop_32(((uintptr_t) header) + header->image1Size);
+    map = (hibernate_page_list_t *) pal_hib_map(BITMAP_AREA, mapPhys);
 
-    lastMapPage = atop_32(((uintptr_t) map) + header->bitmapSize);
+    lastImagePage = atop_64(headerPhys + gIOHibernateCurrentHeader->image1Size);
+    lastMapPage = atop_64(mapPhys + gIOHibernateCurrentHeader->bitmapSize);
 
-    handoffPages     = header->handoffPages;
-    handoffPageCount = header->handoffPageCount;
+    handoffPages     = gIOHibernateCurrentHeader->handoffPages;
+    handoffPageCount = gIOHibernateCurrentHeader->handoffPageCount;
 
     debug_code(kIOHibernateRestoreCodeImageEnd,       ptoa_64(lastImagePage));
-    debug_code(kIOHibernateRestoreCodeMapStart,       (uintptr_t) map);
+    debug_code(kIOHibernateRestoreCodeMapStart,       mapPhys);
     debug_code(kIOHibernateRestoreCodeMapEnd,         ptoa_64(lastMapPage));
 
-    debug_code('hand', ptoa_64(handoffPages));
-    debug_code('hnde', ptoa_64(handoffPageCount));
+    debug_code(kIOHibernateRestoreCodeMapVirt, (uintptr_t) map);
+    debug_code(kIOHibernateRestoreCodeHandoffPages, ptoa_64(handoffPages));
+    debug_code(kIOHibernateRestoreCodeHandoffCount, handoffPageCount);
 
     // knock all the image pages to be used out of free map
-    for (ppnum = atop_32((uintptr_t) header); ppnum <= lastImagePage; ppnum++)
+    for (ppnum = atop_64(headerPhys); ppnum <= lastImagePage; ppnum++)
     {
        hibernate_page_bitset(map, FALSE, ppnum);
     }
@@ -475,40 +493,39 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
     nextFree = 0;
     hibernate_page_list_grab(map, &nextFree);
 
-    pal_hib_window_setup(hibernate_page_list_grab(map, &nextFree));
-
-    sum = header->actualRestore1Sum;
-    gIOHibernateCurrentHeader->diag[0] = (uint32_t)(uintptr_t) header;
+    sum = gIOHibernateCurrentHeader->actualRestore1Sum;
+    gIOHibernateCurrentHeader->diag[0] = atop_64(headerPhys);
     gIOHibernateCurrentHeader->diag[1] = sum;
 
-    uncompressedPages = 0;
-    conflictCount     = 0;
-    copyPageListHead  = 0;
-    copyPageList      = 0;
-    copyPageIndex     = PAGE_SIZE >> 2;
+    uncompressedPages    = 0;
+    conflictCount        = 0;
+    copyPageListHeadPage = 0;
+    copyPageList         = 0;
+    copyPageIndex        = PAGE_SIZE >> 2;
 
-    compressedSize    = PAGE_SIZE;
-    stage             = 2;
-    count             = 0;
-    src               = NULL;
+    compressedSize       = PAGE_SIZE;
+    stage                = 2;
+    count                = 0;
+    srcPhys              = 0;
 
     if (gIOHibernateCurrentHeader->previewSize)
     {
-       pageIndexSource = (uint32_t *)
-                    (((uintptr_t) &header->fileExtentMap[0]) 
-                                + gIOHibernateCurrentHeader->fileExtentMapSize 
-                                + ptoa_32(gIOHibernateCurrentHeader->restore1PageCount));
-       imageReadPos = (uint32_t *) (((uintptr_t) pageIndexSource) + gIOHibernateCurrentHeader->previewPageListSize);
-       lastPageIndexPage = atop_32((uintptr_t) imageReadPos);
+       pageIndexPhys     = headerPhys
+                          + (offsetof(IOHibernateImageHeader, fileExtentMap)
+                          + gIOHibernateCurrentHeader->fileExtentMapSize 
+                          + ptoa_32(gIOHibernateCurrentHeader->restore1PageCount));
+       imageReadPhys     = (pageIndexPhys + gIOHibernateCurrentHeader->previewPageListSize);
+       lastPageIndexPage = atop_64(imageReadPhys);
+       pageIndexSource   = (uint32_t *) pal_hib_map(IMAGE2_AREA, pageIndexPhys);
     }
     else
     {
-       pageIndexSource   = NULL;
+       pageIndexPhys     = 0;
        lastPageIndexPage = 0;
-       imageReadPos =  (uint32_t *) (((uintptr_t) map) + gIOHibernateCurrentHeader->bitmapSize);
+       imageReadPhys     = (mapPhys + gIOHibernateCurrentHeader->bitmapSize);
     }
 
-    debug_code(kIOHibernateRestoreCodePageIndexStart, (uintptr_t) pageIndexSource);
+    debug_code(kIOHibernateRestoreCodePageIndexStart, pageIndexPhys);
     debug_code(kIOHibernateRestoreCodePageIndexEnd,   ptoa_64(lastPageIndexPage));
 
     while (1)
@@ -517,38 +534,35 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
        {
            case 2:
                // copy handoff data
-               count = src ? 0 : handoffPageCount;
+               count = srcPhys ? 0 : handoffPageCount;
                if (!count)
                    break;
-               if (count > gIOHibernateHandoffPageCount)
-                   count = gIOHibernateHandoffPageCount;
-               src = (uint32_t *) (uintptr_t) ptoa_64(handoffPages);
+               if (count > gIOHibernateHandoffPageCount) count = gIOHibernateHandoffPageCount;
+               srcPhys = ptoa_64(handoffPages);
                break;
        
            case 1:
                // copy pageIndexSource pages == preview image data
-               if (!src)
+               if (!srcPhys)
                {
-                   if (!pageIndexSource)
-                       break;
-                   src = imageReadPos;
+                   if (!pageIndexPhys) break;
+                   srcPhys = imageReadPhys;
                }
                ppnum = pageIndexSource[0];
                count = pageIndexSource[1];
                pageIndexSource += 2;
-               imageReadPos = src;
+               pageIndexPhys   += 2 * sizeof(pageIndexSource[0]);
+               imageReadPhys = srcPhys;
                break;
 
            case 0:
                // copy pages
-               if (!src)
-               {
-                   src =  (uint32_t *) (((uintptr_t) map) + gIOHibernateCurrentHeader->bitmapSize);
-               }
+               if (!srcPhys) srcPhys = (mapPhys + gIOHibernateCurrentHeader->bitmapSize);
+               src = (uint32_t *) pal_hib_map(IMAGE_AREA, srcPhys);
                ppnum = src[0];
                count = src[1];
-               src += 2;
-               imageReadPos = src;
+               srcPhys += 2 * sizeof(*src);
+               imageReadPhys = srcPhys;
                break;
        }
 
@@ -558,7 +572,7 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
            if (!stage)
                break;
            stage--;
-           src = NULL;
+           srcPhys = 0;
            continue;
        }
 
@@ -567,23 +581,26 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
            uint32_t tag;
            int conflicts;
 
-           if (2 == stage)
-               ppnum = gIOHibernateHandoffPages[page];
+           src = (uint32_t *) pal_hib_map(IMAGE_AREA, srcPhys);
+
+           if (2 == stage) ppnum = gIOHibernateHandoffPages[page];
            else if (!stage)
            {
                tag = *src++;
+//             debug_code(kIOHibernateRestoreCodeTag, (uintptr_t) tag);
+               srcPhys += sizeof(*src);
                compressedSize = kIOHibernateTagLength & tag;
            }
 
-           conflicts = (ppnum >= atop_32((uintptr_t) map)) && (ppnum <= lastMapPage);
+           conflicts = (ppnum >= atop_64(mapPhys)) && (ppnum <= lastMapPage);
 
-           conflicts |= ((ppnum >= atop_32((uintptr_t) imageReadPos)) && (ppnum <= lastImagePage));
+           conflicts |= ((ppnum >= atop_64(imageReadPhys)) && (ppnum <= lastImagePage));
 
            if (stage >= 2)
-               conflicts |= ((ppnum >= atop_32((uintptr_t) src)) && (ppnum <= (handoffPages + handoffPageCount - 1)));
+               conflicts |= ((ppnum >= atop_64(srcPhys)) && (ppnum <= (handoffPages + handoffPageCount - 1)));
 
            if (stage >= 1)
-               conflicts |= ((ppnum >= atop_32((uintptr_t) pageIndexSource)) && (ppnum <= lastPageIndexPage));
+               conflicts |= ((ppnum >= atop_64(pageIndexPhys)) && (ppnum <= lastPageIndexPage));
 
            if (!conflicts)
            {
@@ -610,15 +627,15 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
                if (copyPageIndex > ((PAGE_SIZE >> 2) - 3))
                {
                    // alloc new copy list page
-                   uint32_t pageListPage = hibernate_page_list_grab(map, &nextFree);
+                   pageListPage = hibernate_page_list_grab(map, &nextFree);
                    // link to current
                    if (copyPageList) {
                            copyPageList[1] = pageListPage;
                    } else {
-                           copyPageListHead = pageListPage;
+                           copyPageListHeadPage = pageListPage;
                    }
                    copyPageList = (uint32_t *)pal_hib_map(SRC_COPY_AREA, 
-                                   ptoa_32(pageListPage));
+                                   ptoa_64(pageListPage));
                    copyPageList[1] = 0;
                    copyPageIndex = 2;
                }
@@ -628,11 +645,12 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
                copyPageList[copyPageIndex++] = (compressedSize | (stage << 24));
                copyPageList[0] = copyPageIndex;
 
-               dst = (uint32_t *)pal_hib_map(DEST_COPY_AREA, ptoa_32(bufferPage));
+               dst = (uint32_t *)pal_hib_map(DEST_COPY_AREA, ptoa_64(bufferPage));
                for (idx = 0; idx < ((compressedSize + 3) >> 2); idx++)
                        dst[idx] = src[idx];
            }
-           src += ((compressedSize + 3) >> 2);
+           srcPhys += ((compressedSize + 3) & ~3);
+           src     += ((compressedSize + 3) >> 2);
        }
     }
 
@@ -641,16 +659,15 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
 
     // -- copy back conflicts
 
-    copyPageList = (uint32_t *)(uintptr_t) ptoa_32(copyPageListHead);
-
-    while (copyPageList)
+    pageListPage = copyPageListHeadPage;
+    while (pageListPage)
     {
-       copyPageList = (uint32_t *)pal_hib_map(COPY_PAGE_AREA, (uintptr_t)copyPageList);
+       copyPageList = (uint32_t *)pal_hib_map(COPY_PAGE_AREA, ptoa_64(pageListPage));
        for (copyPageIndex = 2; copyPageIndex < copyPageList[0]; copyPageIndex += 3)
        {
            ppnum          = copyPageList[copyPageIndex + 0];
-           src            = (uint32_t *) (uintptr_t) ptoa_32(copyPageList[copyPageIndex + 1]);
-           src            = (uint32_t *)pal_hib_map(SRC_COPY_AREA, (uintptr_t)src);
+           srcPhys        = ptoa_64(copyPageList[copyPageIndex + 1]);
+           src            = (uint32_t *) pal_hib_map(SRC_COPY_AREA, srcPhys);
            compressedSize = copyPageList[copyPageIndex + 2];
            stage          = compressedSize >> 24;
            compressedSize &= 0x1FFF;
@@ -660,7 +677,7 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
                sum += pageSum;
            uncompressedPages++;
        }
-       copyPageList = (uint32_t *) (uintptr_t) ptoa_32(copyPageList[1]);
+       pageListPage = copyPageList[1];
     }
 
     pal_hib_patchup();
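
After this rework the restore loop tracks every image structure by 64-bit physical address and only dereferences through a pal_hib_map() window, advancing the physical cursor in step with the mapped pointer. The recurring idiom, sketched from the hunks above:

    src = (uint32_t *) pal_hib_map(IMAGE_AREA, srcPhys); /* map a window before touching */
    ppnum = src[0];
    count = src[1];
    srcPhys += 2 * sizeof(*src);                         /* keep phys cursor in step */
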
index 50000299df4ece585cd2c3c1d51c44a23a73df10..b2714fc9b516986e62fc6f2e2fd7e97556dd5b88 100644 (file)
@@ -344,6 +344,7 @@ IOKernelFreePhysical(mach_vm_address_t address, mach_vm_size_t size)
        kfree((void *)allocationAddress, adjustedSize);
     }
 
+    IOStatisticsAlloc(kIOStatisticsFreeContiguous, size);
 #if IOALLOCDEBUG
     debug_iomalloc_size -= size;
 #endif
@@ -379,12 +380,18 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP
         contiguous = (contiguous && (adjustedSize > page_size))
                            || (alignment > page_size);
 
-        if ((!contiguous) && (maxPhys <= 0xFFFFFFFF))
-        {
-            maxPhys = 0;
-            options |= KMA_LOMEM;
-        }
-
+       if (!contiguous)
+       {
+           if (maxPhys <= 0xFFFFFFFF)
+           {
+               maxPhys = 0;
+               options |= KMA_LOMEM;
+           }
+           else if (gIOLastPage && (atop_64(maxPhys) > gIOLastPage))
+           {
+               maxPhys = 0;
+           }
+       }
        if (contiguous || maxPhys)
        {
            kr = kmem_alloc_contig(kernel_map, &virt, size,
@@ -422,11 +429,12 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP
            address = 0;
     }
 
-#if IOALLOCDEBUG
     if (address) {
+    IOStatisticsAlloc(kIOStatisticsMallocContiguous, size);
+#if IOALLOCDEBUG
        debug_iomalloc_size += size;
-    }
 #endif
+    }
 
     return (address);
 }
@@ -490,10 +498,6 @@ void * IOMallocContiguous(vm_size_t size, vm_size_t alignment,
     }
     while (false);
 
-       if (address) {
-           IOStatisticsAlloc(kIOStatisticsMallocContiguous, size);
-    }
-
     return (void *) address;
 }
 
@@ -531,8 +535,6 @@ void IOFreeContiguous(void * _address, vm_size_t size)
     {
        IOKernelFreePhysical((mach_vm_address_t) address, size);
     }
-
-    IOStatisticsAlloc(kIOStatisticsFreeContiguous, size);
 }
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
index 0a11064a16cc9d838350d021543b59df2ded8163..9b459094542f5dc4046584676621eae5dd633939 100644 (file)
@@ -1937,7 +1937,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection)
     assert(!_wireCount);
     assert(kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type);
 
-    if (_pages >= gIOMaximumMappedIOPageCount)
+    if (_pages > gIOMaximumMappedIOPageCount)
        return kIOReturnNoResources;
 
     dataP = getDataP(_memoryEntries);
index 60f3ec07acc147928283d3e90242a4711cbf4b79..d4e9fa423406635dbd23acce498e0389c779c088 100644 (file)
@@ -190,6 +190,7 @@ static IOPMPowerState ourPowerStates[NUM_POWER_STATES] =
     {1, kIOPMPowerOn,           kIOPMPowerOn,   ON_POWER,      0,0,0,0,0,0,0,0}
 };
 
+#define kIOPMRootDomainWakeTypeSleepService "SleepService"
 #define kIOPMRootDomainWakeTypeMaintenance  "Maintenance"
 #define kIOPMRootDomainWakeTypeSleepTimer   "SleepTimer"
 #define kIOPMrootDomainWakeTypeLowBattery   "LowBattery"
@@ -285,6 +286,8 @@ const OSSymbol *gIOPMStatsApplicationResponseTimedOut;
 const OSSymbol *gIOPMStatsApplicationResponseCancel;
 const OSSymbol *gIOPMStatsApplicationResponseSlow;
 
+#define kBadPMFeatureID     0
+
 /*
  * PMSettingHandle
  * Opaque handle passed to clients of registerPMSettingController()
@@ -796,13 +799,15 @@ static SYSCTL_INT(_debug, OID_AUTO, darkwake, CTLFLAG_RW, &gDarkWakeFlags, 0, ""
 static const OSSymbol * gIOPMSettingAutoWakeSecondsKey;
 static const OSSymbol * gIOPMSettingDebugWakeRelativeKey;
 static const OSSymbol * gIOPMSettingMaintenanceWakeCalendarKey;
+static const OSSymbol * gIOPMSettingSleepServiceWakeCalendarKey;
+static const OSSymbol * gIOPMSettingSilentRunningKey;
 
 //******************************************************************************
 // start
 //
 //******************************************************************************
 
-#define kRootDomainSettingsCount        16
+#define kRootDomainSettingsCount        17
 
 bool IOPMrootDomain::start( IOService * nub )
 {
@@ -815,8 +820,9 @@ bool IOPMrootDomain::start( IOService * nub )
     gRootDomain = this;
     gIOPMSettingAutoWakeSecondsKey = OSSymbol::withCString(kIOPMSettingAutoWakeSecondsKey);
     gIOPMSettingDebugWakeRelativeKey = OSSymbol::withCString(kIOPMSettingDebugWakeRelativeKey);
-    gIOPMSettingMaintenanceWakeCalendarKey =
-        OSSymbol::withCString(kIOPMSettingMaintenanceWakeCalendarKey);
+    gIOPMSettingMaintenanceWakeCalendarKey = OSSymbol::withCString(kIOPMSettingMaintenanceWakeCalendarKey);
+    gIOPMSettingSleepServiceWakeCalendarKey = OSSymbol::withCString(kIOPMSettingSleepServiceWakeCalendarKey);
+    gIOPMSettingSilentRunningKey = OSSymbol::withCStringNoCopy(kIOPMSettingSilentRunningKey);
 
     gIOPMStatsApplicationResponseTimedOut = OSSymbol::withCString(kIOPMStatsResponseTimedOut);
     gIOPMStatsApplicationResponseCancel = OSSymbol::withCString(kIOPMStatsResponseCancel);
@@ -842,7 +848,8 @@ bool IOPMrootDomain::start( IOService * nub )
             OSSymbol::withCString(kIOPMSettingDisplaySleepUsesDimKey),
             OSSymbol::withCString(kIOPMSettingMobileMotionModuleKey),
             OSSymbol::withCString(kIOPMSettingGraphicsSwitchKey),
-            OSSymbol::withCString(kIOPMStateConsoleShutdown)
+            OSSymbol::withCString(kIOPMStateConsoleShutdown),
+            gIOPMSettingSilentRunningKey
         };
 
     PE_parse_boot_argn("darkwake", &gDarkWakeFlags, sizeof(gDarkWakeFlags));
@@ -927,7 +934,12 @@ bool IOPMrootDomain::start( IOService * nub )
                     (const OSObject **)settingsArr,
                     kRootDomainSettingsCount,
                     0);
-                    
+
+    // List of PM settings that should not automatically publish themselves
+    // as features when registered by a listener.
+    noPublishPMSettings = OSArray::withObjects(
+                    (const OSObject **) &gIOPMSettingSilentRunningKey, 1, 0);
+
     fPMSettingsDict = OSDictionary::withCapacity(5);
 
     PMinit();   // creates gIOPMWorkLoop
@@ -1160,6 +1172,14 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj )
     {
         setProperty(kIOPMDestroyFVKeyOnStandbyKey, b);
     }
+    if ((b = OSDynamicCast(OSBoolean, dict->getObject(kIOPMAutoPowerOffEnabledKey))))
+    {
+        setProperty(kIOPMAutoPowerOffEnabledKey, b);
+    }
+    if ((n = OSDynamicCast(OSNumber, dict->getObject(kIOPMAutoPowerOffDelayKey))))
+    {
+        setProperty(kIOPMAutoPowerOffDelayKey, n);
+    }
 
     // Relay our allowed PM settings onto our registered PM clients
     for(i = 0; i < allowedPMSettings->getCount(); i++) {
@@ -2030,6 +2050,9 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
             getPlatform()->PMLog(kIOPMrootDomainClass, kPMLogSystemWake, 0, 0);
             lowBatteryCondition = false;
             lastSleepReason = 0;
+            
+            _lastDebugWakeSeconds = _debugWakeSeconds;
+            _debugWakeSeconds = 0;
 
             // And start logging the wake event here
             // TODO: Publish the wakeReason string as an integer
@@ -2041,7 +2064,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
                        
             recordAndReleasePMEvent( details );
                        
-
 #ifndef __LP64__
             systemWake();
 #endif
@@ -2070,38 +2092,58 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
                 OSNumber * hibOptions = OSDynamicCast(
                     OSNumber, getProperty(kIOHibernateOptionsKey));
 
-                if (hibernateAborted ||
-                    ((hibOptions &&
-                     !(hibOptions->unsigned32BitValue() & kIOHibernateOptionDarkWake))) ||
-                    ((_debugWakeSeconds != 0) &&
-                      ((gDarkWakeFlags & kDarkWakeFlagAlarmIsDark) == 0)) ||
-                    (wakeType && (
-                     wakeType->isEqualTo(kIOPMRootDomainWakeTypeUser) ||
-                     wakeType->isEqualTo(kIOPMRootDomainWakeTypeAlarm))))
+                if (hibernateAborted || ((hibOptions &&
+                    !(hibOptions->unsigned32BitValue() & kIOHibernateOptionDarkWake))))
                 {
+                    // Hibernate aborted, or EFI brought up graphics
+                    wranglerTickled = true;
+                }
+                else
+                if (wakeType && (
+                    wakeType->isEqualTo(kIOPMRootDomainWakeTypeUser) ||
+                    wakeType->isEqualTo(kIOPMRootDomainWakeTypeAlarm)))
+                {
+                    // User wake or RTC alarm
                     wranglerTickled = true;
                 }
                 else
                 if (wakeType &&
-                    wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance))
+                    wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepTimer))
                 {
+                    // SMC standby timer trumps SleepX
                     darkWakeMaintenance = true;
                     darkWakeToSleepASAP = true;
+                    sleepTimerMaintenance = true;
+                }
+                else
+                if ((_lastDebugWakeSeconds != 0) &&
+                    ((gDarkWakeFlags & kDarkWakeFlagAlarmIsDark) == 0))
+                {
+                    // SleepX before maintenance
+                    wranglerTickled = true;
                 }
                 else
                 if (wakeType &&
-                    wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepTimer))
+                    wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance))
                 {
                     darkWakeMaintenance = true;
                     darkWakeToSleepASAP = true;
-                    sleepTimerMaintenance = true;
+                }
+                else
+                if (wakeType &&
+                    wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepService))
+                {
+                    darkWakeToSleepASAP = true;
+//                    darkWakeMaintenance = true; // ????
+                    darkWakeSleepService = true;
                 }
                 else
                 {
                     // Unidentified wake source, resume to full wake if debug
                     // alarm is pending.
 
-                    if (_debugWakeSeconds && (!wakeReason || wakeReason->isEqualTo("")))
+                    if (_lastDebugWakeSeconds &&
+                        (!wakeReason || wakeReason->isEqualTo("")))
                         wranglerTickled = true;
                     else
                         darkWakeToSleepASAP = true;
@@ -2109,11 +2151,18 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
             }
             else
             {
-                // Post a HID tickle immediately - except for maintenance wake.
-
-                if (hibernateAborted || !wakeType ||
-                    !wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance))
+                if (wakeType &&
+                    wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepTimer))
                 {
+                    darkWakeMaintenance = true;
+                    darkWakeToSleepASAP = true;
+                    sleepTimerMaintenance = true;
+                }
+                else if (hibernateAborted || !wakeType ||
+                    !wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance) ||
+                    !wakeReason || !wakeReason->isEqualTo("RTC"))
+                {
+                    // Post a HID tickle immediately - except for RTC maintenance wake.
                     wranglerTickled = true;
                 }
                 else
@@ -2156,9 +2205,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
                        
             recordAndReleasePMEvent( details );
 
-            if (previousPowerState != ON_STATE)
-                _debugWakeSeconds = 0;
-
             // Update childPreventSystemSleep flag using the capability computed
             // by IOSevice::rebuildChildClampBits().
 
@@ -2898,7 +2944,7 @@ void IOPMrootDomain::publishFeature(
     } else {
         // The easy case: no previously existing features listed. We simply
         // set the OSNumber at key 'feature' and we're on our way.
-        features->setObject(feature, new_feature_data);        
+        features->setObject(feature, new_feature_data);
     }
     
     new_feature_data->release();
@@ -2937,6 +2983,9 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID )
     OSNumber                *osNum        = NULL;
     OSArray                 *arrayMemberCopy;
 
+    if (kBadPMFeatureID == removeFeatureID)
+        return kIOReturnNotFound;
+
     if(featuresDictLock) IOLockLock(featuresDictLock);
 
     OSDictionary *features =
@@ -3044,6 +3093,28 @@ exit:
     return ret;
 }
 
+//******************************************************************************
+// publishPMSetting (private)
+//
+// Should only be called by PMSettingObject to publish a PM Setting as a
+// supported feature.
+//******************************************************************************
+
+void IOPMrootDomain::publishPMSetting(
+    const OSSymbol * feature, uint32_t where, uint32_t * featureID )
+{
+    if (noPublishPMSettings &&
+        (noPublishPMSettings->getNextIndexOfObject(feature, 0) != (unsigned int)-1))
+    {
+        // Setting found in noPublishPMSettings array
+        *featureID = kBadPMFeatureID;
+        return;
+    }
+
+    publishFeature(
+        feature->getCStringNoCopy(), where, featureID);
+}
+
 //******************************************************************************
 // setPMSetting (private)
 //
@@ -3377,81 +3448,92 @@ void IOPMrootDomain::informCPUStateChange(
 // evaluateSystemSleepPolicy
 //******************************************************************************
 
+#define kIOPlatformSystemSleepPolicyKey     "IOPlatformSystemSleepPolicy"
+
+// Sleep flags
+enum {
+    kIOPMSleepFlagHibernate         = 0x00000001,
+    kIOPMSleepFlagSleepTimerEnable  = 0x00000002
+};
+
 struct IOPMSystemSleepPolicyEntry
 {
     uint32_t    factorMask;
     uint32_t    factorBits;
     uint32_t    sleepFlags;
     uint32_t    wakeEvents;
-};
+} __attribute__((packed));
 
 struct IOPMSystemSleepPolicyTable
 {
-    uint8_t     signature[4];
+    uint32_t    signature;
     uint16_t    version;
     uint16_t    entryCount;
     IOPMSystemSleepPolicyEntry  entries[];
-};
+} __attribute__((packed));
 
-enum {
-    kIOPMSleepFactorSleepTimerWake          = 0x00000001,
-    kIOPMSleepFactorLidOpen                 = 0x00000002,
-    kIOPMSleepFactorACPower                 = 0x00000004,
-    kIOPMSleepFactorLowBattery              = 0x00000008,
-    kIOPMSleepFactorDeepSleepNoDelay        = 0x00000010,
-    kIOPMSleepFactorDeepSleepDemand         = 0x00000020,
-    kIOPMSleepFactorDeepSleepDisable        = 0x00000040,
-    kIOPMSleepFactorUSBExternalDevice       = 0x00000080,
-    kIOPMSleepFactorBluetoothHIDDevice      = 0x00000100,
-    kIOPMSleepFactorExternalMediaMounted    = 0x00000200,
-    kIOPMSleepFactorDriverAssertBit5        = 0x00000400,   /* Reserved for ThunderBolt */
-    kIOPMSleepFactorDriverAssertBit6        = 0x00000800,
-    kIOPMSleepFactorDriverAssertBit7        = 0x00001000
-};
-
-bool IOPMrootDomain::evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p )
+bool IOPMrootDomain::evaluateSystemSleepPolicy(
+    IOPMSystemSleepParameters * params, int sleepPhase )
 {
     const IOPMSystemSleepPolicyTable * pt;
     OSObject *  prop = 0;
     OSData *    policyData;
-    uint32_t    currentFactors;
-    uint32_t    deepSleepDelay = 0;
-    bool        success = false;
-
-    if (getProperty(kIOPMDeepSleepEnabledKey) != kOSBooleanTrue)
-        return false;
-
-    getSleepOption(kIOPMDeepSleepDelayKey, &deepSleepDelay);
-
-    prop = getServiceRoot()->copyProperty(kIOPlatformSystemSleepPolicyKey);
-    if (!prop)
-        return false;
-
-    policyData = OSDynamicCast(OSData, prop);
-    if (!policyData ||
-        (policyData->getLength() < sizeof(IOPMSystemSleepPolicyTable)))
-    {
-        goto done;
-    }
-
-    pt = (const IOPMSystemSleepPolicyTable *) policyData->getBytesNoCopy();
-    if ((pt->signature[0] != 'S') ||
-        (pt->signature[1] != 'L') ||
-        (pt->signature[2] != 'P') ||
-        (pt->signature[3] != 'T') ||
-        (pt->version      != 1)   ||
-        (pt->entryCount   == 0))
+    uint64_t    currentFactors = 0;
+    uint32_t    standbyDelay;
+    uint32_t    powerOffDelay;
+    uint32_t    mismatch;
+    bool        standbyEnabled;
+    bool        powerOffEnabled;
+    bool        found = false;
+
+    // Get platform's sleep policy table
+    if (!_sleepPolicyHandler)
+    {
+        prop = getServiceRoot()->copyProperty(kIOPlatformSystemSleepPolicyKey);
+        if (!prop) goto done;
+    }
+
+    // Fetch additional settings
+    standbyEnabled = (getSleepOption(kIOPMDeepSleepDelayKey, &standbyDelay)
+        && (getProperty(kIOPMDeepSleepEnabledKey) == kOSBooleanTrue));
+    powerOffEnabled = (getSleepOption(kIOPMAutoPowerOffDelayKey, &powerOffDelay)
+        && (getProperty(kIOPMAutoPowerOffEnabledKey) == kOSBooleanTrue));
+    DLOG("standby %d delay %u, powerOff %d delay %u, hibernate %u\n",
+        standbyEnabled, standbyDelay, powerOffEnabled, powerOffDelay,
+        hibernateMode);
+
+    // pmset level overrides
+    if ((hibernateMode & kIOHibernateModeOn) == 0)
     {
-        goto done;
+        standbyEnabled  = false;
+        powerOffEnabled = false;
     }
-
-    if ((policyData->getLength() - sizeof(IOPMSystemSleepPolicyTable)) !=
-        (sizeof(IOPMSystemSleepPolicyEntry) * pt->entryCount))
+    else if (!(hibernateMode & kIOHibernateModeSleep))
     {
-        goto done;
+        // Force hibernate (i.e. mode 25)
+        // If standby is enabled, force standby.
+        // If poweroff is enabled, force poweroff.
+        if (standbyEnabled)
+            currentFactors |= kIOPMSleepFactorStandbyForced;
+        else if (powerOffEnabled)
+            currentFactors |= kIOPMSleepFactorAutoPowerOffForced;
+        else
+            currentFactors |= kIOPMSleepFactorHibernateForced;
     }
 
-    currentFactors = 0;
+    // Current factors based on environment and assertions
+    if (sleepTimerMaintenance)
+        currentFactors |= kIOPMSleepFactorSleepTimerWake;
+    if (!clamshellClosed)
+        currentFactors |= kIOPMSleepFactorLidOpen;
+    if (acAdaptorConnected)
+        currentFactors |= kIOPMSleepFactorACPower;
+    if (lowBatteryCondition)
+        currentFactors |= kIOPMSleepFactorBatteryLow;
+    if (!standbyDelay)
+        currentFactors |= kIOPMSleepFactorStandbyNoDelay;
+    if (!standbyEnabled)
+        currentFactors |= kIOPMSleepFactorStandbyDisabled;
     if (getPMAssertionLevel(kIOPMDriverAssertionUSBExternalDeviceBit) !=
         kIOPMDriverAssertionLevelOff)
         currentFactors |= kIOPMSleepFactorUSBExternalDevice;
@@ -3461,57 +3543,101 @@ bool IOPMrootDomain::evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p )
     if (getPMAssertionLevel(kIOPMDriverAssertionExternalMediaMountedBit) !=
         kIOPMDriverAssertionLevelOff)
         currentFactors |= kIOPMSleepFactorExternalMediaMounted;
-    if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit5) !=    /* AssertionBit5 = Thunderbolt */
+    if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit5) !=
         kIOPMDriverAssertionLevelOff)
-        currentFactors |= kIOPMSleepFactorDriverAssertBit5;
-    if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit7) !=
+        currentFactors |= kIOPMSleepFactorThunderboltDevice;
+    if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit8) !=
         kIOPMDriverAssertionLevelOff)
-        currentFactors |= kIOPMSleepFactorDriverAssertBit7;
-    if (0 == deepSleepDelay)
-        currentFactors |= kIOPMSleepFactorDeepSleepNoDelay;
-    if (!clamshellClosed)
-        currentFactors |= kIOPMSleepFactorLidOpen;
-    if (acAdaptorConnected)
-        currentFactors |= kIOPMSleepFactorACPower;
-    if (lowBatteryCondition)
-        currentFactors |= kIOPMSleepFactorLowBattery;
-    if (sleepTimerMaintenance)
-        currentFactors |= kIOPMSleepFactorSleepTimerWake;
+        currentFactors |= kIOPMSleepFactorMagicPacketWakeEnabled;
+    if (!powerOffEnabled)
+        currentFactors |= kIOPMSleepFactorAutoPowerOffDisabled;
 
-    // pmset overrides
-    if ((hibernateMode & kIOHibernateModeOn) == 0)
-        currentFactors |= kIOPMSleepFactorDeepSleepDisable;
-    else if ((hibernateMode & kIOHibernateModeSleep) == 0)
-        currentFactors |= kIOPMSleepFactorDeepSleepDemand;
-    
-    DLOG("Sleep policy %u entries, current factors 0x%x\n",
-        pt->entryCount, currentFactors);
+    DLOG("sleep factors 0x%llx\n", currentFactors);
+
+    // Clear the output params
+    bzero(params, sizeof(*params));
+
+    if (_sleepPolicyHandler)
+    {
+        if (!_sleepPolicyVars)
+        {
+            _sleepPolicyVars = IONew(IOPMSystemSleepPolicyVariables, 1);
+            if (!_sleepPolicyVars)
+                goto done;
+            bzero(_sleepPolicyVars, sizeof(*_sleepPolicyVars));
+        }
+        _sleepPolicyVars->signature = kIOPMSystemSleepPolicySignature;
+        _sleepPolicyVars->version   = kIOPMSystemSleepPolicyVersion;
+        if (kIOPMSleepPhase1 == sleepPhase)
+        {
+            _sleepPolicyVars->currentCapability = _currentCapability;
+            _sleepPolicyVars->highestCapability = _highestCapability;
+            _sleepPolicyVars->sleepReason   = lastSleepReason;
+            _sleepPolicyVars->hibernateMode = hibernateMode;
+            _sleepPolicyVars->standbyDelay  = standbyDelay;
+            _sleepPolicyVars->poweroffDelay = powerOffDelay;
+        }
+        _sleepPolicyVars->sleepFactors = currentFactors;
+        _sleepPolicyVars->sleepPhase   = sleepPhase;
+        
+        if ((_sleepPolicyHandler(_sleepPolicyTarget, _sleepPolicyVars, params) !=
+             kIOReturnSuccess) || (kIOPMSleepTypeInvalid == params->sleepType) ||
+             (params->sleepType >= kIOPMSleepTypeLast) ||
+             (kIOPMSystemSleepParametersVersion != params->version))
+        {
+            MSG("sleep policy handler error\n");
+            goto done;
+        }
+
+        DLOG("sleep params v%u, type %u, flags 0x%x, wake 0x%x, timer %u, poweroff %u\n",
+            params->version, params->sleepType, params->sleepFlags,
+            params->ecWakeEvents, params->ecWakeTimer, params->ecPoweroffTimer);
+        found = true;
+        goto done;
+    }
+
+    // Policy table is meaningless without standby enabled
+    if (!standbyEnabled)
+        goto done;
+
+    // Validate the sleep policy table
+    policyData = OSDynamicCast(OSData, prop);
+    if (!policyData || (policyData->getLength() <= sizeof(IOPMSystemSleepPolicyTable)))
+        goto done;
+
+    pt = (const IOPMSystemSleepPolicyTable *) policyData->getBytesNoCopy();
+    if ((pt->signature != kIOPMSystemSleepPolicySignature) ||
+        (pt->version != 1) || (0 == pt->entryCount))
+        goto done;
+
+    if (((policyData->getLength() - sizeof(IOPMSystemSleepPolicyTable)) !=
+         (sizeof(IOPMSystemSleepPolicyEntry) * pt->entryCount)))
+        goto done;
 
     for (uint32_t i = 0; i < pt->entryCount; i++)
     {
-        const IOPMSystemSleepPolicyEntry * policyEntry = &pt->entries[i];
+        const IOPMSystemSleepPolicyEntry * entry = &pt->entries[i];
+        mismatch = (((uint32_t)currentFactors ^ entry->factorBits) & entry->factorMask);
 
-        DLOG("factor mask 0x%08x, bits 0x%08x, flags 0x%08x, wake 0x%08x\n",
-            policyEntry->factorMask, policyEntry->factorBits,
-            policyEntry->sleepFlags, policyEntry->wakeEvents);
+        DLOG("mask 0x%08x, bits 0x%08x, flags 0x%08x, wake 0x%08x, mismatch 0x%08x\n",
+            entry->factorMask, entry->factorBits,
+            entry->sleepFlags, entry->wakeEvents, mismatch);
+        if (mismatch)
+            continue;
 
-        if ((currentFactors ^ policyEntry->factorBits) & policyEntry->factorMask)
-            continue;   // mismatch, try next
+        DLOG("^ found match\n");
+        found = true;
 
-        if (p)
-        {
-            p->version    = 1;
-            p->sleepFlags = policyEntry->sleepFlags;
-            p->sleepTimer = 0;
-            p->wakeEvents = policyEntry->wakeEvents;
-            if (p->sleepFlags & kIOPMSleepFlagSleepTimerEnable)
-            {
-                p->sleepTimer = deepSleepDelay;
-            }
-        }
+        params->version = kIOPMSystemSleepParametersVersion;
+        params->reserved1 = 1;
+        if (entry->sleepFlags & kIOPMSleepFlagHibernate)
+            params->sleepType = kIOPMSleepTypeStandby;
+        else
+            params->sleepType = kIOPMSleepTypeNormalSleep;
 
-        DLOG("matched policy entry %u\n", i);
-        success = true;
+        params->ecWakeEvents = entry->wakeEvents;
+        if (entry->sleepFlags & kIOPMSleepFlagSleepTimerEnable)
+            params->ecWakeTimer = standbyDelay;
         break;
     }
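The table walk above is a masked compare: an entry applies when every factor bit named in factorMask holds the value required by factorBits, and the packed 16-byte entry layout is what makes the getLength() arithmetic before the loop valid. A minimal standalone illustration (factor constants invented for the example):

    #include <stdint.h>
    #include <stdio.h>

    /* Invented example factors; the real bit assignments live in the kernel. */
    #define FACTOR_LID_OPEN 0x02
    #define FACTOR_AC_POWER 0x04

    struct policy_entry {
        uint32_t factorMask, factorBits, sleepFlags, wakeEvents;
    } __attribute__((packed));

    _Static_assert(sizeof(struct policy_entry) == 16,
                   "table length arithmetic assumes 16-byte entries");

    static int entry_matches(uint32_t current, const struct policy_entry *e)
    {
        /* Zero mismatch means every masked bit agrees with the entry. */
        return (((current ^ e->factorBits) & e->factorMask) == 0);
    }

    int main(void)
    {
        /* Entry: requires AC power and a closed lid, ignores everything else. */
        struct policy_entry e = { FACTOR_LID_OPEN | FACTOR_AC_POWER,
                                  FACTOR_AC_POWER, 0, 0 };
        uint32_t current = FACTOR_AC_POWER;                  /* on AC, lid closed */
        printf("match: %d\n", entry_matches(current, &e));   /* -> 1 */
        current |= FACTOR_LID_OPEN;                          /* open the lid */
        printf("match: %d\n", entry_matches(current, &e));   /* -> 0 */
        return 0;
    }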
 
@@ -3519,14 +3645,14 @@ done:
     if (prop)
         prop->release();
 
-    return success;
+    return found;
 }
 
+static IOPMSystemSleepParameters gEarlySystemSleepParams;
+
 void IOPMrootDomain::evaluateSystemSleepPolicyEarly( void )
 {
-    IOPMSystemSleepParameters   params;
-
-    // Evaluate sleep policy before driver sleep phase.
+    // Evaluate early (priority interest phase), before drivers sleep.
 
     DLOG("%s\n", __FUNCTION__);
     removeProperty(kIOPMSystemSleepParametersKey);
@@ -3535,12 +3661,37 @@ void IOPMrootDomain::evaluateSystemSleepPolicyEarly( void )
     hibernateMode = 0;
     getSleepOption(kIOHibernateModeKey, &hibernateMode);
 
-    if (!hibernateNoDefeat &&
-        evaluateSystemSleepPolicy(&params) &&
-        ((params.sleepFlags & kIOPMSleepFlagHibernate) == 0))
+    // Save for late evaluation if sleep is aborted
+    bzero(&gEarlySystemSleepParams, sizeof(gEarlySystemSleepParams));
+
+    if (evaluateSystemSleepPolicy(&gEarlySystemSleepParams, kIOPMSleepPhase1))
+    {
+        if (!hibernateNoDefeat &&
+            (gEarlySystemSleepParams.sleepType == kIOPMSleepTypeNormalSleep))
+        {
+            // Disable hibernate setup for normal sleep
+            hibernateDisabled = true;
+        }
+    }
+
+    // Publish IOPMSystemSleepType
+    uint32_t sleepType = gEarlySystemSleepParams.sleepType;
+    if (sleepType == kIOPMSleepTypeInvalid)
+    {
+        // no sleep policy
+        sleepType = kIOPMSleepTypeNormalSleep;
+        if (hibernateMode & kIOHibernateModeOn)
+            sleepType = (hibernateMode & kIOHibernateModeSleep) ?
+                        kIOPMSleepTypeSafeSleep : kIOPMSleepTypeHibernate;
+    }
+    else if ((sleepType == kIOPMSleepTypeStandby) &&
+             (gEarlySystemSleepParams.ecPoweroffTimer))
     {
-        hibernateDisabled = true;
+        // report the lowest possible sleep state
+        sleepType = kIOPMSleepTypePowerOff;
     }
+
+    setProperty(kIOPMSystemSleepTypeKey, sleepType, 32);
 }
 
 void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void )
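When no policy produces a sleep type, the published IOPMSystemSleepType falls back to the hibernateMode bits, which track the familiar pmset hibernatemode values. A self-contained sketch of that mapping; the mode constants are assumed to match IOHibernatePrivate.h:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed values, per IOHibernatePrivate.h. */
    #define kIOHibernateModeOn    0x01
    #define kIOHibernateModeSleep 0x02

    static const char *fallback_sleep_type(uint32_t mode)
    {
        if (!(mode & kIOHibernateModeOn))
            return "NormalSleep";                    /* pmset hibernatemode 0 */
        return (mode & kIOHibernateModeSleep)
             ? "SafeSleep"                           /* pmset hibernatemode 3 */
             : "Hibernate";                          /* pmset hibernatemode 25 */
    }

    int main(void)
    {
        printf("0 -> %s, 3 -> %s, 25 -> %s\n",
            fallback_sleep_type(0), fallback_sleep_type(3), fallback_sleep_type(25));
        return 0;
    }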
@@ -3548,27 +3699,30 @@ void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void )
     IOPMSystemSleepParameters   params;
     OSData *                    paramsData;
 
-    // Evaluate sleep policy after drivers but before platform sleep.
+    // Evaluate sleep policy after sleeping drivers but before platform sleep.
 
     DLOG("%s\n", __FUNCTION__);
 
-    if (evaluateSystemSleepPolicy(&params))
+    if (evaluateSystemSleepPolicy(&params, kIOPMSleepPhase2))
     {
         if ((hibernateDisabled || hibernateAborted) &&
-            (params.sleepFlags & kIOPMSleepFlagHibernate))
+            (params.sleepType != kIOPMSleepTypeNormalSleep))
         {
-            // Should hibernate but unable to or aborted.
-            // Arm timer for a short sleep and retry or wake fully.
+            // Final evaluation picked a state requiring hibernation,
+            // but hibernate setup was skipped. Retry using the early
+            // sleep parameters.
 
-            params.sleepFlags &= ~kIOPMSleepFlagHibernate;
-            params.sleepFlags |= kIOPMSleepFlagSleepTimerEnable;
-            params.sleepTimer = 1;
+            bcopy(&gEarlySystemSleepParams, &params, sizeof(params));
+            params.sleepType = kIOPMSleepTypeAbortedSleep;
+            params.ecWakeTimer = 1;
             hibernateNoDefeat = true;
             DLOG("wake in %u secs for hibernateDisabled %d, hibernateAborted %d\n",
-                        params.sleepTimer, hibernateDisabled, hibernateAborted);
+                params.ecWakeTimer, hibernateDisabled, hibernateAborted);
         }
         else
+        {
             hibernateNoDefeat = false;
+        }
 
         paramsData = OSData::withBytes(&params, sizeof(params));
         if (paramsData)
@@ -3577,25 +3731,28 @@ void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void )
             paramsData->release();
         }
 
-        if (params.sleepFlags & kIOPMSleepFlagHibernate)
+        if (params.sleepType >= kIOPMSleepTypeHibernate)
         {
-            // Force hibernate
+            // Disable safe sleep to force the hibernate path
             gIOHibernateMode &= ~kIOHibernateModeSleep;
         }
     }
 }
 
 bool IOPMrootDomain::getHibernateSettings(
-    uint32_t *  hibernateMode,
+    uint32_t *  hibernateModePtr,
     uint32_t *  hibernateFreeRatio,
     uint32_t *  hibernateFreeTime )
 {
-    bool ok = getSleepOption(kIOHibernateModeKey, hibernateMode);
+    // Called by IOHibernateSystemSleep() after evaluateSystemSleepPolicyEarly()
+    // has updated the hibernateDisabled flag.
+
+    bool ok = getSleepOption(kIOHibernateModeKey, hibernateModePtr);
     getSleepOption(kIOHibernateFreeRatioKey, hibernateFreeRatio);
     getSleepOption(kIOHibernateFreeTimeKey, hibernateFreeTime);
     if (hibernateDisabled)
-        *hibernateMode = 0;
-    DLOG("hibernateMode 0x%x\n", *hibernateMode);
+        *hibernateModePtr = 0;
+    DLOG("hibernateMode 0x%x\n", *hibernateModePtr);
     return ok;
 }
 
@@ -4224,7 +4381,7 @@ void IOPMrootDomain::handleOurPowerChangeDone(
             {
                 if (((gDarkWakeFlags & kDarkWakeFlagIgnoreDiskIOInDark) == 0) &&
                     (kSystemTransitionWake == _systemTransitionType) &&
-                    (_debugWakeSeconds == 0))
+                    (_lastDebugWakeSeconds == 0))
                 {
                     OSObject * prop = copyProperty(kIOPMRootDomainWakeTypeKey);
                     if (prop)
@@ -4261,7 +4418,7 @@ void IOPMrootDomain::handleOurPowerChangeDone(
             _systemTransitionType, _systemStateGeneration,
             _systemMessageClientMask,
             _desiredCapability, _currentCapability, _pendingCapability,
-            _debugWakeSeconds);
+            _lastDebugWakeSeconds);
 
         // Update current system capability.
 
@@ -4716,8 +4873,15 @@ IOReturn IOPMrootDomain::setMaintenanceWakeCalendar(
     data = OSData::withBytesNoCopy((void *) calendar, sizeof(*calendar));
     if (!data)
         return kIOReturnNoMemory;
+
+    if (kPMCalendarTypeMaintenance == calendar->selector) {
+        ret = setPMSetting(gIOPMSettingMaintenanceWakeCalendarKey, data);
+    } else 
+    if (kPMCalendarTypeSleepService == calendar->selector)
+    {
+        ret = setPMSetting(gIOPMSettingSleepServiceWakeCalendarKey, data);
+    }
     
-    ret = setPMSetting(gIOPMSettingMaintenanceWakeCalendarKey, data);
 
     data->release();
     return ret;
@@ -5633,6 +5797,11 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
                 {
                     lastSleepReason = kIOPMSleepReasonMaintenance;
                     setProperty(kRootDomainSleepReasonKey, kIOPMMaintenanceSleepKey);
+                } 
+                else if (darkWakeSleepService)
+                {
+                    lastSleepReason = kIOPMSleepReasonSleepServiceExit;
+                    setProperty(kRootDomainSleepReasonKey, kIOPMSleepServiceExitKey);
                 }
                 changePowerStateWithOverrideTo( SLEEP_STATE );
             }
@@ -5759,6 +5928,31 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
     }
 }
 
+//******************************************************************************
+// evaluateAssertions
+//
+//******************************************************************************
+void IOPMrootDomain::evaluateAssertions(IOPMDriverAssertionType newAssertions, IOPMDriverAssertionType oldAssertions)
+{
+    IOPMDriverAssertionType changedBits = newAssertions ^ oldAssertions;
+
+    messageClients(kIOPMMessageDriverAssertionsChanged);        
+
+    if (changedBits & kIOPMDriverAssertionPreventDisplaySleepBit) {
+
+        if (wrangler) {
+            bool value = (newAssertions & kIOPMDriverAssertionPreventDisplaySleepBit) ? true : false;
+
+            DLOG("wrangler->setIgnoreIdleTimer\(%d)\n", value);
+            wrangler->setIgnoreIdleTimer( value );
+        }
+    }
+    if (changedBits & kIOPMDriverAssertionCPUBit)
+        evaluatePolicy(kStimulusDarkWakeEvaluate);
+
+
+}
+
 // MARK: -
 // MARK: Statistics
 
@@ -5930,6 +6124,18 @@ IOReturn IOPMrootDomain::callPlatformFunction(
 
         return kIOReturnSuccess;
     }
+    else if (functionName &&
+             functionName->isEqualTo(kIOPMInstallSystemSleepPolicyHandlerKey))
+    {
+        if (_sleepPolicyHandler)
+            return kIOReturnExclusiveAccess;
+        if (!param1)
+            return kIOReturnBadArgument;
+        _sleepPolicyHandler = (IOPMSystemSleepPolicyHandler) param1;
+        _sleepPolicyTarget  = (void *) param2;
+        setProperty("IOPMSystemSleepPolicyHandler", kOSBooleanTrue);
+        return kIOReturnSuccess;
+    }
 
     return super::callPlatformFunction(
         functionName, waitForFunction, param1, param2, param3, param4);
@@ -6790,7 +6996,7 @@ PMSettingObject *PMSettingObject::pmSettingObject(
         for (unsigned int i=0; i<settingCount; i++) {
             // Since there is now at least one listener to this setting, publish
             // PM root domain support for it.
-            parent_arg->publishFeature( settings[i]->getCStringNoCopy(),
+            parent_arg->publishPMSetting( settings[i],
                     supportedPowerSources, &pmso->publishedFeatureID[i] );
         }
     }
@@ -7267,18 +7473,7 @@ void PMAssertionsTracker::tabulate(void)
     if ((assertionsKernel != oldKernel) ||
         (assertionsCombined != oldCombined))
     {    
-        owner->messageClients(kIOPMMessageDriverAssertionsChanged);        
-        
-        if (((assertionsCombined & kIOPMDriverAssertionPreventDisplaySleepBit) != 0) 
-                && ((oldCombined & kIOPMDriverAssertionPreventDisplaySleepBit) == 0))
-        {
-            /* We react to a new PreventDisplaySleep assertion by waking the display
-             * with an activityTickle
-             */
-            owner->evaluatePolicy(kStimulusDarkWakeActivityTickle);
-        } else {
-            owner->evaluatePolicy(kStimulusDarkWakeEvaluate);
-        }
+        owner->evaluateAssertions(assertionsCombined, oldCombined);
     }
 }
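The tabulate() rewrite hands both the new and old combined levels to evaluateAssertions(), which isolates the bits that flipped with an XOR. A user-space sketch of the idiom (the bit position is a stand-in, not the real assertion value):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t assertion_bits_t;
    #define PREVENT_DISPLAY_SLEEP (1ULL << 2)   /* stand-in bit position */

    int main(void)
    {
        assertion_bits_t oldBits = 0;
        assertion_bits_t newBits = PREVENT_DISPLAY_SLEEP;

        assertion_bits_t changed = newBits ^ oldBits;   /* bits that flipped */
        if (changed & PREVENT_DISPLAY_SLEEP) {
            /* The XOR says it changed; newBits says in which direction. */
            int asserted = (newBits & PREVENT_DISPLAY_SLEEP) != 0;
            printf("display-sleep prevention %s\n",
                   asserted ? "asserted" : "released");
        }
        return 0;
    }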
 
index 11efcab256d367c193e9b0905ad737905dfd9062..4905ec2cd9feace6054e69ae2eaf840951a1f82a 100644 (file)
@@ -2867,6 +2867,24 @@ IOReturn IOService::setIdleTimerPeriod ( unsigned long period )
     return kIOReturnSuccess;
 }
 
+IOReturn IOService::setIgnoreIdleTimer( bool ignore )
+{
+    if (!initialized)
+               return IOPMNotYetInitialized;
+
+    OUR_PMLog(kIOPMRequestTypeIgnoreIdleTimer, ignore, 0);
+
+    IOPMRequest * request =
+        acquirePMRequest( this, kIOPMRequestTypeIgnoreIdleTimer );
+    if (!request)
+        return kIOReturnNoMemory;
+
+    request->fArg0 = (void *) ignore;
+    submitPMRequest( request );
+
+    return kIOReturnSuccess;
+}
+
 //******************************************************************************
 // [public] nextIdleTimeout
 //
@@ -2987,7 +3005,7 @@ void IOService::idleTimerExpired( void )
                // Device was active - do not drop power, restart timer.
                fDeviceWasActive = false;
        }
-       else
+       else if (!fIdleTimerIgnored)
        {
                // No device activity - drop power state by one level.
                // Decrement the cached tickle power state when possible.
@@ -5414,7 +5432,7 @@ void IOService::pmTellClientWithResponse ( OSObject * object, void * arg )
         getPMRootDomain()->traceDetail( detail );
     }
 
-    retCode = context->us->messageClient(msgType, object, (void *) &notify);
+    retCode = context->us->messageClient(msgType, object, (void *) &notify, sizeof(notify));
     if ( kIOReturnSuccess == retCode )
     {
         if ( 0 == notify.returnValue )
@@ -5732,7 +5750,7 @@ static void tellKernelClientApplier ( OSObject * object, void * arg )
     notify.stateNumber = context->stateNumber;
     notify.stateFlags  = context->stateFlags;
 
-    context->us->messageClient(context->messageType, object, &notify);
+    context->us->messageClient(context->messageType, object, &notify, sizeof(notify));
 
     if ((kIOLogDebugPower & gIOKitDebug) &&
         (OSDynamicCast(_IOServiceInterestNotifier, object)))
@@ -6804,6 +6822,10 @@ void IOService::executePMRequest( IOPMRequest * request )
             }
             break;
 
+        case kIOPMRequestTypeIgnoreIdleTimer:
+            fIdleTimerIgnored = request->fArg0 ? 1 : 0;
+            break;
+
                default:
                        panic("executePMRequest: unknown request type %x", request->getType());
        }
index 00f53cdfe834d71c77dc06b91be53d0d5c9a18cc..bd2ec9234ca82aed8cb44d1108b91da7ac0515d1 100644 (file)
@@ -54,6 +54,7 @@ enum {
     kIOPMRequestTypeSynchronizePowerTree        = 0x0D,
     kIOPMRequestTypeRequestPowerStateOverride   = 0x0E,
     kIOPMRequestTypeSetIdleTimerPeriod          = 0x0F,
+    kIOPMRequestTypeIgnoreIdleTimer             = 0x10,
     
     /* Reply Types */
     kIOPMRequestTypeReplyStart                  = 0x80,
@@ -240,6 +241,7 @@ private:
     unsigned int            IsPreChange:1;
     unsigned int            DriverCallBusy:1;
     unsigned int            PCDFunctionOverride:1;
+    unsigned int            IdleTimerIgnored:1;
 
     // Time of last device activity.
     AbsoluteTime            DeviceActiveTimestamp;
@@ -367,6 +369,7 @@ private:
 #define fIsPreChange                pwrMgt->IsPreChange
 #define fDriverCallBusy             pwrMgt->DriverCallBusy
 #define fPCDFunctionOverride        pwrMgt->PCDFunctionOverride
+#define fIdleTimerIgnored           pwrMgt->IdleTimerIgnored
 #define fDeviceActiveTimestamp      pwrMgt->DeviceActiveTimestamp
 #define fActivityLock               pwrMgt->ActivityLock
 #define fIdleTimerPeriod            pwrMgt->IdleTimerPeriod
index 29c90deef577fc29d1e3be0eb8b4737f21b1510a..92097acdef15cfb9790aa6f2b90c456084bbd165 100644 (file)
@@ -309,6 +309,11 @@ IOReturn RootDomainUserClient::externalMethod(
                         (uint32_t)arguments->scalarInput[0]);
             break;
             
+        case kPMActivityTickle:
+            fOwner->reportUserInput( );
+            ret = kIOReturnSuccess;
+            break;
+            
 /*
         case kPMMethodCopySystemTimeline:
             // intentional fallthrough
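A user process would reach the new kPMActivityTickle selector through the normal IOUserClient path. The sketch below is illustrative only; the selector's numeric value, the connection type, and any entitlement checks are assumptions, not taken from this diff:

    #include <stdio.h>
    #include <IOKit/IOKitLib.h>

    enum { kPMActivityTickle = 10 };   /* hypothetical selector value */

    int main(void)
    {
        io_service_t root = IOServiceGetMatchingService(kIOMasterPortDefault,
                                IOServiceMatching("IOPMrootDomain"));
        io_connect_t conn = IO_OBJECT_NULL;
        if (root == IO_OBJECT_NULL ||
            IOServiceOpen(root, mach_task_self(), 0, &conn) != KERN_SUCCESS)
            return 1;

        /* No scalar inputs or outputs: just nudge reportUserInput(). */
        kern_return_t kr = IOConnectCallScalarMethod(conn, kPMActivityTickle,
                                                     NULL, 0, NULL, NULL);
        printf("tickle: 0x%x\n", kr);
        IOServiceClose(conn);
        IOObjectRelease(root);
        return (kr == KERN_SUCCESS) ? 0 : 1;
    }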
index 14f0643c2c4d5d2b79ecb92e39bd53112fdadebc..68139f092e5df43dc34b529cfbd2b08b37bfb908 100644 (file)
@@ -4874,8 +4874,9 @@ OSKext::jettisonLinkeditSegment(void)
     kernel_mach_header_t     * machhdr = (kernel_mach_header_t *)kmod_info->address;
     kernel_segment_command_t * linkedit = NULL;
     vm_size_t                  linkeditsize, kextsize;
+    vm_offset_t                linkeditaddr = 0;
     OSData                   * data = NULL;
-
+       
     if (sKeepSymbols || isLibrary() || !isExecutable() || !linkedExecutable) {
         goto finish;
     }
@@ -4899,7 +4900,10 @@ OSKext::jettisonLinkeditSegment(void)
     */
     linkeditsize = round_page(linkedit->vmsize);
     kextsize = kmod_info->size - linkeditsize;
-
+       
+       /* Save linkedit address as removeLinkeditHeaders() will zero it */
+       linkeditaddr = trunc_page(linkedit->vmaddr);
+       
     data = OSData::withBytesNoCopy((void *)kmod_info->address, kextsize);
     if (!data) {
         goto finish;
@@ -4921,7 +4925,7 @@ OSKext::jettisonLinkeditSegment(void)
 
    /* Free the linkedit segment.
     */
-    kext_free(linkedit->vmaddr, linkeditsize);
+    kext_free(linkeditaddr, linkeditsize);
 
 finish:
     return;
index 72ff3059476023ccf13c5bd12aa5a292cc1d2e32..d585c4175d7e8bfbe997800678f966fc269719f1 100644 (file)
@@ -638,7 +638,13 @@ extern void OSSpinLockUnlock(volatile OSSpinLock * lock);
 static __inline__ void OSSynchronizeIO(void)
 {
 }
-
+#if    defined(XNU_KERNEL_PRIVATE)
+#if   defined(__i386__) || defined(__x86_64__)
+static inline void OSMemoryBarrier(void) {
+       __asm__ volatile("mfence" ::: "memory");
+}
+#endif
+#endif /*XNU_KERNEL_PRIVATE */
 #if defined(__cplusplus)
 }
 #endif
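The new OSMemoryBarrier() emits mfence, a full load/store fence. A typical use is publishing data before a ready flag, sketched here as a user-space approximation (the inline above is kernel-private):

    #include <stdint.h>

    static inline void OSMemoryBarrier(void) {
        __asm__ volatile("mfence" ::: "memory");
    }

    static uint32_t payload;
    static volatile uint32_t ready;

    /* Producer: make the payload globally visible before raising the flag. */
    void publish(uint32_t value)
    {
        payload = value;
        OSMemoryBarrier();      /* prior store cannot pass the flag store */
        ready = 1;
    }

    /* Consumer: see the flag before trusting the payload. */
    uint32_t consume(void)
    {
        while (!ready)
            ;
        OSMemoryBarrier();      /* keeps the payload load after the flag check */
        return payload;
    }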
index 9c5460016772d15d9fad5588dc95dd45c7a08318..503fa505362b757ea76dbf434cd6fc1504d14eed 100644 (file)
@@ -125,7 +125,15 @@ struct vc_info vinfo;
 /* allowed otherwise we won't use the panic dialog even if it is allowed */
 boolean_t panicDialogDesired;
 
+void noroot_icon_test(void);
+
+int
+vc_display_lzss_icon(uint32_t dst_x,       uint32_t dst_y,
+                     uint32_t image_width, uint32_t image_height,
+                     const uint8_t *compressed_image,
+                     uint32_t       compressed_size, 
+                     const uint8_t *clut);
+
 extern int       disableConsoleOutput;
 static boolean_t gc_enabled     = FALSE;
 static boolean_t gc_initialized = FALSE;
@@ -2139,6 +2147,150 @@ static void vc_blit_rect_30(int x, int y, int bx,
     }
 }
 
+
+/*
+ * Routines to render the lzss image format
+ */
+
+struct lzss_image_state {
+       uint32_t col;
+       uint32_t row;
+       uint32_t width;
+       uint32_t height;
+       uint32_t bytes_per_row;
+       volatile uint32_t * row_start;
+       const uint8_t* clut;
+};
+typedef struct lzss_image_state lzss_image_state;
+
+// returns 0 if OK, 1 if error
+static inline int 
+vc_decompress_lzss_next_pixel (int next_data, lzss_image_state* state) 
+{
+    uint32_t palette_index = 0;
+    uint32_t pixel_value   = 0;
+
+    palette_index = next_data * 3;
+
+    pixel_value = ( (uint32_t) state->clut[palette_index + 0] << 16) 
+                | ( (uint32_t) state->clut[palette_index + 1] << 8) 
+                | ( (uint32_t) state->clut[palette_index + 2]); 
+
+    *(state->row_start + state->col) = pixel_value;
+
+    if (++state->col >= state->width) {
+        state->col = 0;
+        if (++state->row >= state->height) {
+            return 1;
+        }
+        state->row_start = (volatile uint32_t *) (((uintptr_t)state->row_start) + state->bytes_per_row);
+    }
+    return 0;
+}
+
+
+/*
+ * Blit an lzss compressed image to the framebuffer
+ * Assumes 32 bit screen (which is everything we ship at the moment)
+ * The function vc_display_lzss_icon was copied from libkern/mkext.c, then modified.
+ */
+
+/* 
+ * TODO: Does lzss use too much stack? 4096 plus bytes... 
+ *     Can probably chop it down by 1/2.
+ */
+
+/**************************************************************
+ LZSS.C -- A Data Compression Program
+***************************************************************
+    4/6/1989 Haruhiko Okumura
+    Use, distribute, and modify this program freely.
+    Please send me your improved versions.
+        PC-VAN      SCIENCE
+        NIFTY-Serve PAF01022
+        CompuServe  74050,1022
+
+**************************************************************/
+
+#define N         4096  /* size of ring buffer - must be power of 2 */
+#define F         18    /* upper limit for match_length */
+#define THRESHOLD 2     /* encode string into position and length
+                           if match_length is greater than this */
+
+// returns 0 if OK, 1 if error
+// x and y indicate upper left corner of image location on screen
+int
+vc_display_lzss_icon(uint32_t dst_x,       uint32_t dst_y,
+                     uint32_t image_width, uint32_t image_height,
+                     const uint8_t *compressed_image,
+                     uint32_t       compressed_size, 
+                     const uint8_t *clut)
+{
+    uint32_t* image_start;
+    uint32_t bytes_per_pixel = 4;
+    uint32_t bytes_per_row = vinfo.v_rowbytes;
+
+    image_start = (uint32_t *) (vinfo.v_baseaddr + (dst_y * bytes_per_row) + (dst_x * bytes_per_pixel));
+    
+    lzss_image_state state = {0, 0, image_width, image_height, bytes_per_row, image_start, clut};
+
+    int rval = 0;
+
+    const uint8_t *src = compressed_image;
+    uint32_t srclen = compressed_size;
+
+    /* ring buffer of size N, with extra F-1 bytes to aid string comparison */
+    uint8_t text_buf[N + F - 1];
+    const uint8_t *srcend = src + srclen;
+    int  i, j, k, r, c;
+    unsigned int flags;
+
+    srcend = src + srclen;
+    for (i = 0; i < N - F; i++)
+        text_buf[i] = ' ';
+    r = N - F;
+    flags = 0;
+    for ( ; ; ) {
+        if (((flags >>= 1) & 0x100) == 0) {
+            if (src < srcend) c = *src++; else break;
+            flags = c | 0xFF00;  /* uses higher byte cleverly */
+        }   /* to count eight */
+        if (flags & 1) {
+            if (src < srcend) c = *src++; else break;
+            rval = vc_decompress_lzss_next_pixel(c, &state);
+            if (rval != 0)
+                return rval;
+            text_buf[r++] = c;
+            r &= (N - 1);
+        } else {
+            if (src < srcend) i = *src++; else break;
+            if (src < srcend) j = *src++; else break;
+            i |= ((j & 0xF0) << 4);
+            j  =  (j & 0x0F) + THRESHOLD;
+            for (k = 0; k <= j; k++) {
+                c = text_buf[(i + k) & (N - 1)];
+                rval = vc_decompress_lzss_next_pixel(c, &state);
+                if (rval != 0 )
+                    return rval;
+                text_buf[r++] = c;
+                r &= (N - 1);
+            }
+        }
+    }
+    return 0;
+}
+
+void noroot_icon_test(void) {
+    boolean_t o_vc_progress_enable = vc_progress_enable;
+
+    vc_progress_enable = 1;
+
+    PE_display_icon( 0, "noroot");
+
+    vc_progress_enable = o_vc_progress_enable;
+}
+
+
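Stripped of the pixel plotting, the Okumura decode loop above is easier to follow when it writes plain bytes. This standalone sketch (bounds handling simplified, illustrative only) produces the raw palette indices that vc_decompress_lzss_next_pixel() would consume:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    #define N         4096   /* ring buffer size, power of 2 */
    #define F         18     /* upper limit for match_length */
    #define THRESHOLD 2

    /* Decompress `srclen` LZSS bytes from `src` into `dst` (capacity `dstlen`).
     * Returns the number of bytes produced. Illustrative sketch only. */
    static size_t lzss_decode(uint8_t *dst, size_t dstlen,
                              const uint8_t *src, size_t srclen)
    {
        uint8_t text_buf[N + F - 1];
        const uint8_t *srcend = src + srclen;
        size_t out = 0;
        int i, j, k, r = N - F, c;
        unsigned flags = 0;

        memset(text_buf, ' ', N - F);
        for (;;) {
            if (((flags >>= 1) & 0x100) == 0) {
                if (src >= srcend) break;
                flags = *src++ | 0xFF00;          /* high byte counts eight flags */
            }
            if (flags & 1) {                      /* literal byte */
                if (src >= srcend) break;
                c = *src++;
                if (out < dstlen) dst[out++] = (uint8_t)c;
                text_buf[r++] = (uint8_t)c; r &= N - 1;
            } else {                              /* (position,length) back-reference */
                if (src + 1 >= srcend) break;
                i = *src++; j = *src++;
                i |= (j & 0xF0) << 4;
                j  = (j & 0x0F) + THRESHOLD;
                for (k = 0; k <= j; k++) {
                    c = text_buf[(i + k) & (N - 1)];
                    if (out < dstlen) dst[out++] = (uint8_t)c;
                    text_buf[r++] = (uint8_t)c; r &= N - 1;
                }
            }
        }
        return out;
    }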
 void vc_display_icon( vc_progress_element * desc,
                        const unsigned char * data )
 {
index b020ed4196e05a09d795037757e772672487a096..a576ce445fe3a203c9ea1c7eaf891256376b6891 100644 (file)
 #include <libkern/kernel_mach_header.h>
 #include <libkern/OSKextLibPrivate.h>
 
+#if    DEBUG
+#define DPRINTF(x...)  kprintf(x)
+#else
 #define DPRINTF(x...)
-//#define DPRINTF(x...)        kprintf(x)
+#endif
 
 static void machine_conf(void);
 
@@ -571,7 +574,7 @@ efi_init(void)
                        (void *) (uintptr_t) mptr->VirtualStart,
                        (void *) vm_addr,
                        (void *) vm_size);
-               pmap_map(vm_addr, phys_addr, phys_addr + round_page(vm_size),
+               pmap_map_bd(vm_addr, phys_addr, phys_addr + round_page(vm_size),
                     (mptr->Type == kEfiRuntimeServicesCode) ? VM_PROT_READ | VM_PROT_EXECUTE : VM_PROT_READ|VM_PROT_WRITE,
                     (mptr->Type == EfiMemoryMappedIO)       ? VM_WIMG_IO   : VM_WIMG_USE_DEFAULT);
            }
@@ -580,7 +583,7 @@ efi_init(void)
         if (args->Version != kBootArgsVersion2)
             panic("Incompatible boot args version %d revision %d\n", args->Version, args->Revision);
 
-        kprintf("Boot args version %d revision %d mode %d\n", args->Version, args->Revision, args->efiMode);
+       DPRINTF("Boot args version %d revision %d mode %d\n", args->Version, args->Revision, args->efiMode);
         if (args->efiMode == kBootArgsEfiMode64) {
             efi_set_tables_64((EFI_SYSTEM_TABLE_64 *) ml_static_ptovirt(args->efiSystemTable));
         } else {
index cc52576c5bf3ebfc0a5452ad05f324065f5802c2..01f8b409e249cf612e258e8634545344c9f67a14 100644 (file)
@@ -261,8 +261,7 @@ commpage_init_cpu_capabilities( void )
        if (tscFreq <= SLOW_TSC_THRESHOLD)      /* is TSC too slow for _commpage_nanotime?  */
                bits |= kSlow;
 
-       if (cpuid_features() & CPUID_FEATURE_AES)
-               bits |= kHasAES;
+       bits |= (cpuid_features() & CPUID_FEATURE_AES) ? kHasAES : 0;
 
        _cpu_capabilities = bits;               // set kernel version for use by drivers etc
 }
index a820ea7aa442cabbfdda9476a84b67011d0c189c..eee6a8173eb72cffecb8e8e79ba9099b1411fcb4 100644 (file)
@@ -58,6 +58,9 @@
 #define        kUP                             0x00008000      /* set if (kNumCPUs == 1) */
 #define        kNumCPUs                        0x00FF0000      /* number of CPUs (see _NumCPUs() below) */
 #define        kHasAVX1_0                      0x01000000
+#define        kHasRDRAND                      0x02000000
+#define        kHasF16C                        0x04000000
+#define        kHasENFSTRG                     0x08000000
 #define        kNumCPUsShift                   16              /* see _NumCPUs() below */
 
 #ifndef        __ASSEMBLER__
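The new bits land in the commpage capability word. From user space they can be read back through libSystem's private _get_cpu_capabilities(); treating that entry point as an assumption (it is not a public API), with the bit values copied from the definitions above:

    #include <stdio.h>

    /* Private libSystem entry point; assumed available, not a public API. */
    extern int _get_cpu_capabilities(void);

    #define kHasRDRAND   0x02000000
    #define kHasF16C     0x04000000
    #define kHasENFSTRG  0x08000000

    int main(void)
    {
        int caps = _get_cpu_capabilities();
        printf("RDRAND:%d F16C:%d ENFSTRG:%d\n",
               !!(caps & kHasRDRAND), !!(caps & kHasF16C), !!(caps & kHasENFSTRG));
        return 0;
    }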
index b92a796e9e6e18138f81cbbc1163a6131d231359..a29bfda2692f2e96f2161a9e94cf4458f9a63265 100644 (file)
 #include <i386/pmCPU.h>
 #include <i386/lock.h>
 
-//#define TOPO_DEBUG           1
-#if TOPO_DEBUG
-void debug_topology_print(void);
-#define DBG(x...)      kprintf("DBG: " x)
-#else
-#define DBG(x...)
-#endif /* TOPO_DEBUG */
-
+#define DIVISOR_GUARD(denom)                           \
+       if ((denom) == 0) {                             \
+               kprintf("%s: %d Zero divisor: " #denom, \
+                       __FILE__, __LINE__);            \
+       }
 
-void validate_topology(void);
+static void debug_topology_print(void);
 
-/* Only for 32bit values */
-#define bit(n)                 (1U << (n))
-#define bitmask(h,l)   ((bit(h)|(bit(h)-1)) & ~(bit(l)-1))
-#define bitfield(x,h,l)        (((x) & bitmask(h,l)) >> l)
+boolean_t      topo_dbg = FALSE;
 
 x86_pkg_t      *x86_pkgs               = NULL;
 uint32_t       num_Lx_caches[MAX_CACHE_DEPTH]  = { 0 };
@@ -67,6 +61,15 @@ x86_topology_parameters_t    topoParms;
 
 decl_simple_lock_data(, x86_topo_lock);
  
+static struct cpu_cache {
+       int     level;  int     type;
+} cpu_caches [LCACHE_MAX] = {
+       [L1D] = { 1,    CPU_CACHE_TYPE_DATA },
+       [L1I] = { 1,    CPU_CACHE_TYPE_INST },
+       [L2U] = { 2,    CPU_CACHE_TYPE_UNIF },
+       [L3U] = { 3,    CPU_CACHE_TYPE_UNIF },
+};
+
 static boolean_t
 cpu_is_hyperthreaded(void)
 {
@@ -107,66 +110,30 @@ x86_cache_alloc(void)
 static void
 x86_LLC_info(void)
 {
-    uint32_t           index;
-    uint32_t           cache_info[4];
-    uint32_t           cache_level     = 0;
+    int                        cache_level     = 0;
     uint32_t           nCPUsSharing    = 1;
     i386_cpu_info_t    *cpuinfo;
+    struct cpu_cache   *cachep;
+    int                        i;
 
     cpuinfo = cpuid_info();
 
-    do_cpuid(0, cache_info);
-
-    if (cache_info[eax] < 4) {
-       /*
-        * Processor does not support deterministic
-        * cache information. Set LLC sharing to 1, since
-        * we have no better information.
-        */
-       if (cpu_is_hyperthreaded()) {
-           topoParms.nCoresSharingLLC = 1;
-           topoParms.nLCPUsSharingLLC = 2;
-           topoParms.maxSharingLLC = 2;
-       } else {
-           topoParms.nCoresSharingLLC = 1;
-           topoParms.nLCPUsSharingLLC = 1;
-           topoParms.maxSharingLLC = 1;
-       }
-       return;
-    }
-
-    for (index = 0; ; index += 1) {
-       uint32_t                this_level;
-
-       cache_info[eax] = 4;
-       cache_info[ecx] = index;
-       cache_info[ebx] = 0;
-       cache_info[edx] = 0;
-
-       cpuid(cache_info);
+    for (i = 0, cachep = &cpu_caches[0]; i < LCACHE_MAX; i++, cachep++) {
 
-       /*
-        * See if all levels have been queried.
-        */
-       if (bitfield(cache_info[eax], 4, 0) == 0)
-           break;
-
-       /*
-        * Get the current level.
-        */
-       this_level = bitfield(cache_info[eax], 7, 5);
+       if (cachep->type == 0 || cpuid_info()->cache_size[i] == 0)
+           continue;
 
        /*
         * Only worry about it if it's a deeper level than
         * what we've seen before.
         */
-       if (this_level > cache_level) {
-           cache_level = this_level;
+       if (cachep->level > cache_level) {
+           cache_level = cachep->level;
 
            /*
             * Save the number of CPUs sharing this cache.
             */
-           nCPUsSharing = bitfield(cache_info[eax], 25, 14) + 1;
+           nCPUsSharing = cpuinfo->cache_sharing[i];
        }
     }
 
@@ -204,6 +171,8 @@ initTopoParms(void)
 
     cpuinfo = cpuid_info();
 
+    PE_parse_boot_argn("-topo", &topo_dbg, sizeof(topo_dbg));
+
     /*
      * We need to start with getting the LLC information correct.
      */
@@ -212,15 +181,21 @@ initTopoParms(void)
     /*
      * Compute the number of threads (logical CPUs) per core.
      */
+    DIVISOR_GUARD(cpuinfo->core_count);
     topoParms.nLThreadsPerCore = cpuinfo->thread_count / cpuinfo->core_count;
+    DIVISOR_GUARD(cpuinfo->cpuid_cores_per_package);
     topoParms.nPThreadsPerCore = cpuinfo->cpuid_logical_per_package / cpuinfo->cpuid_cores_per_package;
 
     /*
      * Compute the number of dies per package.
      */
+    DIVISOR_GUARD(topoParms.nCoresSharingLLC);
     topoParms.nLDiesPerPackage = cpuinfo->core_count / topoParms.nCoresSharingLLC;
+    DIVISOR_GUARD(topoParms.nPThreadsPerCore);
+    DIVISOR_GUARD(topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);
     topoParms.nPDiesPerPackage = cpuinfo->cpuid_cores_per_package / (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);
 
+
     /*
      * Compute the number of cores per die.
      */
@@ -245,27 +220,27 @@ initTopoParms(void)
     topoParms.nLThreadsPerPackage = topoParms.nLThreadsPerCore * topoParms.nLCoresPerPackage;
     topoParms.nPThreadsPerPackage = topoParms.nPThreadsPerCore * topoParms.nPCoresPerPackage;
 
-    DBG("\nCache Topology Parameters:\n");
-    DBG("\tLLC Depth:           %d\n", topoParms.LLCDepth);
-    DBG("\tCores Sharing LLC:   %d\n", topoParms.nCoresSharingLLC);
-    DBG("\tThreads Sharing LLC: %d\n", topoParms.nLCPUsSharingLLC);
-    DBG("\tmax Sharing of LLC:  %d\n", topoParms.maxSharingLLC);
-
-    DBG("\nLogical Topology Parameters:\n");
-    DBG("\tThreads per Core:  %d\n", topoParms.nLThreadsPerCore);
-    DBG("\tCores per Die:     %d\n", topoParms.nLCoresPerDie);
-    DBG("\tThreads per Die:   %d\n", topoParms.nLThreadsPerDie);
-    DBG("\tDies per Package:  %d\n", topoParms.nLDiesPerPackage);
-    DBG("\tCores per Package: %d\n", topoParms.nLCoresPerPackage);
-    DBG("\tThreads per Package: %d\n", topoParms.nLThreadsPerPackage);
-
-    DBG("\nPhysical Topology Parameters:\n");
-    DBG("\tThreads per Core: %d\n", topoParms.nPThreadsPerCore);
-    DBG("\tCores per Die:     %d\n", topoParms.nPCoresPerDie);
-    DBG("\tThreads per Die:   %d\n", topoParms.nPThreadsPerDie);
-    DBG("\tDies per Package:  %d\n", topoParms.nPDiesPerPackage);
-    DBG("\tCores per Package: %d\n", topoParms.nPCoresPerPackage);
-    DBG("\tThreads per Package: %d\n", topoParms.nPThreadsPerPackage);
+    TOPO_DBG("\nCache Topology Parameters:\n");
+    TOPO_DBG("\tLLC Depth:           %d\n", topoParms.LLCDepth);
+    TOPO_DBG("\tCores Sharing LLC:   %d\n", topoParms.nCoresSharingLLC);
+    TOPO_DBG("\tThreads Sharing LLC: %d\n", topoParms.nLCPUsSharingLLC);
+    TOPO_DBG("\tmax Sharing of LLC:  %d\n", topoParms.maxSharingLLC);
+
+    TOPO_DBG("\nLogical Topology Parameters:\n");
+    TOPO_DBG("\tThreads per Core:  %d\n", topoParms.nLThreadsPerCore);
+    TOPO_DBG("\tCores per Die:     %d\n", topoParms.nLCoresPerDie);
+    TOPO_DBG("\tThreads per Die:   %d\n", topoParms.nLThreadsPerDie);
+    TOPO_DBG("\tDies per Package:  %d\n", topoParms.nLDiesPerPackage);
+    TOPO_DBG("\tCores per Package: %d\n", topoParms.nLCoresPerPackage);
+    TOPO_DBG("\tThreads per Package: %d\n", topoParms.nLThreadsPerPackage);
+
+    TOPO_DBG("\nPhysical Topology Parameters:\n");
+    TOPO_DBG("\tThreads per Core: %d\n", topoParms.nPThreadsPerCore);
+    TOPO_DBG("\tCores per Die:     %d\n", topoParms.nPCoresPerDie);
+    TOPO_DBG("\tThreads per Die:   %d\n", topoParms.nPThreadsPerDie);
+    TOPO_DBG("\tDies per Package:  %d\n", topoParms.nPDiesPerPackage);
+    TOPO_DBG("\tCores per Package: %d\n", topoParms.nPCoresPerPackage);
+    TOPO_DBG("\tThreads per Package: %d\n", topoParms.nPThreadsPerPackage);
 
     topoParmsInited = TRUE;
 }
@@ -291,50 +266,29 @@ x86_cache_list(void)
     x86_cpu_cache_t    *root   = NULL;
     x86_cpu_cache_t    *cur    = NULL;
     x86_cpu_cache_t    *last   = NULL;
-    uint32_t           index;
-    uint32_t           cache_info[4];
-    uint32_t           nsets;
-
-    do_cpuid(0, cache_info);
-
-    if (cache_info[eax] < 4) {
-       /*
-        * Processor does not support deterministic
-        * cache information. Don't report anything
-        */
-       return NULL;
-    }
-
-    for (index = 0; ; index += 1) {
-       cache_info[eax] = 4;
-       cache_info[ecx] = index;
-       cache_info[ebx] = 0;
-       cache_info[edx] = 0;
-
-       cpuid(cache_info);
+    struct cpu_cache   *cachep;
+    int                        i;
 
-       /*
-        * See if all levels have been queried.
-        */
-       if (bitfield(cache_info[eax], 4, 0) == 0)
-           break;
+    /*
+     * Cons up a list driven not by CPUID leaf 4 (deterministic cache params)
+     * but by the table above plus parameters already cracked from cpuid...
+     */
+    for (i = 0, cachep = &cpu_caches[0]; i < LCACHE_MAX; i++, cachep++) {
 
+       if (cachep->type == 0 || cpuid_info()->cache_size[i] == 0)
+           continue;
+       
        cur = x86_cache_alloc();
-       if (cur == NULL) {
+       if (cur == NULL)
            break;
-       }
 
-       cur->type = bitfield(cache_info[eax], 4, 0);
-       cur->level = bitfield(cache_info[eax], 7, 5);
-       cur->nlcpus = (bitfield(cache_info[eax], 25, 14) + 1);
-       if (cpuid_info()->cpuid_model == 26)
-               cur->nlcpus /= cpu_is_hyperthreaded() ? 1 : 2;
-       cur->maxcpus = (bitfield(cache_info[eax], 25, 14) + 1);
-       cur->line_size = bitfield(cache_info[ebx], 11, 0) + 1;
-       cur->partitions = bitfield(cache_info[ebx], 21, 12) + 1;
-       cur->ways = bitfield(cache_info[ebx], 31, 22) + 1;
-       nsets = bitfield(cache_info[ecx], 31, 0) + 1;
-       cur->cache_size = cur->line_size * cur->ways * cur->partitions * nsets;
+       cur->type       = cachep->type;
+       cur->level      = cachep->level;
+       cur->nlcpus     = 0;
+       cur->maxcpus    = cpuid_info()->cache_sharing[i];
+       cur->partitions = cpuid_info()->cache_partitions[i];
+       cur->cache_size = cpuid_info()->cache_size[i];
+       cur->line_size  = cpuid_info()->cache_linesize;
 
        if (last == NULL) {
            root = cur;
@@ -343,14 +297,12 @@ x86_cache_list(void)
            last->next = cur;
            last = cur;
        }
-
-       cur->nlcpus = 0;
        num_Lx_caches[cur->level - 1] += 1;
     }
-
-    return(root);
+    return root;
 }
 
+
 static x86_cpu_cache_t *
 x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher)
 {
@@ -361,7 +313,6 @@ x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher)
        if (cur_cache->maxcpus  == matcher->maxcpus
            && cur_cache->type  == matcher->type
            && cur_cache->level == matcher->level
-           && cur_cache->ways  == matcher->ways
            && cur_cache->partitions == matcher->partitions
            && cur_cache->line_size  == matcher->line_size
            && cur_cache->cache_size == matcher->cache_size)
@@ -1060,6 +1011,9 @@ validate_topology(void)
     uint32_t           nCores;
     uint32_t           nCPUs;
 
+    if (topo_dbg)
+       debug_topology_print();
+
     /*
      * XXX
      *
@@ -1091,13 +1045,13 @@ validate_topology(void)
                panic("Die %d points to package %d, should be %d",
                      die->pdie_num, die->package->lpkg_num, pkg->lpkg_num);
 
-           DBG("Die(%d)->package %d\n",
+           TOPO_DBG("Die(%d)->package %d\n",
                die->pdie_num, pkg->lpkg_num);
 
            /*
             * Make sure that the die has the correct number of cores.
             */
-           DBG("Die(%d)->cores: ", die->pdie_num);
+           TOPO_DBG("Die(%d)->cores: ", die->pdie_num);
            nCores = 0;
            core = die->cores;
            while (core != NULL) {
@@ -1108,10 +1062,10 @@ validate_topology(void)
                    panic("Core %d points to die %d, should be %d",
                          core->pcore_num, core->die->pdie_num, die->pdie_num);
                nCores += 1;
-               DBG("%d ", core->pcore_num);
+               TOPO_DBG("%d ", core->pcore_num);
                core = core->next_in_die;
            }
-           DBG("\n");
+           TOPO_DBG("\n");
 
            if (nCores != topoParms.nLCoresPerDie)
                panic("Should have %d Cores, but only found %d for Die %d",
@@ -1120,7 +1074,7 @@ validate_topology(void)
            /*
             * Make sure that the die has the correct number of CPUs.
             */
-           DBG("Die(%d)->lcpus: ", die->pdie_num);
+           TOPO_DBG("Die(%d)->lcpus: ", die->pdie_num);
            nCPUs = 0;
            lcpu = die->lcpus;
            while (lcpu != NULL) {
@@ -1131,10 +1085,10 @@ validate_topology(void)
                    panic("CPU %d points to die %d, should be %d",
                          lcpu->cpu_num, lcpu->die->pdie_num, die->pdie_num);
                nCPUs += 1;
-               DBG("%d ", lcpu->cpu_num);
+               TOPO_DBG("%d ", lcpu->cpu_num);
                lcpu = lcpu->next_in_die;
            }
-           DBG("\n");
+           TOPO_DBG("\n");
 
            if (nCPUs != topoParms.nLThreadsPerDie)
                panic("Should have %d Threads, but only found %d for Die %d",
@@ -1160,7 +1114,7 @@ validate_topology(void)
            if (core->package != pkg)
                panic("Core %d points to package %d, should be %d",
                      core->pcore_num, core->package->lpkg_num, pkg->lpkg_num);
-           DBG("Core(%d)->package %d\n",
+           TOPO_DBG("Core(%d)->package %d\n",
                core->pcore_num, pkg->lpkg_num);
 
            /*
@@ -1168,7 +1122,7 @@ validate_topology(void)
             */
            nCPUs = 0;
            lcpu = core->lcpus;
-           DBG("Core(%d)->lcpus: ", core->pcore_num);
+           TOPO_DBG("Core(%d)->lcpus: ", core->pcore_num);
            while (lcpu != NULL) {
                if (lcpu->core == NULL)
                    panic("CPU(%d)->core is NULL",
@@ -1176,11 +1130,11 @@ validate_topology(void)
                if (lcpu->core != core)
                    panic("CPU %d points to core %d, should be %d",
                          lcpu->cpu_num, lcpu->core->pcore_num, core->pcore_num);
-               DBG("%d ", lcpu->cpu_num);
+               TOPO_DBG("%d ", lcpu->cpu_num);
                nCPUs += 1;
                lcpu = lcpu->next_in_core;
            }
-           DBG("\n");
+           TOPO_DBG("\n");
 
            if (nCPUs != topoParms.nLThreadsPerCore)
                panic("Should have %d Threads, but only found %d for Core %d",
@@ -1205,7 +1159,7 @@ validate_topology(void)
            if (lcpu->package != pkg)
                panic("CPU %d points to package %d, should be %d",
                      lcpu->cpu_num, lcpu->package->lpkg_num, pkg->lpkg_num);
-           DBG("CPU(%d)->package %d\n",
+           TOPO_DBG("CPU(%d)->package %d\n",
                lcpu->cpu_num, pkg->lpkg_num);
            nCPUs += 1;
            lcpu = lcpu->next_in_pkg;
@@ -1219,11 +1173,10 @@ validate_topology(void)
     }
 }
 
-#if TOPO_DEBUG
 /*
  * Prints out the topology
  */
-void
+static void
 debug_topology_print(void)
 {
     x86_pkg_t          *pkg;
@@ -1276,4 +1229,3 @@ debug_topology_print(void)
        pkg = pkg->next;
     }
 }
-#endif /* TOPO_DEBUG */
index fc7ef83e64df940595791ef4e596d92b1a6fe2dd..a576ef70d41bcbc487a784b9b4b78e7ec86390ac 100644 (file)
@@ -75,4 +75,13 @@ extern void x86_set_pkg_numbers(x86_pkg_t *pkg, x86_lcpu_t *lcpu);
 
 extern x86_topology_parameters_t       topoParms;
 
+extern boolean_t       topo_dbg;
+#define TOPO_DBG(x...)                 \
+       do {                            \
+               if (topo_dbg)           \
+                       kprintf(x);     \
+       } while (0)
+
+extern void validate_topology(void);
+
 #endif /* _I386_CPU_THREADS_H_ */
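TOPO_DBG replaces the compile-time TOPO_DEBUG switch with a runtime flag that initTopoParms() fills from the -topo boot-arg via PE_parse_boot_argn(). The pattern, modeled in user space with an argv check standing in for the boot-arg lookup:

    #include <stdio.h>

    static int topo_dbg = 0;            /* set from the "-topo" boot-arg in xnu */

    #define TOPO_DBG(x...)       \
        do {                     \
            if (topo_dbg)        \
                printf(x);       \
        } while (0)

    int main(int argc, char **argv)
    {
        (void)argv;
        topo_dbg = (argc > 1);          /* stand-in for PE_parse_boot_argn("-topo", ...) */
        TOPO_DBG("Threads per Core: %d\n", 2);
        return 0;
    }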
index 24c4f5c8177999b4580149fe6e9dbfd90afb225e..6be77e6ffb3b04f2a97cb6ce149ca3ca72fa54a6 100644 (file)
 #include <i386/lapic.h>
 #include <i386/machine_routines.h>
 
-//#define TOPO_DEBUG 1
-#if TOPO_DEBUG
-#define DBG(x...)      kprintf("DBG: " x)
-#else
-#define DBG(x...)
-#endif
-void debug_topology_print(void);
-void validate_topology(void);
-
 __private_extern__ void qsort(
     void * array,
     size_t nmembers,
@@ -85,15 +76,16 @@ cpu_topology_sort(int ncpus)
        /* Lights out for this */
        istate = ml_set_interrupts_enabled(FALSE);
 
-#ifdef TOPO_DEBUG
-       DBG("cpu_topology_start() %d cpu%s registered\n",
-               ncpus, (ncpus > 1) ? "s" : "");
-       for (i = 0; i < ncpus; i++) {
-               cpu_data_t      *cpup = cpu_datap(i);
-               DBG("\tcpu_data[%d]:0x%08x local apic 0x%x\n",
-                       i, (unsigned) cpup, cpup->cpu_phys_number);
+       if (topo_dbg) {
+               TOPO_DBG("cpu_topology_start() %d cpu%s registered\n",
+                       ncpus, (ncpus > 1) ? "s" : "");
+               for (i = 0; i < ncpus; i++) {
+                       cpu_data_t      *cpup = cpu_datap(i);
+                       TOPO_DBG("\tcpu_data[%d]:%p local apic 0x%x\n",
+                               i, (void *) cpup, cpup->cpu_phys_number);
+               }
        }
-#endif
+
        /*
         * Re-order the cpu_data_ptr vector sorting by physical id.
         * Skip the boot processor, it's required to be correct.
@@ -104,14 +96,14 @@ cpu_topology_sort(int ncpus)
                        sizeof(cpu_data_t *),
                        lapicid_cmp);
        }
-#ifdef TOPO_DEBUG
-       DBG("cpu_topology_start() after sorting:\n");
-       for (i = 0; i < ncpus; i++) {
-               cpu_data_t      *cpup = cpu_datap(i);
-               DBG("\tcpu_data[%d]:0x%08x local apic 0x%x\n",
-                       i, (unsigned) cpup, cpup->cpu_phys_number);
+       if (topo_dbg) {
+               TOPO_DBG("cpu_topology_start() after sorting:\n");
+               for (i = 0; i < ncpus; i++) {
+                       cpu_data_t      *cpup = cpu_datap(i);
+                       TOPO_DBG("\tcpu_data[%d]:%p local apic 0x%x\n",
+                               i, (void *) cpup, cpup->cpu_phys_number);
+               }
        }
-#endif
 
        /*
         * Fix up logical numbers and reset the map kept by the lapic code.
@@ -142,13 +134,10 @@ cpu_topology_sort(int ncpus)
                x86_set_pkg_numbers(pkg, &cpup->lcpu);
        }
 
-#if TOPO_DEBUG
-       debug_topology_print();
-#endif /* TOPO_DEBUG */
        validate_topology();
 
        ml_set_interrupts_enabled(istate);
-       DBG("cpu_topology_start() LLC is L%d\n", topoParms.LLCDepth + 1);
+       TOPO_DBG("cpu_topology_start() LLC is L%d\n", topoParms.LLCDepth + 1);
 
        /*
         * Let the CPU Power Management know that the topology is stable.
@@ -161,7 +150,7 @@ cpu_topology_sort(int ncpus)
         * for their LLC cache. Each affinity set possesses a processor set
         * into which each logical processor is added.
         */
-       DBG("cpu_topology_start() creating affinity sets:\n");
+       TOPO_DBG("cpu_topology_start() creating affinity sets:\n");
        for (i = 0; i < ncpus; i++) {
                cpu_data_t              *cpup = cpu_datap(i);
                x86_lcpu_t              *lcpup = cpu_to_lcpu(i);
@@ -184,11 +173,11 @@ cpu_topology_sort(int ncpus)
                                        pset_create(pset_node_root());
                        if (aset->pset == PROCESSOR_SET_NULL)
                                panic("cpu_topology_start: pset_create");
-                       DBG("\tnew set %p(%d) pset %p for cache %p\n",
+                       TOPO_DBG("\tnew set %p(%d) pset %p for cache %p\n",
                                aset, aset->num, aset->pset, aset->cache);
                }
 
-               DBG("\tprocessor_init set %p(%d) lcpup %p(%d) cpu %p processor %p\n",
+               TOPO_DBG("\tprocessor_init set %p(%d) lcpup %p(%d) cpu %p processor %p\n",
                        aset, aset->num, lcpup, lcpup->cpu_num, cpup, cpup->cpu_processor);
 
                if (i != master_cpu)
@@ -213,10 +202,10 @@ cpu_topology_start_cpu( int cpunum )
        int             i = cpunum;
 
        /* Decide whether to start a CPU, and actually start it */
-       DBG("cpu_topology_start() processor_start():\n");
+       TOPO_DBG("cpu_topology_start() processor_start():\n");
        if( i < ncpus)
        {
-               DBG("\tlcpu %d\n", cpu_datap(i)->cpu_number);
+               TOPO_DBG("\tlcpu %d\n", cpu_datap(i)->cpu_number);
                processor_start(cpu_datap(i)->cpu_processor); 
                return KERN_SUCCESS;
        }
@@ -230,7 +219,7 @@ lapicid_cmp(const void *x, const void *y)
        cpu_data_t      *cpu_x = *((cpu_data_t **)(uintptr_t)x);
        cpu_data_t      *cpu_y = *((cpu_data_t **)(uintptr_t)y);
 
-       DBG("lapicid_cmp(%p,%p) (%d,%d)\n",
+       TOPO_DBG("lapicid_cmp(%p,%p) (%d,%d)\n",
                x, y, cpu_x->cpu_phys_number, cpu_y->cpu_phys_number);
        if (cpu_x->cpu_phys_number < cpu_y->cpu_phys_number)
                return -1;
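
lapicid_cmp() is the comparator that cpu_topology_sort() hands to the kernel's private qsort() to reorder the cpu_data pointer vector by physical APIC id. A stand-alone sketch of the same sort using the C library's qsort(), with illustrative types:

#include <stdio.h>
#include <stdlib.h>

typedef struct { unsigned phys_id; } demo_cpu_t;

static int
phys_id_cmp(const void *x, const void *y)
{
        const demo_cpu_t *a = *(demo_cpu_t * const *)x;
        const demo_cpu_t *b = *(demo_cpu_t * const *)y;
        return (a->phys_id < b->phys_id) ? -1 : (a->phys_id > b->phys_id);
}

int main(void)
{
        demo_cpu_t c0 = {6}, c1 = {0}, c2 = {2};
        demo_cpu_t *cpus[] = { &c0, &c1, &c2 };

        /* The kernel skips the boot processor; this sorts the whole array. */
        qsort(cpus, 3, sizeof(demo_cpu_t *), phys_id_cmp);
        for (int i = 0; i < 3; i++)
                printf("%u ", cpus[i]->phys_id);   /* prints: 0 2 6 */
        printf("\n");
        return 0;
}
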
index c6891aefb76adc506f04278eaad490dde561aa7d..b72a4b14fcb7637aaebc96ad0e5d4371845d2cd8 100644 (file)
 #include <ddb/db_expr.h>
 #endif
 
+static boolean_t       cpuid_dbg
+#if DEBUG
+                                 = TRUE;
+#else
+                                 = FALSE;
+#endif
+#define DBG(x...)                      \
+       do {                            \
+               if (cpuid_dbg)          \
+                       kprintf(x);     \
+       } while (0)                     \
+
 #define min(a,b) ((a) < (b) ? (a) : (b))
 #define quad(hi,lo)    (((uint64_t)(hi)) << 32 | (lo))
 
@@ -231,6 +243,8 @@ static i386_cpu_info_t      cpuid_cpu_info;
 static void cpuid_fn(uint32_t selector, uint32_t *result)
 {
        do_cpuid(selector, result);
+       DBG("cpuid_fn(0x%08x) eax:0x%08x ebx:0x%08x ecx:0x%08x edx:0x%08x\n",
+               selector, result[0], result[1], result[2], result[3]);
 }
 #else
 static void cpuid_fn(uint32_t selector, uint32_t *result)
@@ -248,9 +262,15 @@ static void cpuid_fn(uint32_t selector, uint32_t *result)
        } else {
                do_cpuid(selector, result);
        }
+       DBG("cpuid_fn(0x%08x) eax:0x%08x ebx:0x%08x ecx:0x%08x edx:0x%08x\n",
+               selector, result[0], result[1], result[2], result[3]);
 }
 #endif
 
+static const char *cache_type_str[LCACHE_MAX] = {
+       "Lnone", "L1I", "L1D", "L2U", "L3U"
+};
+
 /* this function is Intel-specific */
 static void
 cpuid_set_cache_info( i386_cpu_info_t * info_p )
@@ -263,6 +283,8 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p )
        unsigned int    j;
        boolean_t       cpuid_deterministic_supported = FALSE;
 
+       DBG("cpuid_set_cache_info(%p)\n", info_p);
+
        bzero( linesizes, sizeof(linesizes) );
 
        /* Get processor cache descriptor info using leaf 2.  We don't use
@@ -311,7 +333,7 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p )
                reg[eax] = 4;           /* cpuid request 4 */
                reg[ecx] = index;       /* index starting at 0 */
                cpuid(reg);
-//kprintf("cpuid(4) index=%d eax=%p\n", index, reg[eax]);
+               DBG("cpuid(4) index=%d eax=0x%x\n", index, reg[eax]);
                cache_type = bitfield32(reg[eax], 4, 0);
                if (cache_type == 0)
                        break;          /* no more caches */
@@ -354,6 +376,13 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p )
                        info_p->cache_partitions[type] = cache_partitions;
                        linesizes[type] = cache_linesize;
 
+                       DBG(" cache_size[%s]      : %d\n",
+                           cache_type_str[type], cache_size);
+                       DBG(" cache_sharing[%s]   : %d\n",
+                           cache_type_str[type], cache_sharing);
+                       DBG(" cache_partitions[%s]: %d\n",
+                           cache_type_str[type], cache_partitions);
+
                        /*
                         * Overwrite associativity determined via
                         * CPUID.0x80000006 -- this leaf is more
@@ -389,6 +418,7 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p )
                                vm_cache_geometry_colors = colors;
                }
        } 
+       DBG(" vm_cache_geometry_colors: %d\n", vm_cache_geometry_colors);
        
        /*
         * If deterministic cache parameters are not available, use
@@ -403,6 +433,13 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p )
                info_p->cache_partitions[L2U] = 1;
 
                linesizes[L2U] = info_p->cpuid_cache_linesize;
+
+               DBG(" cache_size[L2U]      : %d\n",
+                   info_p->cache_size[L2U]);
+               DBG(" cache_sharing[L2U]   : 1\n");
+               DBG(" cache_partitions[L2U]: 1\n");
+               DBG(" linesizes[L2U]       : %d\n",
+                   info_p->cpuid_cache_linesize);
        }
        
        /*
@@ -414,16 +451,19 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p )
        else if (linesizes[L1D])
                info_p->cache_linesize = linesizes[L1D];
        else panic("no linesize");
+       DBG(" cache_linesize    : %d\n", info_p->cache_linesize);
 
        /*
         * Extract and publish TLB information from Leaf 2 descriptors.
         */
+       DBG(" %ld leaf2 descriptors:\n", sizeof(info_p->cache_info));
        for (i = 1; i < sizeof(info_p->cache_info); i++) {
                cpuid_cache_descriptor_t        *descp;
                int                             id;
                int                             level;
                int                             page;
 
+               DBG(" 0x%02x", info_p->cache_info[i]);
                descp = cpuid_leaf2_find(info_p->cache_info[i]);
                if (descp == NULL)
                        continue;
@@ -458,6 +498,7 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p )
                        info_p->cpuid_stlb = descp->entries;
                }
        }
+       DBG("\n");
 }
 
 static void
@@ -466,6 +507,8 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
        uint32_t        reg[4];
         char            str[128], *p;
 
+       DBG("cpuid_set_generic_info(%p)\n", info_p);
+
        /* do cpuid 0 to get vendor */
        cpuid_fn(0, reg);
        info_p->cpuid_max_basic = reg[eax];
@@ -575,11 +618,30 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                                quad(reg[ecx], reg[edx]);
        }
 
+       DBG(" max_basic           : %d\n", info_p->cpuid_max_basic);
+       DBG(" max_ext             : 0x%08x\n", info_p->cpuid_max_ext);
+       DBG(" vendor              : %s\n", info_p->cpuid_vendor);
+       DBG(" brand_string        : %s\n", info_p->cpuid_brand_string);
+       DBG(" signature           : 0x%08x\n", info_p->cpuid_signature);
+       DBG(" stepping            : %d\n", info_p->cpuid_stepping);
+       DBG(" model               : %d\n", info_p->cpuid_model);
+       DBG(" family              : %d\n", info_p->cpuid_family);
+       DBG(" type                : %d\n", info_p->cpuid_type);
+       DBG(" extmodel            : %d\n", info_p->cpuid_extmodel);
+       DBG(" extfamily           : %d\n", info_p->cpuid_extfamily);
+       DBG(" brand               : %d\n", info_p->cpuid_brand);
+       DBG(" features            : 0x%016llx\n", info_p->cpuid_features);
+       DBG(" extfeatures         : 0x%016llx\n", info_p->cpuid_extfeatures);
+       DBG(" logical_per_package : %d\n", info_p->cpuid_logical_per_package);
+       DBG(" microcode_version   : 0x%08x\n", info_p->cpuid_microcode_version);
+
        /* Fold in the Invariant TSC feature bit, if present */
        if (info_p->cpuid_max_ext >= 0x80000007) {
                cpuid_fn(0x80000007, reg);  
                info_p->cpuid_extfeatures |=
                                reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;
+               DBG(" extfeatures         : 0x%016llx\n",
+                   info_p->cpuid_extfeatures);
        }
 
        if (info_p->cpuid_max_basic >= 0x5) {
@@ -594,6 +656,12 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                cmp->extensions   = reg[ecx];
                cmp->sub_Cstates  = reg[edx];
                info_p->cpuid_mwait_leafp = cmp;
+
+               DBG(" Monitor/Mwait Leaf:\n");
+               DBG("  linesize_min : %d\n", cmp->linesize_min);
+               DBG("  linesize_max : %d\n", cmp->linesize_max);
+               DBG("  extensions   : %d\n", cmp->extensions);
+               DBG("  sub_Cstates  : 0x%08x\n", cmp->sub_Cstates);
        }
 
        if (info_p->cpuid_max_basic >= 0x6) {
@@ -614,6 +682,18 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                ctp->hardware_feedback    = bitfield32(reg[ecx], 1, 1);
                ctp->energy_policy        = bitfield32(reg[ecx], 2, 2);
                info_p->cpuid_thermal_leafp = ctp;
+
+               DBG(" Thermal/Power Leaf:\n");
+               DBG("  sensor               : %d\n", ctp->sensor);
+               DBG("  dynamic_acceleration : %d\n", ctp->dynamic_acceleration);
+               DBG("  invariant_APIC_timer : %d\n", ctp->invariant_APIC_timer);
+               DBG("  core_power_limits    : %d\n", ctp->core_power_limits);
+               DBG("  fine_grain_clock_mod : %d\n", ctp->fine_grain_clock_mod);
+               DBG("  package_thermal_intr : %d\n", ctp->package_thermal_intr);
+               DBG("  thresholds           : %d\n", ctp->thresholds);
+               DBG("  ACNT_MCNT            : %d\n", ctp->ACNT_MCNT);
+               DBG("  hardware_feedback    : %d\n", ctp->hardware_feedback);
+               DBG("  energy_policy        : %d\n", ctp->energy_policy);
        }
 
        if (info_p->cpuid_max_basic >= 0xa) {
@@ -631,6 +711,15 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                capp->fixed_number  = bitfield32(reg[edx],  4,  0);
                capp->fixed_width   = bitfield32(reg[edx], 12,  5);
                info_p->cpuid_arch_perf_leafp = capp;
+
+               DBG(" Architectural Performance Monitoring Leaf:\n");
+               DBG("  version       : %d\n", capp->version);
+               DBG("  number        : %d\n", capp->number);
+               DBG("  width         : %d\n", capp->width);
+               DBG("  events_number : %d\n", capp->events_number);
+               DBG("  events        : %d\n", capp->events);
+               DBG("  fixed_number  : %d\n", capp->fixed_number);
+               DBG("  fixed_width   : %d\n", capp->fixed_width);
        }
 
        if (info_p->cpuid_max_basic >= 0xd) {
@@ -640,6 +729,12 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                 */
                cpuid_fn(0xd, info_p->cpuid_xsave_leaf.extended_state);
                info_p->cpuid_xsave_leafp = xsp;
+
+               DBG(" XSAVE Leaf:\n");
+               DBG("  EAX           : 0x%x\n", xsp->extended_state[eax]);
+               DBG("  EBX           : 0x%x\n", xsp->extended_state[ebx]);
+               DBG("  ECX           : 0x%x\n", xsp->extended_state[ecx]);
+               DBG("  EDX           : 0x%x\n", xsp->extended_state[edx]);
        }
 
        return;
@@ -684,6 +779,7 @@ cpuid_set_cpufamily(i386_cpu_info_t *info_p)
        }
 
        info_p->cpuid_cpufamily = cpufamily;
+       DBG("cpuid_set_cpufamily(%p) returning 0x%x\n", info_p, cpufamily);
        return cpufamily;
 }
 /*
@@ -694,7 +790,9 @@ void
 cpuid_set_info(void)
 {
        i386_cpu_info_t         *info_p = &cpuid_cpu_info;
-       
+
+       PE_parse_boot_argn("-cpuid", &cpuid_dbg, sizeof(cpuid_dbg));
+
        bzero((void *)info_p, sizeof(cpuid_cpu_info));
 
        cpuid_set_generic_info(info_p);
@@ -734,11 +832,14 @@ cpuid_set_info(void)
                info_p->core_count   = info_p->cpuid_cores_per_package;
                info_p->thread_count = info_p->cpuid_logical_per_package;
        }
+       DBG("cpuid_set_info():\n");
+       DBG("  core_count   : %d\n", info_p->core_count);
+       DBG("  thread_count : %d\n", info_p->thread_count);
 
        cpuid_cpu_info.cpuid_model_string = ""; /* deprecated */
 }
 
-static struct {
+static struct table {
        uint64_t        mask;
        const char      *name;
 } feature_map[] = {
@@ -811,6 +912,28 @@ extfeature_map[] = {
        {0, 0}
 };
 
+static char *
+cpuid_get_names(struct table *map, uint64_t bits, char *buf, unsigned buf_len)
+{
+       size_t  len = 0;
+       char    *p = buf;
+       int     i;
+
+       for (i = 0; map[i].mask != 0; i++) {
+               if ((bits & map[i].mask) == 0)
+                       continue;
+               if (len && ((size_t) (p - buf) < (buf_len - 1)))
+                       *p++ = ' ';
+               len = min(strlen(map[i].name), (size_t)((buf_len-1)-(p-buf)));
+               if (len == 0)
+                       break;
+               bcopy(map[i].name, p, len);
+               p += len;
+       }
+       *p = '\0';
+       return buf;
+}
+
 i386_cpu_info_t        *
 cpuid_info(void)
 {
@@ -825,58 +948,24 @@ cpuid_info(void)
 char *
 cpuid_get_feature_names(uint64_t features, char *buf, unsigned buf_len)
 {
-       size_t  len = 0;
-       char    *p = buf;
-       int     i;
-
-       for (i = 0; feature_map[i].mask != 0; i++) {
-               if ((features & feature_map[i].mask) == 0)
-                       continue;
-               if (len && ((size_t)(p - buf) < (buf_len - 1)))
-                       *p++ = ' ';
-
-               len = min(strlen(feature_map[i].name), (size_t) ((buf_len-1) - (p-buf)));
-               if (len == 0)
-                       break;
-               bcopy(feature_map[i].name, p, len);
-               p += len;
-       }
-       *p = '\0';
-       return buf;
+       return cpuid_get_names(feature_map, features, buf, buf_len); 
 }
 
 char *
 cpuid_get_extfeature_names(uint64_t extfeatures, char *buf, unsigned buf_len)
 {
-       size_t  len = 0;
-       char    *p = buf;
-       int     i;
-
-       for (i = 0; extfeature_map[i].mask != 0; i++) {
-               if ((extfeatures & extfeature_map[i].mask) == 0)
-                       continue;
-               if (len && ((size_t) (p - buf) < (buf_len - 1)))
-                       *p++ = ' ';
-               len = min(strlen(extfeature_map[i].name), (size_t) ((buf_len-1)-(p-buf)));
-               if (len == 0)
-                       break;
-               bcopy(extfeature_map[i].name, p, len);
-               p += len;
-       }
-       *p = '\0';
-       return buf;
+       return cpuid_get_names(extfeature_map, extfeatures, buf, buf_len); 
 }
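
The two public name-formatting routines above now share cpuid_get_names(), a single table-driven walk over {mask, name} pairs. A self-contained sketch of the idea, with simplified bounds handling and an illustrative two-entry table:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct map_entry { uint64_t mask; const char *name; };

static char *
names_from_bits(const struct map_entry *map, uint64_t bits,
                char *buf, size_t buf_len)
{
        char *p = buf;

        for (int i = 0; map[i].mask != 0; i++) {
                if ((bits & map[i].mask) == 0)
                        continue;
                size_t len = strlen(map[i].name);
                if ((size_t)(p - buf) + len + 2 > buf_len)
                        break;                  /* out of room */
                if (p != buf)
                        *p++ = ' ';             /* separator between names */
                memcpy(p, map[i].name, len);
                p += len;
        }
        *p = '\0';
        return buf;
}

int main(void)
{
        static const struct map_entry demo_map[] = {
                { 1ULL << 0,  "FPU" },
                { 1ULL << 25, "SSE" },
                { 0, 0 }
        };
        char buf[64];

        printf("%s\n", names_from_bits(demo_map, (1ULL << 0) | (1ULL << 25),
                                       buf, sizeof(buf)));   /* FPU SSE */
        return 0;
}
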
 
-
 void
 cpuid_feature_display(
        const char      *header)
 {
        char    buf[256];
 
-       kprintf("%s: %s\n", header,
-                 cpuid_get_feature_names(cpuid_features(),
-                                               buf, sizeof(buf)));
+       kprintf("%s: %s", header,
+                cpuid_get_feature_names(cpuid_features(), buf, sizeof(buf)));
+       kprintf("\n");
        if (cpuid_features() & CPUID_FEATURE_HTT) {
 #define s_if_plural(n) ((n > 1) ? "s" : "")
                kprintf("  HTT: %d core%s per package;"
@@ -962,7 +1051,7 @@ cpuid_extfeatures(void)
        return cpuid_info()->cpuid_extfeatures;
 }
  
-
 #if MACH_KDB
 
 /*
index 51bd428f61808fc899184ecbd66ff5a7e3f5a5f2..c953289613e9fdbae27a1d4809ce3409628bdc3c 100644 (file)
 #define CPUID_FEATURE_TM2       _HBit(8)  /* Thermal Monitor 2 */
 #define CPUID_FEATURE_SSSE3     _HBit(9)  /* Supplemental SSE3 instructions */
 #define CPUID_FEATURE_CID       _HBit(10) /* L1 Context ID */
+#define CPUID_FEATURE_SEGLIM64  _HBit(11) /* 64-bit segment limit checking */
 #define CPUID_FEATURE_CX16      _HBit(13) /* CmpXchg16b instruction */
 #define CPUID_FEATURE_xTPR      _HBit(14) /* Send Task PRiority msgs */
 #define CPUID_FEATURE_PDCM      _HBit(15) /* Perf/Debug Capability MSR */
 
+#define CPUID_FEATURE_PCID      _HBit(17) /* ASID-PCID support */
 #define CPUID_FEATURE_DCA       _HBit(18) /* Direct Cache Access */
 #define CPUID_FEATURE_SSE4_1    _HBit(19) /* Streaming SIMD extensions 4.1 */
 #define CPUID_FEATURE_SSE4_2    _HBit(20) /* Streaming SIMD extensions 4.2 */
 #define CPUID_FEATURE_xAPIC     _HBit(21) /* Extended APIC Mode */
 #define CPUID_FEATURE_MOVBE     _HBit(22) /* MOVBE instruction */
 #define CPUID_FEATURE_POPCNT    _HBit(23) /* POPCNT instruction */
+#define CPUID_FEATURE_TSCTMR    _HBit(24) /* TSC deadline timer */
 #define CPUID_FEATURE_AES       _HBit(25) /* AES instructions */
 #define CPUID_FEATURE_XSAVE     _HBit(26) /* XSAVE instructions */
 #define CPUID_FEATURE_OSXSAVE   _HBit(27) /* XGETBV/XSETBV instructions */
-#define CPUID_FEATURE_VMM       _HBit(31) /* VMM (Hypervisor) present */
-#define CPUID_FEATURE_SEGLIM64  _HBit(11) /* 64-bit segment limit checking */
-#define CPUID_FEATURE_PCID      _HBit(17) /* ASID-PCID support */
-#define CPUID_FEATURE_TSCTMR    _HBit(24) /* TSC deadline timer */
 #define CPUID_FEATURE_AVX1_0   _HBit(28) /* AVX 1.0 instructions */
+#define CPUID_FEATURE_VMM       _HBit(31) /* VMM (Hypervisor) present */
+#define CPUID_FEATURE_RDRAND   _HBit(29) /* RDRAND instruction */
+#define CPUID_FEATURE_F16C     _HBit(30) /* Float16 convert instructions */
+
+/*
+ * Leaf 7, subleaf 0 additional features.
+ * Bits returned in %ebx to a CPUID request with {%eax,%ecx} of (0x7,0x0):
+ */
+#define CPUID_LEAF7_FEATURE_RDWRFSGS _Bit(0)   /* FS/GS base read/write */
+#define CPUID_LEAF7_FEATURE_SMEP     _Bit(7)   /* Supervisor Mode Execute Protect */
+#define CPUID_LEAF7_FEATURE_ENFSTRG  _Bit(9)   /* ENhanced Fast STRinG copy */
 
 /*
  * The CPUID_EXTFEATURE_XXX values define 64-bit values
 #define CPUID_MODEL_SANDYBRIDGE        0x2A
 #define CPUID_MODEL_JAKETOWN   0x2D
 
+
 #ifndef ASSEMBLER
 #include <stdint.h>
 #include <mach/mach_types.h>
@@ -322,6 +333,7 @@ typedef struct {
        cpuid_thermal_leaf_t    *cpuid_thermal_leafp;
        cpuid_arch_perf_leaf_t  *cpuid_arch_perf_leafp;
        cpuid_xsave_leaf_t      *cpuid_xsave_leafp;
+       uint32_t                cpuid_leaf7_features;
 } i386_cpu_info_t;
 
 #ifdef __cplusplus
@@ -338,9 +350,11 @@ extern void                cpuid_feature_display(const char *);
 extern void            cpuid_extfeature_display(const char *);
 extern char *          cpuid_get_feature_names(uint64_t, char *, unsigned);
 extern char *          cpuid_get_extfeature_names(uint64_t, char *, unsigned);
+extern char *          cpuid_get_leaf7_feature_names(uint64_t, char *, unsigned);
 
 extern uint64_t                cpuid_features(void);
 extern uint64_t                cpuid_extfeatures(void);
+extern uint64_t                cpuid_leaf7_features(void);
 extern uint32_t                cpuid_family(void);
 extern uint32_t                cpuid_cpufamily(void);
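
The header now exports leaf-7/subleaf-0 plumbing (cpuid_leaf7_features() and its name formatter). For reference, the same bits can be probed from user space with the compiler's <cpuid.h> helper; this is a sketch for GCC 7+/Clang on x86, not the kernel's mechanism:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned eax, ebx, ecx, edx;

        /* CPUID.(EAX=7,ECX=0):EBX carries the structured extended features. */
        if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
                printf("leaf7 ebx = 0x%08x\n", ebx);
                printf("  RDWRFSGS: %s\n", (ebx & (1u << 0)) ? "yes" : "no");
                printf("  SMEP:     %s\n", (ebx & (1u << 7)) ? "yes" : "no");
                printf("  ENFSTRG:  %s\n", (ebx & (1u << 9)) ? "yes" : "no");
        }
        return 0;
}
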
        
index 7227b93a2db4c686191c26aebc42e8af272e6067..478eb2b4e257ffb03b8688b9b08b274cf2193a9f 100644 (file)
@@ -485,7 +485,7 @@ fpu_set_fxstate(
        struct x86_fx_thread_state *new_ifps;
        x86_float_state64_t     *state;
        pcb_t   pcb;
-       size_t  state_size = (((f == x86_AVX_STATE32) || (f == x86_AVX_STATE64)) && (fpu_YMM_present == TRUE)) ? sizeof(struct x86_avx_thread_state) : sizeof(struct x86_fx_thread_state);
+       size_t  state_size = sizeof(struct x86_fx_thread_state);
        boolean_t       old_valid;
        if (fp_kind == FP_NO)
            return KERN_FAILURE;
@@ -538,11 +538,29 @@ fpu_set_fxstate(
                    panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act);
            }
 #endif
+           /*
+            * Clear any reserved bits in the MXCSR to prevent a GPF
+            * when issuing an FXRSTOR.
+            */
+
+           state->fpu_mxcsr &= mxcsr_capability_mask;
 
            bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size);
 
            if (fpu_YMM_present) {
                struct x86_avx_thread_state *iavx = (void *) ifps;
+               uint32_t fpu_nyreg = 0;
+
+               if (f == x86_AVX_STATE32)
+                       fpu_nyreg = 8;
+               else if (f == x86_AVX_STATE64)
+                       fpu_nyreg = 16;
+
+               if (fpu_nyreg) {
+                       x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
+                       bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG));
+               }
+
                iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32;
                /* Sanitize XSAVE header */
                bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd));
@@ -561,11 +579,6 @@ fpu_set_fxstate(
                    set_ts();
                    ml_set_interrupts_enabled(istate);
            }
-               /*
-                * Clear any reserved bits in the MXCSR to prevent a GPF
-                * when issuing an FXRSTOR.
-                */
-           ifps->fx_MXCSR &= mxcsr_capability_mask;
 
            simple_unlock(&pcb->lock);
 
@@ -591,7 +604,7 @@ fpu_get_fxstate(
        x86_float_state64_t     *state;
        kern_return_t   ret = KERN_FAILURE;
        pcb_t   pcb;
-       size_t  state_size = (((f == x86_AVX_STATE32) || (f == x86_AVX_STATE64)) && (fpu_YMM_present == TRUE)) ? sizeof(struct x86_avx_thread_state) : sizeof(struct x86_fx_thread_state);
+       size_t  state_size = sizeof(struct x86_fx_thread_state);
 
        if (fp_kind == FP_NO)
                return KERN_FAILURE;
@@ -633,6 +646,21 @@ fpu_get_fxstate(
        }
        if (ifps->fp_valid) {
                bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size);
+               if (fpu_YMM_present) {
+                       struct x86_avx_thread_state *iavx = (void *) ifps;
+                       uint32_t fpu_nyreg = 0;
+
+                       if (f == x86_AVX_STATE32)
+                               fpu_nyreg = 8;
+                       else if (f == x86_AVX_STATE64)
+                               fpu_nyreg = 16;
+
+                       if (fpu_nyreg) {
+                               x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
+                               bcopy(&iavx->x_YMMH_reg[0][0], &ystate->__fpu_ymmh0, fpu_nyreg * sizeof(_STRUCT_XMM_REG));
+                       }
+               }
+
                ret = KERN_SUCCESS;
        }
        simple_unlock(&pcb->lock);
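
Two things happen in this fpu.c hunk: user-supplied MXCSR bits are masked against the hardware capability mask before they can reach FXRSTOR (reserved bits raise #GP), and only the upper YMM halves are copied through the XSAVE area (8 registers for x86_AVX_STATE32, 16 for x86_AVX_STATE64). A toy sketch of the masking step; 0xFFBF is the architectural default mask assumed here, the real value comes from FXSAVE:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t mxcsr_capability_mask = 0xFFBF; /* assumed; FXSAVE reports it */
        uint32_t user_mxcsr = 0xDEAD1F80;        /* untrusted thread state */

        user_mxcsr &= mxcsr_capability_mask;     /* cf. fpu_set_fxstate() */
        printf("sanitized MXCSR: 0x%08x\n", user_mxcsr);
        return 0;
}
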
index bf0508a69f05b43348f6d6c6449af42acecf6c43..47a5b9c7a791267d886a00e217726e1a5622a058 100644 (file)
@@ -33,9 +33,6 @@
 
 extern pd_entry_t BootstrapPTD[2048];
 
-#define TWO_MEG_MASK 0xFFFFFFFFFFE00000ULL
-#define FOUR_K_MASK 0xFFFFFFFFFFFFF000ULL
-
 // src is virtually mapped, not page aligned, 
 // dst is a physical 4k page aligned ptr, len is one 4K page
 // src & dst will not overlap
@@ -63,63 +60,41 @@ hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t p
 
 void hibprintf(const char *fmt, ...);
 
-void
-pal_hib_window_setup(ppnum_t page)
-{
-       uint64_t *pp;
-       uint64_t phys = ptoa_64(page);
-       int i;
-
-       BootstrapPTD[2047] = (phys & ~((uint64_t)I386_LPGMASK)) | INTEL_PTE_PS  | INTEL_PTE_VALID | INTEL_PTE_WRITE;
-
-       invlpg(HIB_PTES);
-
-       pp = (uint64_t *)(uintptr_t)(HIB_PTES + (phys & I386_LPGMASK));
-
-       for(i=0;i<512;i++)
-               *pp = 0;
-
-       pp[0] = phys | INTEL_PTE_VALID | INTEL_PTE_WRITE;
-       BootstrapPTD[2047] = phys | INTEL_PTE_VALID | INTEL_PTE_WRITE;
-
-       invlpg(HIB_PTES);
-}
-
 uintptr_t
-pal_hib_map(uintptr_t v, uint64_t p)
+pal_hib_map(uintptr_t virt, uint64_t phys)
 {
-       int index;
-
-       switch(v) {
-               case DEST_COPY_AREA:
-                       index = 1;
-                       break;
-               case SRC_COPY_AREA:
-                       index = 2;
-                       break;
-               case COPY_PAGE_AREA:
-                       index = 3;
-                       break;
-               default:
-                       index = -1;
-                       asm("cli;hlt;");
-       }
-
-       uint64_t *ptes = (uint64_t *)HIB_PTES;
-
-       /* Outside 1-1 4G map so set up the mappings for the dest page using 2MB pages */
-       ptes[index] = (p & FOUR_K_MASK) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
-               
-       /* Invalidate the page tables for this */
-       invlpg((uintptr_t)v);
-
-       return v;
+    uintptr_t index;
+
+    switch (virt)
+    {
+       case DEST_COPY_AREA:
+       case SRC_COPY_AREA:
+       case COPY_PAGE_AREA:
+       case BITMAP_AREA:
+       case IMAGE_AREA:
+       case IMAGE2_AREA:
+           break;
+       default:
+           asm("cli;hlt;");
+           break;
+    }
+
+    index = (virt >> I386_LPGSHIFT);
+    virt += (uintptr_t)(phys & I386_LPGMASK);
+    phys  = ((phys & ~((uint64_t)I386_LPGMASK)) | INTEL_PTE_PS  | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+    BootstrapPTD[index] = phys;
+    invlpg(virt);
+    BootstrapPTD[index + 1] = (phys + I386_LPGBYTES);
+    invlpg(virt + I386_LPGBYTES);
+
+    return (virt);
 }
 
 void hibernateRestorePALState(uint32_t *arg)
 {
-       (void)arg;
+    (void)arg;
 }
+
 void
 pal_hib_patchup(void)
 {
index 596888b5f3efd13af4b8136992ca5add7d9e11d3..560a88ffc65d907e25b0715d435caa1075c1021a 100644 (file)
@@ -199,7 +199,7 @@ x86_64_post_sleep(uint64_t new_cr3)
 // Set up the physical mapping - NPHYSMAP GB of memory mapped at a high address
 // NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account
 // the PCI hole (which is less 4GB but not more).
-#define NPHYSMAP MAX(K64_MAXMEM/GB + 4, 4)
+
 // Compile-time guard:
 extern int maxphymapsupported[NPHYSMAP <= PTE_PER_PAGE ? 1 : -1];
 static void
@@ -335,9 +335,22 @@ vstart(vm_offset_t boot_args_start)
 
                cpu = 0;
                cpu_data_alloc(TRUE);
+
+                               
+               /*
+                * Setup boot args given the physical start address.
+                */
+               kernelBootArgs = (boot_args *)
+                   ml_static_ptovirt(boot_args_start);
+               DBG("i386_init(0x%lx) kernelBootArgs=%p\n",
+                   (unsigned long)boot_args_start, kernelBootArgs);
+
+               PE_init_platform(FALSE, kernelBootArgs);
+               postcode(PE_INIT_PLATFORM_D);
        } else {
                /* Find our logical cpu number */
                cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
+               DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, rdmsr64(MSR_IA32_GS_BASE));
 #ifdef __x86_64__
                if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) {
                        wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_NXE);
@@ -373,7 +386,7 @@ vstart(vm_offset_t boot_args_start)
        }
 
        if (is_boot_cpu)
-               i386_init(boot_args_start);
+               i386_init();
        else
                i386_init_slave();
        /*NOTREACHED*/
@@ -406,7 +419,7 @@ vstart(vm_offset_t boot_args_start)
  *     set up.
  */
 void
-i386_init(vm_offset_t boot_args_start)
+i386_init(void)
 {
        unsigned int    maxmem;
        uint64_t        maxmemtouse;
@@ -423,16 +436,6 @@ i386_init(vm_offset_t boot_args_start)
        mca_cpu_init();
 #endif
 
-       /*
-        * Setup boot args given the physical start address.
-        */
-       kernelBootArgs = (boot_args *)
-               ml_static_ptovirt(boot_args_start);
-       DBG("i386_init(0x%lx) kernelBootArgs=%p\n",
-               (unsigned long)boot_args_start, kernelBootArgs);
-
-       PE_init_platform(FALSE, kernelBootArgs);
-       postcode(PE_INIT_PLATFORM_D);
 
        kernel_early_bootstrap();
 
index 989895eb0904cde360ee093e239c97088373f031..866dfa1fb1ace6562e2f4af626756c4199a74145 100644 (file)
@@ -99,8 +99,10 @@ ppnum_t              lowest_lo = 0;
 ppnum_t                lowest_hi = 0;
 ppnum_t                highest_hi = 0;
 
+enum {PMAP_MAX_RESERVED_RANGES = 32};
 uint32_t pmap_reserved_pages_allocated = 0;
-uint32_t pmap_last_reserved_range = 0xFFFFFFFF;
+uint32_t pmap_reserved_range_indices[PMAP_MAX_RESERVED_RANGES];
+uint32_t pmap_last_reserved_range_index = 0;
 uint32_t pmap_reserved_ranges = 0;
 
 extern unsigned int bsd_mbuf_cluster_reserve(boolean_t *);
@@ -161,7 +163,7 @@ i386_vm_init(uint64_t       maxmem,
        uint32_t maxdmaaddr;
        uint32_t  mbuf_reserve = 0;
        boolean_t mbuf_override = FALSE;
-
+       boolean_t coalescing_permitted;
 #if DEBUG
        kprintf("Boot args revision: %d version: %d",
                args->Revision, args->Version);
@@ -256,6 +258,12 @@ i386_vm_init(uint64_t      maxmem,
                }
                base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT);
                top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1);
+
+#if    MR_RSV_TEST
+               static uint32_t nmr = 0;
+               if ((base > 0x20000) && (nmr++ < 4))
+                       mptr->Attribute |= EFI_MEMORY_KERN_RESERVED;
+#endif
                region_bytes = (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT);
                pmap_type = mptr->Type;
 
@@ -347,6 +355,19 @@ i386_vm_init(uint64_t      maxmem,
                                prev_pmptr = 0;
                                continue;
                        }
+                       /*
+                        * A range may be marked with the
+                        * EFI_MEMORY_KERN_RESERVED attribute
+                        * on some systems, to indicate that the range
+                        * must not be made available to devices.
+                        */
+
+                       if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) {
+                               if (++pmap_reserved_ranges > PMAP_MAX_RESERVED_RANGES) {
+                                       panic("Too many reserved ranges %u\n", pmap_reserved_ranges);
+                               }
+                       }
+
                        if (top < fap) {
                                /*
                                 * entire range below first_avail
@@ -361,21 +382,11 @@ i386_vm_init(uint64_t     maxmem,
 
                                pmptr->end = top;
 
-                               /*
-                                * A range may be marked with with the
-                                * EFI_MEMORY_KERN_RESERVED attribute
-                                * on some systems, to indicate that the range
-                                * must not be made available to devices.
-                                * Simplifying assumptions are made regarding
-                                * the placement of the range.
-                                */
-                               if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED)
-                                       pmap_reserved_ranges++;
 
                                if ((mptr->Attribute & EFI_MEMORY_KERN_RESERVED) &&
                                    (top < I386_KERNEL_IMAGE_BASE_PAGE)) {
                                        pmptr->alloc = pmptr->base;
-                                       pmap_last_reserved_range = pmap_memory_region_count;
+                                       pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count;
                                }
                                else {
                                        /*
@@ -384,6 +395,7 @@ i386_vm_init(uint64_t       maxmem,
                                        pmptr->alloc = top;
                                }
                                pmptr->type = pmap_type;
+                               pmptr->attribute = mptr->Attribute;
                        }
                        else if ( (base < fap) && (top > fap) ) {
                                /*
@@ -394,39 +406,48 @@ i386_vm_init(uint64_t     maxmem,
                                pmptr->base = base;
                                pmptr->alloc = pmptr->end = (fap - 1);
                                pmptr->type = pmap_type;
+                               pmptr->attribute = mptr->Attribute;
                                /*
                                 * we bump these here inline so the accounting
                                 * below works correctly
                                 */
                                pmptr++;
                                pmap_memory_region_count++;
+
                                pmptr->alloc = pmptr->base = fap;
                                pmptr->type = pmap_type;
+                               pmptr->attribute = mptr->Attribute;
                                pmptr->end = top;
-                       }
-                       else {
+
+                               if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED)
+                                       pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count;
+                       } else {
                                /*
                                 * entire range useable
                                 */
                                pmptr->alloc = pmptr->base = base;
                                pmptr->type = pmap_type;
+                               pmptr->attribute = mptr->Attribute;
                                pmptr->end = top;
+                               if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED)
+                                       pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count;
                        }
 
                        if (i386_ptob(pmptr->end) > avail_end )
                                avail_end = i386_ptob(pmptr->end);
 
                        avail_remaining += (pmptr->end - pmptr->base);
-
+                       coalescing_permitted = (prev_pmptr && (pmptr->attribute == prev_pmptr->attribute) && ((pmptr->attribute & EFI_MEMORY_KERN_RESERVED) == 0));
                        /*
                         * Consolidate contiguous memory regions, if possible
                         */
                        if (prev_pmptr &&
-                           pmptr->type == prev_pmptr->type &&
-                           pmptr->base == pmptr->alloc &&
-                               pmptr->base == (prev_pmptr->end + 1))
+                           (pmptr->type == prev_pmptr->type) &&
+                           (coalescing_permitted) &&
+                           (pmptr->base == pmptr->alloc) &&
+                           (pmptr->base == (prev_pmptr->end + 1)))
                        {
-                               if(prev_pmptr->end == prev_pmptr->alloc)
+                               if (prev_pmptr->end == prev_pmptr->alloc)
                                        prev_pmptr->alloc = pmptr->base;
                                prev_pmptr->end = pmptr->end;
                        } else {
@@ -603,11 +624,12 @@ boolean_t pmap_next_page_reserved(ppnum_t *);
  */
 boolean_t
 pmap_next_page_reserved(ppnum_t *pn) {
-       if (pmap_reserved_ranges && pmap_last_reserved_range != 0xFFFFFFFF) {
+       if (pmap_reserved_ranges) {
                uint32_t n;
                pmap_memory_region_t *region;
-               for (n = 0; n <= pmap_last_reserved_range; n++) {
-                       region = &pmap_memory_regions[n];
+               for (n = 0; n < pmap_last_reserved_range_index; n++) {
+                       uint32_t reserved_index = pmap_reserved_range_indices[n];
+                       region = &pmap_memory_regions[reserved_index];
                        if (region->alloc < region->end) {
                                *pn = region->alloc++;
                                avail_remaining--;
@@ -619,6 +641,11 @@ pmap_next_page_reserved(ppnum_t *pn) {
                                        lowest_lo = *pn;
 
                                pmap_reserved_pages_allocated++;
+#if DEBUG
+                               if (region->alloc == region->end) {
+                                       kprintf("Exhausted reserved range index: %u, base: 0x%x end: 0x%x, type: 0x%x, attribute: 0x%llx\n", reserved_index, region->base, region->end, region->type, region->attribute);
+                               }
+#endif
                                return TRUE;
                        }
                }
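
Reserved EFI ranges are no longer tracked by a single "last" index: up to PMAP_MAX_RESERVED_RANGES region indices are recorded as the memory map is parsed, and pmap_next_page_reserved() drains those regions in order. A simplified stand-alone model of that allocator (types and sizes are illustrative):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define MAX_RESERVED 32

typedef struct { uint32_t base, alloc, end; } region_t;

static region_t regions[] = {
        { 0x100, 0x100, 0x102 },        /* kern-reserved: 2 pages left */
        { 0x200, 0x200, 0x280 },        /* ordinary memory */
};
static uint32_t reserved_idx[MAX_RESERVED] = { 0 };  /* indices into regions[] */
static uint32_t nreserved = 1;

static bool
next_reserved_page(uint32_t *pn)
{
        for (uint32_t n = 0; n < nreserved; n++) {
                region_t *r = &regions[reserved_idx[n]];
                if (r->alloc < r->end) {
                        *pn = r->alloc++;
                        return true;
                }
        }
        return false;           /* all reserved ranges exhausted */
}

int main(void)
{
        uint32_t pn;

        while (next_reserved_page(&pn))
                printf("reserved page 0x%x\n", pn);
        return 0;
}
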
index ef38300e41d5805f3496223be75fadbadeadaf33..048dc704dd89fb0cbf1439a884221c5a114cab87 100644 (file)
@@ -370,9 +370,10 @@ usimple_lock(
 
                if (uslock_acquired == FALSE) {
                        uint32_t lock_cpu;
+                       uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
                        spinlock_timed_out = l;
-                       lock_cpu = spinlock_timeout_NMI((uintptr_t)l->interlock.lock_data);
-                       panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x", l, (uintptr_t)l->interlock.lock_data, current_thread(), lock_cpu);
+                       lock_cpu = spinlock_timeout_NMI(lowner);
+                       panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l, lowner,  current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data);
                }
        }
        USLDBG(usld_lock_post(l, pc));
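
The panic path now snapshots the interlock's owner field once into lowner, so the value passed to spinlock_timeout_NMI() and the "lock owner" in the panic string are the same observation, while the trailing "current owner" field shows whether the lock has since changed hands. A user-space sketch of the spin-with-deadline shape using C11 atomics (the kernel's lock word and NMI plumbing are more involved):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uintptr_t lock_data;      /* 0 = free, else owner "thread" */

static int
spin_lock_timed(uintptr_t self, long spins)
{
        while (spins-- > 0) {
                uintptr_t expected = 0;
                if (atomic_compare_exchange_weak(&lock_data, &expected, self))
                        return 1;       /* acquired */
        }
        /* Snapshot once, as the hunk above does with lowner. */
        uintptr_t lowner = atomic_load(&lock_data);
        fprintf(stderr, "timeout; lock owner 0x%lx\n", (unsigned long)lowner);
        return 0;
}

int main(void)
{
        atomic_store(&lock_data, 0x1234);       /* simulate a held lock */
        return spin_lock_timed(0x1, 1000000) ? 0 : 1;
}
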
index 724490e22435bd9a46abd31146b0747cd65925a9..474733e96de174fbdeb99af8135e9b223db96481 100644 (file)
@@ -38,7 +38,8 @@ struct boot_args;
 struct cpu_data;
 
 extern void            vstart(vm_offset_t);
-extern void            i386_init(vm_offset_t);
+extern void            i386_init(void);
+
 extern void            i386_vm_init(
                                uint64_t,
                                boolean_t,
index e90a298f9e104271bd8844efc16c2917002f043d..f4221f9640c3f09d136682933cbe0118f2fb51d2 100644 (file)
@@ -147,7 +147,7 @@ static volatile long        mp_rv_complete __attribute__((aligned(64)));
 volatile       uint64_t        debugger_entry_time;
 volatile       uint64_t        debugger_exit_time;
 #if MACH_KDP
-
+#include <kdp/kdp.h>
 extern int kdp_snapshot;
 static struct _kdp_xcpu_call_func {
        kdp_x86_xcpu_func_t func;
@@ -579,12 +579,12 @@ NMIInterruptHandler(x86_saved_state_t *regs)
                        goto NMExit;
 
        if (spinlock_timed_out) {
-               char pstr[160];
+               char pstr[192];
                snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
                panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
        } else if (pmap_tlb_flush_timeout == TRUE) {
                char pstr[128];
-               snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:%d\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
+               snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
                panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
        }
 
@@ -1315,6 +1315,7 @@ i386_activate_cpu(void)
        cdp->cpu_running = TRUE;
        started_cpu();
        simple_unlock(&x86_topo_lock);
+       flush_tlb_raw();
 }
 
 extern void etimer_timer_expire(void   *arg);
@@ -1372,6 +1373,13 @@ mp_kdp_enter(void)
         */
        mp_kdp_state = ml_set_interrupts_enabled(FALSE);
        my_cpu = cpu_number();
+
+       if (my_cpu == (unsigned) debugger_cpu) {
+               kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
+               kdp_reset();
+               return;
+       }
+
        cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
        simple_lock(&mp_kdp_lock);
 
index 025e56ea4565703845e894f96124c5cba95d138a..a1fefe4e501d85fd1ecf6237aa1b5e61aea154a2 100644 (file)
 #ifndef _I386_PAL_HIBERNATE_H
 #define _I386_PAL_HIBERNATE_H
 
-#define HIB_PTES               (4*GB - 1*I386_LPGBYTES) /*4GB - 2m */
-#define DEST_COPY_AREA (HIB_PTES + 1*I386_PGBYTES)
-#define SRC_COPY_AREA  (HIB_PTES + 2*I386_PGBYTES)
-#define COPY_PAGE_AREA (HIB_PTES + 3*I386_PGBYTES)
+#define HIB_MAP_SIZE    (2*I386_LPGBYTES)
+#define DEST_COPY_AREA (4*GB - HIB_MAP_SIZE) /*4GB - 2*2m */
+#define SRC_COPY_AREA  (DEST_COPY_AREA - HIB_MAP_SIZE)
+#define COPY_PAGE_AREA (SRC_COPY_AREA  - HIB_MAP_SIZE)
+#define BITMAP_AREA    (COPY_PAGE_AREA - HIB_MAP_SIZE)
+#define IMAGE_AREA     (BITMAP_AREA    - HIB_MAP_SIZE)
+#define IMAGE2_AREA    (IMAGE_AREA     - HIB_MAP_SIZE)
 
 #define HIB_BASE sectINITPTB
 #define HIB_ENTRYPOINT acpi_wake_prot_entry
 
-void pal_hib_window_setup(ppnum_t page);
 uintptr_t pal_hib_map(uintptr_t v, uint64_t p);
 void hibernateRestorePALState(uint32_t *src);
 void pal_hib_patchup(void);
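
The single HIB_PTES window (and its pal_hib_window_setup() helper) give way to six fixed windows stacked downward from 4GB, each HIB_MAP_SIZE wide, which the reworked pal_hib_map() earlier in this commit covers with a pair of 2MB PTEs. A sketch that just computes the resulting layout, assuming the usual 2MB large page:

#include <stdio.h>
#include <stdint.h>

#define GB           (1ULL << 30)
#define LPGBYTES     (2ULL << 20)       /* I386_LPGBYTES: one 2MB large page */
#define HIB_MAP_SIZE (2 * LPGBYTES)

int main(void)
{
        const char *names[] = { "DEST_COPY_AREA", "SRC_COPY_AREA",
                                "COPY_PAGE_AREA", "BITMAP_AREA",
                                "IMAGE_AREA",     "IMAGE2_AREA" };
        uint64_t area = 4 * GB;

        for (int i = 0; i < 6; i++) {
                area -= HIB_MAP_SIZE;
                printf("%-15s 0x%llx\n", names[i], (unsigned long long)area);
        }
        return 0;
}
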
index c8a2f520653b56a3f789966d5e17a79e3df498e9..bfbb48d4ba6243ce5119ae37cf2d14004edc8dd5 100644 (file)
@@ -229,6 +229,9 @@ kvtophys(
        return ((addr64_t)pa);
 }
 
+extern pt_entry_t *debugger_ptep;
+extern vm_map_offset_t debugger_window_kva;
+
 __private_extern__ void ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t bytes) {
        void *src, *dst;
 
@@ -243,6 +246,36 @@ __private_extern__ void ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t b
 #elif defined(__x86_64__)
        src = PHYSMAP_PTOV(src64);
        dst = PHYSMAP_PTOV(dst64);
+
+       addr64_t debug_pa = 0;
+
+       /* If either destination or source are outside the
+        * physical map, establish a physical window onto the target frame.
+        */
+       assert(physmap_enclosed(src64) || physmap_enclosed(dst64));
+
+       if (physmap_enclosed(src64) == FALSE) {
+               src = (void *)(debugger_window_kva | (src64 & INTEL_OFFMASK));
+               debug_pa = src64 & PG_FRAME;
+       } else if (physmap_enclosed(dst64) == FALSE) {
+               dst = (void *)(debugger_window_kva | (dst64 & INTEL_OFFMASK));
+               debug_pa = dst64 & PG_FRAME;
+       }
+       /* DRK: debugger only routine, we don't bother checking for an
+        * identical mapping.
+        */
+       if (debug_pa) {
+               if (debugger_window_kva == 0)
+                       panic("%s: invoked in non-debug mode", __FUNCTION__);
+               /* Establish a cache-inhibited physical window; some platforms
+                * may not cover arbitrary ranges with MTRRs
+                */
+               pmap_store_pte(debugger_ptep, debug_pa | INTEL_PTE_NCACHE | INTEL_PTE_RW | INTEL_PTE_REF| INTEL_PTE_MOD | INTEL_PTE_VALID);
+               flush_tlb_raw();
+#if    DEBUG
+               kprintf("Remapping debugger physical window at %p to 0x%llx\n", (void *)debugger_window_kva, debug_pa);
+#endif
+       }
 #endif
        /* ensure we stay within a page */
        if (((((uint32_t)src64 & (I386_PGBYTES-1)) + bytes) > I386_PGBYTES) || ((((uint32_t)dst64 & (I386_PGBYTES-1)) + bytes) > I386_PGBYTES) ) {
@@ -251,17 +284,17 @@ __private_extern__ void ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t b
 
        switch (bytes) {
        case 1:
-               *((uint8_t *) dst) = *((uint8_t *) src);
+               *((uint8_t *) dst) = *((volatile uint8_t *) src);
                break;
        case 2:
-               *((uint16_t *) dst) = *((uint16_t *) src);
+               *((uint16_t *) dst) = *((volatile uint16_t *) src);
                break;
        case 4:
-               *((uint32_t *) dst) = *((uint32_t *) src);
+               *((uint32_t *) dst) = *((volatile uint32_t *) src);
                break;
                /* Should perform two 32-bit reads */
        case 8:
-               *((uint64_t *) dst) = *((uint64_t *) src);
+               *((uint64_t *) dst) = *((volatile uint64_t *) src);
                break;
        default:
                bcopy(src, dst, bytes);
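
Two changes meet in ml_copy_phys(): frames outside the physmap now get a temporary cache-inhibited mapping through the debugger window, and the single-width transfers read through volatile pointers so each access happens exactly once at the stated width (the source may be a device frame). A sketch of the width-dispatch idea only:

#include <stdint.h>
#include <string.h>

static void
copy_exact(void *dst, const void *src, size_t bytes)
{
        /* Volatile loads keep the compiler from widening, splitting, or
         * repeating the access; device memory needs exactly one read at
         * the requested width. */
        switch (bytes) {
        case 1: *(uint8_t  *)dst = *(volatile const uint8_t  *)src; break;
        case 2: *(uint16_t *)dst = *(volatile const uint16_t *)src; break;
        case 4: *(uint32_t *)dst = *(volatile const uint32_t *)src; break;
        case 8: *(uint64_t *)dst = *(volatile const uint64_t *)src; break;
        default: memcpy(dst, src, bytes); break;
        }
}

int main(void)
{
        uint32_t a = 0xCAFEBABE, b = 0;

        copy_exact(&b, &a, sizeof a);
        return (b == a) ? 0 : 1;        /* 0 on success */
}
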
index 22eafd1b8df26dee214e90833f7a10a0eec22f91..1f064b614dfb000a4de194581fe78789457afd69 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 extern int disableConsoleOutput;
 
-decl_simple_lock_data(,pm_init_lock);
+#define DELAY_UNSET            0xFFFFFFFFFFFFFFFFULL
 
 /*
  * The following is set when the KEXT loads and initializes.
  */
 pmDispatch_t   *pmDispatch     = NULL;
 
-static uint32_t                pmInitDone      = 0;
-static boolean_t       earlyTopology   = FALSE;
+static uint32_t                pmInitDone              = 0;
+static boolean_t       earlyTopology           = FALSE;
+static uint64_t                earlyMaxBusDelay        = DELAY_UNSET;
+static uint64_t                earlyMaxIntDelay        = DELAY_UNSET;
 
 
 /*
@@ -71,16 +73,6 @@ static boolean_t     earlyTopology   = FALSE;
 void
 power_management_init(void)
 {
-    static boolean_t   initialized     = FALSE;
-
-    /*
-     * Initialize the lock for the KEXT initialization.
-     */
-    if (!initialized) {
-       simple_lock_init(&pm_init_lock, 0);
-       initialized = TRUE;
-    }
-
     if (pmDispatch != NULL && pmDispatch->cstateInit != NULL)
        (*pmDispatch->cstateInit)();
 }
@@ -101,6 +93,20 @@ machine_idle(void)
     DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
     MARK_CPU_IDLE(cpu_number());
 
+    if (pmInitDone) {
+       /*
+        * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
+        * were called prior to the CPU PM kext being registered.  We do
+        * this here because idle is the point where decisions based on
+        * these values are first made.
+        */
+       if (earlyMaxBusDelay != DELAY_UNSET)
+           ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
+
+       if (earlyMaxIntDelay != DELAY_UNSET)
+           ml_set_maxintdelay(earlyMaxIntDelay);
+    }
+
     if (pmInitDone
        && pmDispatch != NULL
        && pmDispatch->MachineIdle != NULL)
@@ -201,8 +207,12 @@ pmMarkAllCPUsOff(void)
 static void
 pmInitComplete(void)
 {
-    if (earlyTopology && pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
+    if (earlyTopology
+       && pmDispatch != NULL
+       && pmDispatch->pmCPUStateInit != NULL) {
        (*pmDispatch->pmCPUStateInit)();
+       earlyTopology = FALSE;
+    }
 
     pmInitDone = 1;
 }
@@ -284,7 +294,7 @@ pmCPUGetDeadline(cpu_data_t *cpu)
 {
     uint64_t   deadline        = 0;
 
-       if (pmInitDone
+    if (pmInitDone
        && pmDispatch != NULL
        && pmDispatch->GetDeadline != NULL)
        deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu);
@@ -448,7 +458,8 @@ ml_get_maxsnoop(void)
 {
     uint64_t   max_snoop       = 0;
 
-    if (pmDispatch != NULL
+    if (pmInitDone
+       && pmDispatch != NULL
        && pmDispatch->getMaxSnoop != NULL)
        max_snoop = pmDispatch->getMaxSnoop();
 
@@ -461,7 +472,8 @@ ml_get_maxbusdelay(void)
 {
     uint64_t   max_delay       = 0;
 
-    if (pmDispatch != NULL
+    if (pmInitDone
+       && pmDispatch != NULL
        && pmDispatch->getMaxBusDelay != NULL)
        max_delay = pmDispatch->getMaxBusDelay();
 
@@ -482,8 +494,11 @@ ml_set_maxbusdelay(uint32_t mdelay)
     uint64_t   maxdelay        = mdelay;
 
     if (pmDispatch != NULL
-       && pmDispatch->setMaxBusDelay != NULL)
+       && pmDispatch->setMaxBusDelay != NULL) {
+       earlyMaxBusDelay = DELAY_UNSET;
        pmDispatch->setMaxBusDelay(maxdelay);
+    } else
+       earlyMaxBusDelay = maxdelay;
 }
 
 uint64_t
@@ -505,8 +520,11 @@ void
 ml_set_maxintdelay(uint64_t mdelay)
 {
     if (pmDispatch != NULL
-       && pmDispatch->setMaxIntDelay != NULL)
+       && pmDispatch->setMaxIntDelay != NULL) {
+       earlyMaxIntDelay = DELAY_UNSET;
        pmDispatch->setMaxIntDelay(mdelay);
+    } else
+       earlyMaxIntDelay = mdelay;
 }
 
 boolean_t
@@ -811,6 +829,12 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
     if (cpuFuncs != NULL) {
        pmDispatch = cpuFuncs;
 
+       if (earlyTopology
+           && pmDispatch->pmCPUStateInit != NULL) {
+           (*pmDispatch->pmCPUStateInit)();
+           earlyTopology = FALSE;
+       }
+
        if (pmDispatch->pmIPIHandler != NULL) {
            lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler);
        }
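
The pmCPU.c changes implement a deferred-setter pattern: if ml_set_maxbusdelay() or ml_set_maxintdelay() runs before the CPU PM kext registers its dispatch table, the value is stashed behind a DELAY_UNSET sentinel and replayed from machine_idle() once pmInitDone is set. A stand-alone sketch of the shape (hook and names are illustrative):

#include <stdio.h>
#include <stdint.h>

#define DELAY_UNSET 0xFFFFFFFFFFFFFFFFULL

static void (*set_bus_delay_hook)(uint64_t) = 0;  /* the kext's dispatch slot */
static uint64_t early_bus_delay = DELAY_UNSET;

static void
set_maxbusdelay(uint64_t d)
{
        if (set_bus_delay_hook) {
                early_bus_delay = DELAY_UNSET;    /* nothing pending anymore */
                set_bus_delay_hook(d);
        } else {
                early_bus_delay = d;              /* stash for later replay */
        }
}

static void
real_setter(uint64_t d)
{
        printf("bus delay = %llu\n", (unsigned long long)d);
}

int main(void)
{
        set_maxbusdelay(100);                   /* too early: stashed */
        set_bus_delay_hook = real_setter;       /* "kext" registers */
        if (early_bus_delay != DELAY_UNSET)     /* replay, as machine_idle() does */
                set_maxbusdelay(early_bus_delay);
        return 0;
}
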
index b672bdc6b3ed20f917551e1f2d3ec37ee67468e7..c31ab2d8aa1f10a1131874c06eca5473d962309f 100644 (file)
@@ -564,56 +564,6 @@ pmap_map(
        return(virt);
 }
 
-/*
- *     Back-door routine for mapping kernel VM at initialization.  
- *     Useful for mapping memory outside the range
- *      Sets no-cache, A, D.
- *     Otherwise like pmap_map.
- */
-vm_offset_t
-pmap_map_bd(
-       vm_offset_t     virt,
-       vm_map_offset_t start_addr,
-       vm_map_offset_t end_addr,
-       vm_prot_t       prot,
-       unsigned int    flags)
-{
-       pt_entry_t      template;
-       pt_entry_t      *pte;
-       spl_t           spl;
-
-       template = pa_to_pte(start_addr)
-               | INTEL_PTE_REF
-               | INTEL_PTE_MOD
-               | INTEL_PTE_WIRED
-               | INTEL_PTE_VALID;
-
-       if(flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) {
-           template |= INTEL_PTE_NCACHE;
-           if(!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT)))
-                   template |= INTEL_PTE_PTA;
-       }
-
-       if (prot & VM_PROT_WRITE)
-           template |= INTEL_PTE_WRITE;
-
-       while (start_addr < end_addr) {
-               spl = splhigh();
-               pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
-               if (pte == PT_ENTRY_NULL) {
-                       panic("pmap_map_bd: Invalid kernel address\n");
-               }
-               pmap_store_pte(pte, template);
-               splx(spl);
-               pte_increment_pa(template);
-               virt += PAGE_SIZE;
-               start_addr += PAGE_SIZE;
-       } 
-
-       flush_tlb();
-       return(virt);
-}
-
 extern pmap_paddr_t            first_avail;
 extern vm_offset_t             virtual_avail, virtual_end;
 extern pmap_paddr_t            avail_start, avail_end;
@@ -1060,9 +1010,7 @@ pmap_init(void)
                                                if (pn > last_managed_page)
                                                        last_managed_page = pn;
 
-                                               if (pn < lowest_lo)
-                                                       pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
-                                               else if (pn >= lowest_hi && pn <= highest_hi)
+                                               if (pn >= lowest_hi && pn <= highest_hi)
                                                        pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
                                        }
                                }
index 44b6bf7423272a0ccc3f21b6a4b82f0e747c00c3..a168562c90c89fb766386611fcc58c1ff3089262 100644 (file)
@@ -507,7 +507,12 @@ extern uint32_t            pmap_kernel_text_ps;
 #define ID_MAP_VTOP(x) ((void *)(((uint64_t)(x)) & LOW_4GB_MASK))
 
 #define PHYSMAP_BASE   KVADDR(KERNEL_PHYSMAP_INDEX,0,0,0)
+#define NPHYSMAP (MAX(K64_MAXMEM/GB + 4, 4))
 #define PHYSMAP_PTOV(x)        ((void *)(((uint64_t)(x)) + PHYSMAP_BASE))
+
+static inline boolean_t physmap_enclosed(addr64_t a) {
+       return (a < (NPHYSMAP * GB));
+}
 #endif
 
 typedef        volatile long   cpu_set;        /* set of CPUs - must be <= 32 */
@@ -579,10 +584,11 @@ extern void         pmap_put_mapwindow(mapwindow_t *map);
 #endif
 
 typedef struct pmap_memory_regions {
-  ppnum_t base;
-  ppnum_t end;
-  ppnum_t alloc;
-  uint32_t type;
+       ppnum_t base;
+       ppnum_t end;
+       ppnum_t alloc;
+       uint32_t type;
+       uint64_t attribute;
 } pmap_memory_region_t;
 
 extern unsigned pmap_memory_region_count;
index d81248dae7cbef4921a1beef11f91d5e43e9c817..576b9c089858cb919a2f582581816e45049c2592 100644 (file)
@@ -137,7 +137,7 @@ pmap_is_noencrypt(ppnum_t pn)
        pai = ppn_to_pai(pn);
 
        if (!IS_MANAGED_PAGE(pai))
-               return (TRUE);
+               return (FALSE);
 
        if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT)
                return (TRUE);
@@ -171,11 +171,17 @@ pmap_clear_noencrypt(ppnum_t pn)
        pai = ppn_to_pai(pn);
 
        if (IS_MANAGED_PAGE(pai)) {
-               LOCK_PVH(pai);
+               /*
+                * synchronization at VM layer prevents PHYS_NOENCRYPT
+                * from changing state, so we don't need the lock to inspect
+                */
+               if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT) {
+                       LOCK_PVH(pai);
 
-               pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT;
+                       pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT;
 
-               UNLOCK_PVH(pai);
+                       UNLOCK_PVH(pai);
+               }
        }
 }
 
index d7e63d6b04116582fec22825b64112a339f56707..9061d73cf06267a719e7ce84547649e3509d8451 100644 (file)
@@ -1429,3 +1429,52 @@ pmap_change_wiring(
 
        PMAP_UNLOCK(map);
 }
+
+/*
+ *     "Backdoor" direct map routine for early mappings.
+ *     Useful for mapping memory outside the range
+ *      Sets A, D and NC if requested
+ */
+
+vm_offset_t
+pmap_map_bd(
+       vm_offset_t     virt,
+       vm_map_offset_t start_addr,
+       vm_map_offset_t end_addr,
+       vm_prot_t       prot,
+       unsigned int    flags)
+{
+       pt_entry_t      template;
+       pt_entry_t      *pte;
+       spl_t           spl;
+       vm_offset_t     base = virt;
+       template = pa_to_pte(start_addr)
+               | INTEL_PTE_REF
+               | INTEL_PTE_MOD
+               | INTEL_PTE_WIRED
+               | INTEL_PTE_VALID;
+
+       if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
+               template |= INTEL_PTE_NCACHE;
+               if (!(flags & (VM_MEM_GUARDED)))
+                       template |= INTEL_PTE_PTA;
+       }
+       if (prot & VM_PROT_WRITE)
+               template |= INTEL_PTE_WRITE;
+
+       while (start_addr < end_addr) {
+               spl = splhigh();
+               pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
+               if (pte == PT_ENTRY_NULL) {
+                       panic("pmap_map_bd: Invalid kernel address\n");
+               }
+               pmap_store_pte(pte, template);
+               splx(spl);
+               pte_increment_pa(template);
+               virt += PAGE_SIZE;
+               start_addr += PAGE_SIZE;
+       }
+       flush_tlb_raw();
+       PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
+       return(virt);
+}
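
pmap_map_bd() moves here from pmap.c with two visible changes: the PTA bit is now gated on VM_MEM_GUARDED alone, and the flush uses flush_tlb_raw() plus PMAP_UPDATE_TLBS(). The core technique is a PTE template: the fixed attribute bits are OR'd in once, and only the physical address advances per page. A sketch using the standard i386 PTE bit positions (P, RW, PCD):

#include <stdio.h>
#include <stdint.h>

#define PTE_VALID  0x001ULL     /* P   */
#define PTE_WRITE  0x002ULL     /* RW  */
#define PTE_NCACHE 0x010ULL     /* PCD */
#define PAGE_SIZE  4096ULL

int main(void)
{
        uint64_t pa = 0xFEE00000ULL;    /* illustrative start address */
        uint64_t template = pa | PTE_VALID | PTE_WRITE | PTE_NCACHE;

        for (int i = 0; i < 3; i++) {
                printf("pte[%d] = 0x%llx\n", i, (unsigned long long)template);
                template += PAGE_SIZE;  /* cf. pte_increment_pa() */
        }
        return 0;
}
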
index b35d5c0a0b60839e99e4ac0bd0706c59973be920..05dd961f12163ca07edd036996f982c7f43e7072 100644 (file)
 /*
  * CR4
  */
-#define CR4_OSXSAVE 0x00040000 /* OS supports XSAVE */
-#define CR4_PCIDE   0x00020000 /* PCID Enable */
-#define CR4_SMXE    0x00004000 /* Enable SMX operation */
-#define CR4_VMXE    0x00002000 /* Enable VMX operation */
-#define CR4_OSXMM   0x00000400  /* SSE/SSE2 exceptions supported in OS */
-#define CR4_OSFXS   0x00000200  /* SSE/SSE2 OS supports FXSave */
-#define CR4_PCE     0x00000100  /* Performance-Monitor Count Enable */
-#define CR4_PGE     0x00000080  /* Page Global Enable */
-#define        CR4_MCE     0x00000040  /* Machine Check Exceptions */
-#define CR4_PAE     0x00000020  /* Physical Address Extensions */
-#define        CR4_PSE     0x00000010  /* Page Size Extensions */
-#define        CR4_DE      0x00000008  /* Debugging Extensions */
-#define        CR4_TSD     0x00000004  /* Time Stamp Disable */
-#define        CR4_PVI     0x00000002  /* Protected-mode Virtual Interrupts */
-#define        CR4_VME     0x00000001  /* Virtual-8086 Mode Extensions */
+#define CR4_SMEP       0x00100000      /* Supervisor-Mode Execute Protect */
+#define CR4_OSXSAVE    0x00040000      /* OS supports XSAVE */
+#define CR4_PCIDE      0x00020000      /* PCID Enable */
+#define CR4_RDWRFSGS   0x00010000      /* RDWRFSGS Enable */
+#define CR4_SMXE       0x00004000      /* Enable SMX operation */
+#define CR4_VMXE       0x00002000      /* Enable VMX operation */
+#define CR4_OSXMM      0x00000400      /* SSE/SSE2 exception support in OS */
+#define CR4_OSFXS      0x00000200      /* SSE/SSE2 OS supports FXSave */
+#define CR4_PCE                0x00000100      /* Performance-Monitor Count Enable */
+#define CR4_PGE                0x00000080      /* Page Global Enable */
+#define        CR4_MCE         0x00000040      /* Machine Check Exceptions */
+#define CR4_PAE                0x00000020      /* Physical Address Extensions */
+#define        CR4_PSE         0x00000010      /* Page Size Extensions */
+#define        CR4_DE          0x00000008      /* Debugging Extensions */
+#define        CR4_TSD         0x00000004      /* Time Stamp Disable */
+#define        CR4_PVI         0x00000002      /* Protected-mode Virtual Interrupts */
+#define        CR4_VME         0x00000001      /* Virtual-8086 Mode Extensions */
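
For illustration, a minimal sketch of turning one of these bits on, assuming the get_cr4()/set_cr4() accessors declared elsewhere in this header; cpuid_supports_smep() is a hypothetical feature query standing in for the CPUID leaf-7 check:

    /* Enable supervisor-mode execute protection when the CPU
     * advertises it. A sketch only, not the kernel's actual
     * enablement path.
     */
    if (cpuid_supports_smep())
            set_cr4(get_cr4() | CR4_SMEP);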
 
 /*
  * XCR0 - XFEATURE_ENABLED_MASK (a.k.a. XFEM) register
index 55be4fc7585cced87da54cf6238ed6e7fbce11a5..a07b1b8cb470d6fb89725c92197862898e3e799b 100644 (file)
@@ -145,6 +145,7 @@ perfCallback tempDTraceTrapHook = NULL; /* Pointer to DTrace fbt trap hook routi
 extern boolean_t dtrace_tally_fault(user_addr_t);
 #endif
 
+
 void
 thread_syscall_return(
         kern_return_t ret)
@@ -679,10 +680,11 @@ kernel_trap(
                                is_user = -1;
                        }
 #else
-                       if (vaddr < VM_MAX_USER_PAGE_ADDRESS) {
+                       if (__probable(vaddr < VM_MAX_USER_PAGE_ADDRESS)) {
                                /* fault occurred in userspace */
                                map = thread->map;
                                is_user = -1;
+
                                /*
                                 * If we're not sharing cr3 with the user
                                 * and we faulted in copyio,
@@ -699,6 +701,7 @@ kernel_trap(
 #endif
                }
        }
+
        KERNEL_DEBUG_CONSTANT(
                (MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
                (unsigned)(vaddr >> 32), (unsigned)vaddr, is_user, kern_ip, 0);
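
The __probable() annotation added above is xnu's branch-prediction hint; roughly (a sketch of the sys/cdefs.h definitions, modulo exact casts):

    #define __probable(x)   __builtin_expect(!!(x), 1)
    #define __improbable(x) __builtin_expect(!!(x), 0)

It asks the compiler to lay out the user-fault path as the fall-through case, since kernel-address faults are comparatively rare.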
@@ -744,22 +747,6 @@ kernel_trap(
              goto debugger_entry;
 #endif
            case T_PAGE_FAULT:
-               /*
-                * If the current map is a submap of the kernel map,
-                * and the address is within that map, fault on that
-                * map.  If the same check is done in vm_fault
-                * (vm_map_lookup), we may deadlock on the kernel map
-                * lock.
-                */
-
-               prot = VM_PROT_READ;
-
-               if (code & T_PF_WRITE)
-                       prot |= VM_PROT_WRITE;
-#if     PAE
-               if (code & T_PF_EXECUTE)
-                       prot |= VM_PROT_EXECUTE;
-#endif
 
 #if    MACH_KDB
                /*
@@ -792,6 +779,16 @@ kernel_trap(
                }
 #endif /* CONFIG_DTRACE */
 
+               
+               prot = VM_PROT_READ;
+
+               if (code & T_PF_WRITE)
+                       prot |= VM_PROT_WRITE;
+#if     PAE
+               if (code & T_PF_EXECUTE)
+                       prot |= VM_PROT_EXECUTE;
+#endif
+
                result = vm_fault(map,
                                  vm_map_trunc_page(vaddr),
                                  prot,
@@ -863,9 +860,6 @@ FALL_THROUGH:
                        kprintf("kernel_trap() ignoring spurious trap 15\n"); 
                        return;
                }
-#if defined(__x86_64__) && DEBUG
-               kprint_state(saved_state);
-#endif
 debugger_entry:
                /* Ensure that the i386_kernel_state at the base of the
                 * current thread's stack (if any) is synchronized with the
@@ -959,6 +953,8 @@ panic_trap(x86_saved_state32_t *regs)
        cr0 = 0;
 }
 #else
+
+
 static void
 panic_trap(x86_saved_state64_t *regs)
 {
@@ -981,6 +977,7 @@ panic_trap(x86_saved_state64_t *regs)
 
        if (regs->isf.trapno < TRAP_TYPES)
                trapname = trap_type[regs->isf.trapno];
+
 #undef panic
        panic("Kernel trap at 0x%016llx, type %d=%s, registers:\n"
              "CR0: 0x%016llx, CR2: 0x%016llx, CR3: 0x%016llx, CR4: 0x%016llx\n"
@@ -989,7 +986,7 @@ panic_trap(x86_saved_state64_t *regs)
              "R8:  0x%016llx, R9:  0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n"
              "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n"
              "RFL: 0x%016llx, RIP: 0x%016llx, CS:  0x%016llx, SS:  0x%016llx\n"
-             "CR2: 0x%016llx, Error code: 0x%016llx, Faulting CPU: 0x%x\n",
+             "CR2: 0x%016llx, Error code: 0x%016llx, Faulting CPU: 0x%x%s\n",
              regs->isf.rip, regs->isf.trapno, trapname,
              cr0, cr2, cr3, cr4,
              regs->rax, regs->rbx, regs->rcx, regs->rdx,
@@ -997,7 +994,8 @@ panic_trap(x86_saved_state64_t *regs)
              regs->r8,  regs->r9,  regs->r10, regs->r11,
              regs->r12, regs->r13, regs->r14, regs->r15,
              regs->isf.rflags, regs->isf.rip, regs->isf.cs & 0xFFFF,
-             regs->isf.ss & 0xFFFF,regs->cr2, regs->isf.err, regs->isf.cpu);
+             regs->isf.ss & 0xFFFF,regs->cr2, regs->isf.err, regs->isf.cpu,
+             "");
        /*
         * This next statement is not executed,
         * but it's needed to stop the compiler using tail call optimization
index 72f01383c49ef5d3e54cd22247e3b7c381b78057..cc0970e3226d7e696d9a70edf6e8e538a1201ed0 100644 (file)
@@ -189,7 +189,6 @@ ipc_bootstrap(void)
                              IKM_SAVED_KMSG_SIZE,
                              "ipc kmsgs");
        zone_change(ipc_kmsg_zone, Z_CALLERACCT, FALSE);
-       zone_change(ipc_kmsg_zone, Z_NOENCRYPT, TRUE);
 
 #if CONFIG_MACF_MACH
        ipc_labelh_zone = 
index ab846c753582464e638a5789ca6a587aa8204604..51f77134a99e59b1ba371421084f5a0643810dcf 100644 (file)
@@ -47,3 +47,5 @@ kdp_reset(void);
 
 void
 kdp_init(void);
+
+void
+kdp_machine_init(void);
index 22bf8978a90950c2bd2d618752309ccf91e886f2..caa07dfa5b53129959e630827bae555199742a5b 100644 (file)
@@ -1405,7 +1405,9 @@ kdp_reset(void)
        kdp.reply_port = kdp.exception_port = 0;
        kdp.is_halted = kdp.is_conn = FALSE;
        kdp.exception_seq = kdp.conn_seq = 0;
-        kdp.session_key = 0;
+       kdp.session_key = 0;
+       pkt.input = manual_pkt.input = FALSE;
+       pkt.len = pkt.off = manual_pkt.len = 0;
 }
 
 struct corehdr *
@@ -2013,8 +2015,6 @@ kdp_panic_dump(void)
 
 panic_dump_exit:
        abort_panic_transfer();
-       pkt.input = FALSE;
-       pkt.len = 0;
        kdp_reset();
        return;
 }
@@ -2122,6 +2122,9 @@ kdp_init(void)
 
        if (debug_boot_arg & DB_REBOOT_POST_CORE)
                kdp_flag |= REBOOT_POST_CORE;
+#if    defined(__x86_64__)     
+       kdp_machine_init();
+#endif
 #if CONFIG_SERIAL_KDP
        char kdpname[80];
        struct in_addr ipaddr;
index 221d683ac7b2d9c5f84f266b83cbb1a83a432e48..6016e48353b44f3bc058da235f1243f57014f47e 100644 (file)
@@ -38,6 +38,8 @@
 #include <i386/pio.h>
 #include <i386/proc_reg.h>
 
+#include <i386/pmap_internal.h>
+
 #include <kdp/kdp_internal.h>
 #include <mach/vm_map.h>
 
@@ -374,3 +376,36 @@ kdp_machine_msr64_write(kdp_writemsr64_req_t *rq, caddr_t data, uint16_t lcpu)
        wrmsr64(msr, *value);
        return KDPERR_NO_ERROR;
 }
+
+pt_entry_t *debugger_ptep;
+vm_map_offset_t debugger_window_kva;
+
+/* Establish a pagetable window that can be remapped on demand.
+ * This is utilized by the debugger to address regions outside
+ * the physical map.
+ */
+
+void
+kdp_machine_init(void) {
+       if (debug_boot_arg == 0)
+               return;
+
+       vm_map_entry_t e;
+       kern_return_t kr = vm_map_find_space(kernel_map,
+           &debugger_window_kva,
+           PAGE_SIZE, 0,
+           VM_MAKE_TAG(VM_MEMORY_IOKIT), &e);
+
+       if (kr != KERN_SUCCESS) {
+               panic("%s: vm_map_find_space failed with %d\n", __FUNCTION__, kr);
+       }
+
+       vm_map_unlock(kernel_map);
+
+       debugger_ptep = pmap_pte(kernel_pmap, debugger_window_kva);
+
+       if (debugger_ptep == NULL) {
+               pmap_expand(kernel_pmap, debugger_window_kva);
+               debugger_ptep = pmap_pte(kernel_pmap, debugger_window_kva);
+       }
+}
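
A hypothetical sketch (not the literal kdp code) of how the window is retargeted later: store a new PTE through debugger_ptep, flush the stale translation, and the chosen physical page becomes readable at debugger_window_kva.

    /* Assumes pmap_store_pte(), pa_to_pte(), INTEL_PTE_* and invlpg()
     * from the surrounding pmap/proc_reg headers; the function name is
     * illustrative.
     */
    static void
    kdp_retarget_debug_window(pmap_paddr_t pa)
    {
            pmap_store_pte(debugger_ptep,
                pa_to_pte(pa) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
            invlpg((uintptr_t)debugger_window_kva);
    }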
index b6d146746e1f6985524c906d8dca9edb0e3c892a..1dd1aee281c1fc2a990b8688d5b51c55ac52f818 100644 (file)
@@ -235,6 +235,14 @@ panic(const char *str, ...)
        thread_t thread;
        wait_queue_t wq;
 
+       if (kdebug_enable) {
+               ml_set_interrupts_enabled(TRUE);
+               kdbg_dump_trace_to_file("/var/tmp/panic.trace");
+       }
+
+       s = splhigh();
+       disable_preemption();
+
 #if    defined(__i386__) || defined(__x86_64__)
        /* Attempt to display the unparsed panic string */
        const char *tstr = str;
@@ -244,11 +252,6 @@ panic(const char *str, ...)
                kprintf("%c", *tstr++);
        kprintf("\n");
 #endif
-       if (kdebug_enable)
-               kdbg_dump_trace_to_file("/var/tmp/panic.trace");
-
-       s = splhigh();
-       disable_preemption();
 
        panic_safe();
 
index 47290e3d8ac421c08349b48cc86e8b7036f573f4..cb31f783cd17eb9d1561caf06fc77e4d3f3fce3d 100644 (file)
@@ -69,7 +69,6 @@
 #include <debug.h>
 #include <xpr_debug.h>
 #include <mach_kdp.h>
-#include <norma_vm.h>
 
 #include <mach/boolean.h>
 #include <mach/machine.h>
@@ -326,11 +325,6 @@ kernel_bootstrap_thread(void)
        device_service_create();
 
        kth_started = 1;
-
-#if MACH_KDP
-       kernel_bootstrap_kprintf("calling kdp_init\n");
-       kdp_init();
-#endif
                
 #if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0
        /*
@@ -340,6 +334,13 @@ kernel_bootstrap_thread(void)
        cpu_physwindow_init(0);
 #endif
 
+       vm_kernel_reserved_entry_init();
+       
+#if MACH_KDP
+       kernel_bootstrap_kprintf("calling kdp_init\n");
+       kdp_init();
+#endif
+
 #if CONFIG_COUNTERS
        pmc_bootstrap();
 #endif
index 84f7cf817f4b267855678402760dd494210397e3..3c3e5ce07def1844e7b1183c2c9a71e10ea49102 100644 (file)
@@ -279,8 +279,6 @@ thread_init(void)
                        THREAD_CHUNK * sizeof(struct thread),
                        "threads");
 
-       zone_change(thread_zone, Z_NOENCRYPT, TRUE);
-
        lck_grp_attr_setdefault(&thread_lck_grp_attr);
        lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
        lck_attr_setdefault(&thread_lck_attr);
index c6bf2f01ead19d17136d9c3473b1900d89491d4e..9d1afa5d6e84a9e430c8c7218737481dd28c24d0 100644 (file)
@@ -95,6 +95,7 @@
 #include <machine/machparam.h>
 
 #include <libkern/OSDebug.h>
+#include <libkern/OSAtomic.h>
 #include <sys/kdebug.h>
 
 /* 
@@ -214,27 +215,47 @@ MACRO_END
 #endif /* ZONE_DEBUG */
 
 /*
- * Support for garbage collection of unused zone pages:
+ * Support for garbage collection of unused zone pages
+ *
+ * The "zone map" is a submap of the kernel map whose virtual range is
+ * reserved up front. When an individual zone needs more storage,
+ * memory is allocated out of the zone map, and the two-level
+ * "zone_page_table" is expanded on demand so that it has entries for
+ * those pages. zone_page_init()/zone_page_alloc() initialize each
+ * page's "alloc_count" to the number of zone elements that occupy the
+ * zone page (at least 1, even when a single element spans multiple
+ * pages).
+ *
+ * Asynchronously, the zone_gc() logic attempts to walk zone free
+ * lists to see if all the elements on a zone page are free. If
+ * "collect_count" (which it increments during the scan) matches
+ * "alloc_count", the zone page is a candidate for collection and the
+ * physical page is returned to the VM system. During this process, the
+ * first word of the zone page is re-used to maintain a linked list of
+ * to-be-collected zone pages.
  */
+typedef uint32_t zone_page_index_t;
+#define ZONE_PAGE_INDEX_INVALID ((zone_page_index_t)0xFFFFFFFFU)
 
 struct zone_page_table_entry {
-       struct zone_page_table_entry    *link;
-       short   alloc_count;
-       short   collect_count;
+       volatile        uint16_t        alloc_count;
+       volatile        uint16_t        collect_count;
 };
 
+#define        ZONE_PAGE_USED  0
+#define ZONE_PAGE_UNUSED 0xffff
+
 /* Forwards */
 void           zone_page_init(
                                vm_offset_t     addr,
-                               vm_size_t       size,
-                               int             value);
+                               vm_size_t       size);
 
 void           zone_page_alloc(
                                vm_offset_t     addr,
                                vm_size_t       size);
 
 void           zone_page_free_element(
-                               struct zone_page_table_entry    **free_pages,
+                               zone_page_index_t       *free_page_list,
                                vm_offset_t     addr,
                                vm_size_t       size);
 
@@ -271,6 +292,7 @@ zone_t              zinfo_zone = ZONE_NULL; /* zone of per-task zone info */
 /*
  *     The VM system gives us an initial chunk of memory.
  *     It has to be big enough to allocate the zone_zone
+ *     and the early zones that follow it, up through the pmap zone.
  */
 
 vm_offset_t    zdata;
@@ -304,23 +326,23 @@ MACRO_END
 
 #define lock_try_zone(zone)    lck_mtx_try_lock_spin(&zone->lock)
 
-kern_return_t          zget_space(
-                               zone_t  zone,
-                               vm_offset_t size,
-                               vm_offset_t *result);
-
-decl_simple_lock_data(,zget_space_lock)
-vm_offset_t    zalloc_next_space;
-vm_offset_t    zalloc_end_of_space;
-vm_size_t      zalloc_wasted_space;
-
 /*
  *     Garbage collection map information
  */
-struct zone_page_table_entry * zone_page_table;
+#define ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE (32)
+struct zone_page_table_entry * volatile zone_page_table[ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE];
+vm_size_t                      zone_page_table_used_size;
 vm_offset_t                    zone_map_min_address;
 vm_offset_t                    zone_map_max_address;
 unsigned int                   zone_pages;
+unsigned int                   zone_page_table_second_level_size;                      /* power of 2 */
+unsigned int                   zone_page_table_second_level_shift_amount;
+
+#define zone_page_table_first_level_slot(x)  ((x) >> zone_page_table_second_level_shift_amount)
+#define zone_page_table_second_level_slot(x) ((x) & (zone_page_table_second_level_size - 1))
+
+void   zone_page_table_expand(zone_page_index_t pindex);
+struct zone_page_table_entry *zone_page_table_lookup(zone_page_index_t pindex);
 
 /*
  *     Exclude more than one concurrent garbage collection
@@ -343,10 +365,6 @@ lck_mtx_ext_t   zone_lck_ext;
         ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) + size -1) <  zone_map_max_address)
 #endif
 
-#define        ZONE_PAGE_USED  0
-#define ZONE_PAGE_UNUSED -1
-
-
 /*
  *     Protects first_zone, last_zone, num_zones,
  *     and the next_zone field of zones.
@@ -1029,9 +1047,10 @@ zinit(
        zone_t          z;
 
        if (zone_zone == ZONE_NULL) {
-               if (zget_space(NULL, sizeof(struct zone), (vm_offset_t *)&z)
-                   != KERN_SUCCESS)
-                       return(ZONE_NULL);
+
+               z = (struct zone *)zdata;
+               zdata += sizeof(*z);
+               zdata_size -= sizeof(*z);
        } else
                z = (zone_t) zalloc(zone_zone);
        if (z == ZONE_NULL)
@@ -1061,7 +1080,11 @@ zinit(
                alloc = PAGE_SIZE;
        else
 #endif
-       {       vm_size_t best, waste; unsigned int i;
+#if    defined(__LP64__)               
+               if (((alloc % size) != 0) || (alloc > PAGE_SIZE * 8))
+#endif
+               {
+               vm_size_t best, waste; unsigned int i;
                best  = PAGE_SIZE;
                waste = best % size;
 
@@ -1103,7 +1126,10 @@ use_this_allocation:
        z->async_pending = FALSE;
        z->caller_acct = TRUE;
        z->noencrypt = FALSE;
-
+       z->no_callout = FALSE;
+       z->async_prio_refill = FALSE;
+       z->prio_refill_watermark = 0;
+       z->zone_replenish_thread = NULL;
 #if CONFIG_ZLEAKS
        z->num_allocs = 0;
        z->num_frees = 0;
@@ -1172,18 +1198,98 @@ use_this_allocation:
 
        return(z);
 }
+
+unsigned       zone_replenish_loops, zone_replenish_wakeups, zone_replenish_wakeups_initiated;
+
+static void zone_replenish_thread(zone_t);
+
+/* High priority VM privileged thread used to asynchronously refill a designated
+ * zone, such as the reserved VM map entry zone.
+ */
+static void
+zone_replenish_thread(zone_t z)
+{
+       vm_size_t free_size;
+       current_thread()->options |= TH_OPT_VMPRIV;
+
+       for (;;) {
+               lock_zone(z);
+               assert(z->prio_refill_watermark != 0);
+               while ((free_size = (z->cur_size - (z->count * z->elem_size))) < (z->prio_refill_watermark * z->elem_size)) {
+                       assert(z->doing_alloc == FALSE);
+                       assert(z->async_prio_refill == TRUE);
+
+                       unlock_zone(z);
+                       int     zflags = KMA_KOBJECT|KMA_NOPAGEWAIT;
+                       vm_offset_t space, alloc_size;
+                       kern_return_t kr;
+                               
+                       if (vm_pool_low())
+                               alloc_size = round_page(z->elem_size);
+                       else
+                               alloc_size = z->alloc_size;
+                               
+                       if (z->noencrypt)
+                               zflags |= KMA_NOENCRYPT;
+                               
+                       kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags);
+
+                       if (kr == KERN_SUCCESS) {
+#if    ZONE_ALIAS_ADDR
+                               if (alloc_size == PAGE_SIZE)
+                                       space = zone_alias_addr(space);
+#endif
+                               zcram(z, space, alloc_size);
+                       } else if (kr == KERN_RESOURCE_SHORTAGE) {
+                               VM_PAGE_WAIT();
+                       } else if (kr == KERN_NO_SPACE) {
+                               kr = kernel_memory_allocate(kernel_map, &space, alloc_size, 0, zflags);
+                               if (kr == KERN_SUCCESS) {
+#if    ZONE_ALIAS_ADDR
+                                       if (alloc_size == PAGE_SIZE)
+                                               space = zone_alias_addr(space);
+#endif
+                                       zcram(z, space, alloc_size);
+                               } else {
+                                       assert_wait_timeout(&z->zone_replenish_thread, THREAD_UNINT, 1, 100 * NSEC_PER_USEC);
+                                       thread_block(THREAD_CONTINUE_NULL);
+                               }
+                       }
+
+                       lock_zone(z);
+                       zone_replenish_loops++;
+               }
+
+               unlock_zone(z);
+               assert_wait(&z->zone_replenish_thread, THREAD_UNINT);
+               thread_block(THREAD_CONTINUE_NULL);
+               zone_replenish_wakeups++;
+       }
+}
+
+void
+zone_prio_refill_configure(zone_t z, vm_size_t low_water_mark)
+{
+       z->prio_refill_watermark = low_water_mark;
+
+       z->async_prio_refill = TRUE;
+       OSMemoryBarrier();
+       kern_return_t tres = kernel_thread_start_priority((thread_continue_t)zone_replenish_thread, z, MAXPRI_KERNEL, &z->zone_replenish_thread);
+
+       if (tres != KERN_SUCCESS) {
+               panic("zone_prio_refill_configure, thread create: 0x%x", tres);
+       }
+
+       thread_deallocate(z->zone_replenish_thread);
+}
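
A hypothetical caller-side sketch (the reserved VM map entry zone is the intended client; the zone parameters and name here are illustrative):

    /* Keep at least 64 elements' worth of backing memory in a zone
     * that must not fail under memory pressure. Z_NOCALLOUT suppresses
     * the older thread_call-based async fill in favor of the replenish
     * thread configured here.
     */
    zone_t rz = zinit(sizeof(struct vm_map_entry),
                      1024 * sizeof(struct vm_map_entry),
                      PAGE_SIZE, "illustrative.reserved.entries");
    zone_change(rz, Z_NOCALLOUT, TRUE);
    zone_prio_refill_configure(rz, 64);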
 
 /*
  *     Cram the given memory into the specified zone.
  */
 void
 zcram(
-       register zone_t         zone,
-       void                    *newaddr,
+       zone_t          zone,
+       vm_offset_t                     newmem,
        vm_size_t               size)
 {
-       register vm_size_t      elem_size;
-       vm_offset_t             newmem = (vm_offset_t) newaddr;
+       vm_size_t       elem_size;
+       boolean_t   from_zm = FALSE;
 
        /* Basic sanity checks */
        assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
@@ -1192,10 +1298,16 @@ zcram(
 
        elem_size = zone->elem_size;
 
+       if (from_zone_map(newmem, size))
+               from_zm = TRUE;
+
+       if (from_zm)
+               zone_page_init(newmem, size);
+
        lock_zone(zone);
        while (size >= elem_size) {
                ADD_TO_ZONE(zone, newmem);
-               if (from_zone_map(newmem, elem_size))
+               if (from_zm)
                        zone_page_alloc(newmem, elem_size);
                zone->count++;  /* compensate for ADD_TO_ZONE */
                size -= elem_size;
@@ -1205,95 +1317,6 @@ zcram(
        unlock_zone(zone);
 }
 
-/*
- * Contiguous space allocator for non-paged zones. Allocates "size" amount
- * of memory from zone_map.
- */
-
-kern_return_t
-zget_space(
-       zone_t  zone,
-       vm_offset_t size,
-       vm_offset_t *result)
-{
-       vm_offset_t     new_space = 0;
-       vm_size_t       space_to_add = 0;
-
-       simple_lock(&zget_space_lock);
-       while ((zalloc_next_space + size) > zalloc_end_of_space) {
-               /*
-                *      Add at least one page to allocation area.
-                */
-
-               space_to_add = round_page(size);
-
-               if (new_space == 0) {
-                       kern_return_t retval;
-                       int     zflags = KMA_KOBJECT|KMA_NOPAGEWAIT;
-
-                       /*
-                        *      Memory cannot be wired down while holding
-                        *      any locks that the pageout daemon might
-                        *      need to free up pages.  [Making the zget_space
-                        *      lock a complex lock does not help in this
-                        *      regard.]
-                        *
-                        *      Unlock and allocate memory.  Because several
-                        *      threads might try to do this at once, don't
-                        *      use the memory before checking for available
-                        *      space again.
-                        */
-
-                       simple_unlock(&zget_space_lock);
-
-                       if (zone == NULL || zone->noencrypt)
-                               zflags |= KMA_NOENCRYPT;
-
-                       retval = kernel_memory_allocate(zone_map, &new_space, space_to_add, 0, zflags);
-                       if (retval != KERN_SUCCESS)
-                               return(retval);
-#if    ZONE_ALIAS_ADDR
-                       if (space_to_add == PAGE_SIZE)
-                               new_space = zone_alias_addr(new_space);
-#endif
-                       zone_page_init(new_space, space_to_add,
-                                                       ZONE_PAGE_USED);
-                       simple_lock(&zget_space_lock);
-                       continue;
-               }
-
-               
-               /*
-                *      Memory was allocated in a previous iteration.
-                *
-                *      Check whether the new region is contiguous
-                *      with the old one.
-                */
-
-               if (new_space != zalloc_end_of_space) {
-                       /*
-                        *      Throw away the remainder of the
-                        *      old space, and start a new one.
-                        */
-                       zalloc_wasted_space +=
-                               zalloc_end_of_space - zalloc_next_space;
-                       zalloc_next_space = new_space;
-               }
-
-               zalloc_end_of_space = new_space + space_to_add;
-
-               new_space = 0;
-       }
-       *result = zalloc_next_space;
-       zalloc_next_space += size;              
-       simple_unlock(&zget_space_lock);
-
-       if (new_space != 0)
-               kmem_free(zone_map, new_space, space_to_add);
-
-       return(KERN_SUCCESS);
-}
-
 
 /*
  *     Steal memory for the zone package.  Called from
@@ -1302,8 +1325,9 @@ zget_space(
 void
 zone_steal_memory(void)
 {
-       zdata_size = round_page(128*sizeof(struct zone));
-       zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0);
+       /* Request enough early memory to get to the pmap zone */
+       zdata_size = 12 * sizeof(struct zone);
+       zdata = (vm_offset_t)pmap_steal_memory(round_page(zdata_size));
 }
 
 
@@ -1334,7 +1358,7 @@ zfill(
                return 0;
 
        zone_change(zone, Z_FOREIGN, TRUE);
-       zcram(zone, (void *)memory, size);
+       zcram(zone, memory, size);
        nalloc = (int)(size / zone->elem_size);
        assert(nalloc >= nelem);
 
@@ -1349,8 +1373,6 @@ zfill(
 void
 zone_bootstrap(void)
 {
-       vm_size_t zone_zone_size;
-       vm_offset_t zone_zone_space;
        char temp_buf[16];
 
 #if 6094439
@@ -1417,11 +1439,6 @@ zone_bootstrap(void)
        last_zone = &first_zone;
        num_zones = 0;
 
-       simple_lock_init(&zget_space_lock, 0);
-       zalloc_next_space = zdata;
-       zalloc_end_of_space = zdata + zdata_size;
-       zalloc_wasted_space = 0;
-
        /* assertion: nobody else called zinit before us */
        assert(zone_zone == ZONE_NULL);
        zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
@@ -1430,9 +1447,7 @@ zone_bootstrap(void)
        zone_change(zone_zone, Z_CALLERACCT, FALSE);
        zone_change(zone_zone, Z_NOENCRYPT, TRUE);
 
-       zone_zone_size = zalloc_end_of_space - zalloc_next_space;
-       zget_space(NULL, zone_zone_size, &zone_zone_space);
-       zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
+       zcram(zone_zone, zdata, zdata_size);
 
        /* initialize fake zones and zone info if tracking by task */
        if (zinfo_per_task) {
@@ -1475,7 +1490,6 @@ zone_init(
        kern_return_t   retval;
        vm_offset_t     zone_min;
        vm_offset_t     zone_max;
-       vm_size_t       zone_table_size;
 
        retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
                               FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
@@ -1487,23 +1501,30 @@ zone_init(
        /*
         * Setup garbage collection information:
         */
-       zone_table_size = atop_kernel(zone_max - zone_min) * 
-                               sizeof(struct zone_page_table_entry);
-       if (kmem_alloc_kobject(zone_map, (vm_offset_t *) &zone_page_table,
-                            zone_table_size) != KERN_SUCCESS)
-               panic("zone_init");
-       zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
-       zone_pages = (unsigned int)atop_kernel(zone_max - zone_min);
        zone_map_min_address = zone_min;
        zone_map_max_address = zone_max;
+
+       zone_pages = (unsigned int)atop_kernel(zone_max - zone_min);
+       zone_page_table_used_size = sizeof(zone_page_table);
+
+       zone_page_table_second_level_size = 1;
+       zone_page_table_second_level_shift_amount = 0;
+       
+       /*
+        * Find the power of 2 for the second level that allows
+        * the first level to fit in ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE
+        * slots.
+        */
+       while ((zone_page_table_first_level_slot(zone_pages-1)) >= ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE) {
+               zone_page_table_second_level_size <<= 1;
+               zone_page_table_second_level_shift_amount++;
+       }
        
        lck_grp_attr_setdefault(&zone_lck_grp_attr);
        lck_grp_init(&zone_lck_grp, "zones", &zone_lck_grp_attr);
        lck_attr_setdefault(&zone_lck_attr);
        lck_mtx_init_ext(&zone_gc_lock, &zone_lck_ext, &zone_lck_grp, &zone_lck_attr);
        
-       zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
-       
 #if CONFIG_ZLEAKS
        /*
         * Initialize the zone leak monitor
@@ -1512,6 +1533,68 @@ zone_init(
 #endif /* CONFIG_ZLEAKS */
 }
 
+void
+zone_page_table_expand(zone_page_index_t pindex)
+{
+       unsigned int first_index;
+       struct zone_page_table_entry * volatile * first_level_ptr;
+
+       assert(pindex < zone_pages);
+
+       first_index = zone_page_table_first_level_slot(pindex);
+       first_level_ptr = &zone_page_table[first_index];
+
+       if (*first_level_ptr == NULL) {
+               /*
+                * The first-level slot is currently NULL,
+                * so attempt to populate it.
+                */
+
+               vm_offset_t second_level_array = 0;
+               vm_size_t second_level_size = round_page(zone_page_table_second_level_size * sizeof(struct zone_page_table_entry));
+               zone_page_index_t i;
+               struct zone_page_table_entry *entry_array;
+
+               if (kmem_alloc_kobject(zone_map, &second_level_array,
+                                                          second_level_size) != KERN_SUCCESS) {
+                       panic("zone_page_table_expand");
+               }
+
+               /*
+                * zone_gc() may scan the "zone_page_table" directly,
+                * so make sure all new slots start in a valid unused state.
+                */
+               entry_array = (struct zone_page_table_entry *)second_level_array;
+               for (i=0; i < zone_page_table_second_level_size; i++) {
+                       entry_array[i].alloc_count = ZONE_PAGE_UNUSED;
+                       entry_array[i].collect_count = 0;
+               }
+
+               if (OSCompareAndSwapPtr(NULL, entry_array, first_level_ptr)) {
+                       /* Old slot was NULL, replaced with expanded level */
+                       OSAddAtomicLong(second_level_size, &zone_page_table_used_size);
+               } else {
+                       /* Old slot was not NULL, someone else expanded first */
+                       kmem_free(zone_map, second_level_array, second_level_size);
+               }
+       } else {
+               /* Old slot was not NULL, already been expanded */
+       }
+}
+
+struct zone_page_table_entry *
+zone_page_table_lookup(zone_page_index_t pindex)
+{
+       unsigned int first_index = zone_page_table_first_level_slot(pindex);
+       struct zone_page_table_entry *second_level = zone_page_table[first_index];
+
+       if (second_level) {
+               return &second_level[zone_page_table_second_level_slot(pindex)];
+       }
+
+       return NULL;
+}
+
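
To make the two-level indexing concrete, a worked example using the sizing loop in zone_init() above:

    /* With zone_pages = 1,000,000 and a 32-entry first level,
     * zone_init() settles on a second level of 32768 entries
     * (shift = 15), since 999999 >> 15 == 30 < 32. Page index 70000
     * then resolves to:
     *   first-level slot:  70000 >> 15   == 2
     *   second-level slot: 70000 & 32767 == 4464
     */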
 extern volatile SInt32 kfree_nop_count;
 
 #pragma mark -
@@ -1530,6 +1613,7 @@ zalloc_canblock(
        uintptr_t       zbt[MAX_ZTRACE_DEPTH];  /* used in zone leak logging and zone leak detection */
        int             numsaved = 0;
        int                     i;
+       boolean_t       zone_replenish_wakeup = FALSE;
 
 #if CONFIG_ZLEAKS
        uint32_t        zleak_tracedepth = 0;  /* log this allocation if nonzero */
@@ -1564,10 +1648,10 @@ zalloc_canblock(
 
        REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 
-       while ((addr == 0) && canblock && (zone->doing_gc)) {
-               zone->waiting = TRUE;
-               zone_sleep(zone);
-               REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
+       if (zone->async_prio_refill &&
+       ((zone->cur_size - (zone->count * zone->elem_size)) < (zone->prio_refill_watermark * zone->elem_size))) {
+               zone_replenish_wakeup = TRUE;
+               zone_replenish_wakeups_initiated++;
        }
 
        while ((addr == 0) && canblock) {
@@ -1581,8 +1665,20 @@ zalloc_canblock(
                         */
                        zone->waiting = TRUE;
                        zone_sleep(zone);
-               }
-               else {
+               } else if (zone->doing_gc) {
+                       /* zone_gc() is running. Since we need an element
+                        * from the free list that is currently being
+                        * collected, set the waiting bit so the GC
+                        * pass will wake us when it yields, and try
+                        * again once we reacquire the lock.
+                        */
+                       zone->waiting = TRUE;
+                       zone_sleep(zone);
+               } else {
+                       vm_offset_t space;
+                       vm_size_t alloc_size;
+                       int retry = 0;
+
                        if ((zone->cur_size + zone->elem_size) >
                            zone->max_size) {
                                if (zone->exhaustible)
@@ -1608,141 +1704,85 @@ zalloc_canblock(
                        zone->doing_alloc = TRUE;
                        unlock_zone(zone);
 
-                       if (zone->collectable) {
-                               vm_offset_t space;
-                               vm_size_t alloc_size;
-                               int retry = 0;
-
-                               for (;;) {
-                                       int     zflags = KMA_KOBJECT|KMA_NOPAGEWAIT;
-
-                                       if (vm_pool_low() || retry >= 1)
-                                               alloc_size = 
-                                                 round_page(zone->elem_size);
-                                       else
-                                               alloc_size = zone->alloc_size;
-
-                                       if (zone->noencrypt)
-                                               zflags |= KMA_NOENCRYPT;
-
-                                       retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags);
-                                       if (retval == KERN_SUCCESS) {
+                       for (;;) {
+                               int     zflags = KMA_KOBJECT|KMA_NOPAGEWAIT;
+                               
+                               if (vm_pool_low() || retry >= 1)
+                                       alloc_size = 
+                                               round_page(zone->elem_size);
+                               else
+                                       alloc_size = zone->alloc_size;
+                               
+                               if (zone->noencrypt)
+                                       zflags |= KMA_NOENCRYPT;
+                               
+                               retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags);
+                               if (retval == KERN_SUCCESS) {
 #if    ZONE_ALIAS_ADDR
-                                               if (alloc_size == PAGE_SIZE)
-                                                       space = zone_alias_addr(space);
+                                       if (alloc_size == PAGE_SIZE)
+                                               space = zone_alias_addr(space);
 #endif
-
+                                       
 #if CONFIG_ZLEAKS
-                                               if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) {
-                                                       if (zone_map->size >= zleak_global_tracking_threshold) {
-                                                               kern_return_t kr;
-                                                               
-                                                               kr = zleak_activate();
-                                                               if (kr != KERN_SUCCESS) {
-                                                                       printf("Failed to activate live zone leak debugging (%d).\n", kr);
-                                                               }
+                                       if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) {
+                                               if (zone_map->size >= zleak_global_tracking_threshold) {
+                                                       kern_return_t kr;
+                                                       
+                                                       kr = zleak_activate();
+                                                       if (kr != KERN_SUCCESS) {
+                                                               printf("Failed to activate live zone leak debugging (%d).\n", kr);
                                                        }
                                                }
-
-                                               if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) {
-                                                       if (zone->cur_size > zleak_per_zone_tracking_threshold) {
-                                                               zone->zleak_on = TRUE;
-                                                       }       
-                                               }
-#endif /* CONFIG_ZLEAKS */
-
-                                               zone_page_init(space, alloc_size,
-                                                              ZONE_PAGE_USED);
-                                               zcram(zone, (void *)space, alloc_size);
-
-                                               break;
-                                       } else if (retval != KERN_RESOURCE_SHORTAGE) {
-                                               retry++;
-
-                                               if (retry == 2) {
-                                                       zone_gc();
-                                                       printf("zalloc did gc\n");
-                                                       zone_display_zprint();
-                                               }
-                                               if (retry == 3) {
-                                                 panic_include_zprint = TRUE;
-#if CONFIG_ZLEAKS
-                                                 if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
-                                                         panic_include_ztrace = TRUE;
-                                                 }
-#endif /* CONFIG_ZLEAKS */             
-                                                       /* TODO: Change this to something more descriptive, perhaps 
-                                                        * 'zone_map exhausted' only if we get retval 3 (KERN_NO_SPACE).
-                                                        */
-                                                 panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count);
-                                               }
-                                       } else {
-                                               break;
                                        }
-                               }
-                               lock_zone(zone);
-                               zone->doing_alloc = FALSE; 
-                               if (zone->waiting) {
-                                       zone->waiting = FALSE;
-                                       zone_wakeup(zone);
-                               }
-                               REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
-                               if (addr == 0 &&
-                                       retval == KERN_RESOURCE_SHORTAGE) {
-                                       unlock_zone(zone);
                                        
-                                       VM_PAGE_WAIT();
-                                       lock_zone(zone);
-                               }
-                       } else {
-                               vm_offset_t space;
-                               retval = zget_space(zone, zone->elem_size, &space);
-
-                               lock_zone(zone);
-                               zone->doing_alloc = FALSE; 
-                               if (zone->waiting) {
-                                       zone->waiting = FALSE;
-                                       thread_wakeup((event_t)zone);
-                               }
-                               if (retval == KERN_SUCCESS) {
-                                       zone->count++;
-                                       zone->sum_count++;
-                                       zone->cur_size += zone->elem_size;
-#if    ZONE_DEBUG
-                                       if (zone_debug_enabled(zone)) {
-                                           enqueue_tail(&zone->active_zones, (queue_entry_t)space);
+                                       if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) {
+                                               if (zone->cur_size > zleak_per_zone_tracking_threshold) {
+                                                       zone->zleak_on = TRUE;
+                                               }       
                                        }
-#endif
-                                       unlock_zone(zone);
-                                       zone_page_alloc(space, zone->elem_size);
-#if    ZONE_DEBUG
-                                       if (zone_debug_enabled(zone))
-                                               space += ZONE_DEBUG_OFFSET;
-#endif
-                                       addr = space;
-                                       goto success;
-                               }
-                               if (retval == KERN_RESOURCE_SHORTAGE) {
-                                       unlock_zone(zone);
+#endif /* CONFIG_ZLEAKS */
                                        
-                                       VM_PAGE_WAIT();
-                                       lock_zone(zone);
-                               } else {
-                                       /*
-                                        * Equivalent to a 'retry fail 3', we're out of address space in the zone_map
-                                        * (if it returned KERN_NO_SPACE)
-                                        */
-                                       if (retval == KERN_NO_SPACE) {
+                                       zcram(zone, space, alloc_size);
+                                       
+                                       break;
+                               } else if (retval != KERN_RESOURCE_SHORTAGE) {
+                                       retry++;
+                                       
+                                       if (retry == 2) {
+                                               zone_gc();
+                                               printf("zalloc did gc\n");
+                                               zone_display_zprint();
+                                       }
+                                       if (retry == 3) {
                                                panic_include_zprint = TRUE;
 #if CONFIG_ZLEAKS
-                                                 if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
+                                               if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
                                                        panic_include_ztrace = TRUE;
                                                }
-#endif /* CONFIG_ZLEAKS */
+#endif /* CONFIG_ZLEAKS */             
+                                               /* TODO: Change this to something more descriptive, perhaps 
+                                                * 'zone_map exhausted' only if we get retval 3 (KERN_NO_SPACE).
+                                                */
+                                               panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count);
                                        }
-                                       panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
+                               } else {
+                                       break;
                                }
                        }
+                       lock_zone(zone);
+                       zone->doing_alloc = FALSE; 
+                       if (zone->waiting) {
+                               zone->waiting = FALSE;
+                               zone_wakeup(zone);
+                       }
+                       REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
+                       if (addr == 0 &&
+                               retval == KERN_RESOURCE_SHORTAGE) {
+                               unlock_zone(zone);
+                               
+                               VM_PAGE_WAIT();
+                               lock_zone(zone);
+                       }
                }
                if (addr == 0)
                        REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
@@ -1832,7 +1872,7 @@ empty_slot:
                          zcurrent = 0;
        }
 
-       if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
+       if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
                zone->async_pending = TRUE;
                unlock_zone(zone);
                thread_call_enter(&zone->call_async_alloc);
@@ -1855,7 +1895,9 @@ empty_slot:
 
        unlock_zone(zone);
 
-success:
+       if (zone_replenish_wakeup)
+               thread_wakeup(&zone->zone_replenish_thread);
+
        TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
 
        if (addr) {
@@ -2179,6 +2221,9 @@ zone_change(
                case Z_CALLERACCT:
                        zone->caller_acct = value;
                        break;
+               case Z_NOCALLOUT:
+                       zone->no_callout = value;
+                       break;
 #if MACH_ASSERT
                default:
                        panic("Zone_change: Wrong Item Type!");
@@ -2222,8 +2267,7 @@ zprealloc(
        if (size != 0) {
                if (kmem_alloc_kobject(zone_map, &addr, size) != KERN_SUCCESS)
                  panic("zprealloc");
-               zone_page_init(addr, size, ZONE_PAGE_USED);
-               zcram(zone, (void *)addr, size);
+               zcram(zone, addr, size);
        }
 }
 
@@ -2237,7 +2281,7 @@ zone_page_collectable(
        vm_size_t       size)
 {
        struct zone_page_table_entry    *zp;
-       natural_t i, j;
+       zone_page_index_t i, j;
 
 #if    ZONE_ALIAS_ADDR
        addr = zone_virtual_addr(addr);
@@ -2247,12 +2291,14 @@ zone_page_collectable(
                panic("zone_page_collectable");
 #endif
 
-       i = (natural_t)atop_kernel(addr-zone_map_min_address);
-       j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
+       i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address);
+       j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address);
 
-       for (zp = zone_page_table + i; i <= j; zp++, i++)
+       for (; i <= j; i++) {
+               zp = zone_page_table_lookup(i);
                if (zp->collect_count == zp->alloc_count)
                        return (TRUE);
+       }
 
        return (FALSE);
 }
@@ -2263,7 +2309,7 @@ zone_page_keep(
        vm_size_t       size)
 {
        struct zone_page_table_entry    *zp;
-       natural_t i, j;
+       zone_page_index_t i, j;
 
 #if    ZONE_ALIAS_ADDR
        addr = zone_virtual_addr(addr);
@@ -2273,11 +2319,13 @@ zone_page_keep(
                panic("zone_page_keep");
 #endif
 
-       i = (natural_t)atop_kernel(addr-zone_map_min_address);
-       j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
+       i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address);
+       j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address);
 
-       for (zp = zone_page_table + i; i <= j; zp++, i++)
+       for (; i <= j; i++) {
+               zp = zone_page_table_lookup(i);
                zp->collect_count = 0;
+       }
 }
 
 void
@@ -2286,7 +2334,7 @@ zone_page_collect(
        vm_size_t       size)
 {
        struct zone_page_table_entry    *zp;
-       natural_t i, j;
+       zone_page_index_t i, j;
 
 #if    ZONE_ALIAS_ADDR
        addr = zone_virtual_addr(addr);
@@ -2296,21 +2344,22 @@ zone_page_collect(
                panic("zone_page_collect");
 #endif
 
-       i = (natural_t)atop_kernel(addr-zone_map_min_address);
-       j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
+       i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address);
+       j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address);
 
-       for (zp = zone_page_table + i; i <= j; zp++, i++)
+       for (; i <= j; i++) {
+               zp = zone_page_table_lookup(i);
                ++zp->collect_count;
+       }
 }
 
 void
 zone_page_init(
        vm_offset_t     addr,
-       vm_size_t       size,
-       int             value)
+       vm_size_t       size)
 {
        struct zone_page_table_entry    *zp;
-       natural_t i, j;
+       zone_page_index_t i, j;
 
 #if    ZONE_ALIAS_ADDR
        addr = zone_virtual_addr(addr);
@@ -2320,11 +2369,16 @@ zone_page_init(
                panic("zone_page_init");
 #endif
 
-       i = (natural_t)atop_kernel(addr-zone_map_min_address);
-       j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
+       i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address);
+       j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address);
+
+       for (; i <= j; i++) {
+               /* make sure entry exists before marking unused */
+               zone_page_table_expand(i);
 
-       for (zp = zone_page_table + i; i <= j; zp++, i++) {
-               zp->alloc_count = value;
+               zp = zone_page_table_lookup(i);
+               assert(zp);
+               zp->alloc_count = ZONE_PAGE_UNUSED;
                zp->collect_count = 0;
        }
 }
@@ -2335,7 +2389,7 @@ zone_page_alloc(
        vm_size_t       size)
 {
        struct zone_page_table_entry    *zp;
-       natural_t i, j;
+       zone_page_index_t i, j;
 
 #if    ZONE_ALIAS_ADDR
        addr = zone_virtual_addr(addr);
@@ -2345,29 +2399,32 @@ zone_page_alloc(
                panic("zone_page_alloc");
 #endif
 
-       i = (natural_t)atop_kernel(addr-zone_map_min_address);
-       j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
+       i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address);
+       j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address);
+
+       for (; i <= j; i++) {
+               zp = zone_page_table_lookup(i);
+               assert(zp);
 
-       for (zp = zone_page_table + i; i <= j; zp++, i++) {
                /*
-                * Set alloc_count to (ZONE_PAGE_USED + 1) if
+                * Set alloc_count to ZONE_PAGE_USED if
                 * it was previously set to ZONE_PAGE_UNUSED.
                 */
                if (zp->alloc_count == ZONE_PAGE_UNUSED)
-                       zp->alloc_count = 1;
-               else
-                       ++zp->alloc_count;
+                       zp->alloc_count = ZONE_PAGE_USED;
+
+               ++zp->alloc_count;
        }
 }
 
 void
 zone_page_free_element(
-       struct zone_page_table_entry    **free_pages,
+       zone_page_index_t       *free_page_list,
        vm_offset_t     addr,
        vm_size_t       size)
 {
        struct zone_page_table_entry    *zp;
-       natural_t i, j;
+       zone_page_index_t i, j;
 
 #if    ZONE_ALIAS_ADDR
        addr = zone_virtual_addr(addr);
@@ -2377,18 +2434,28 @@ zone_page_free_element(
                panic("zone_page_free_element");
 #endif
 
-       i = (natural_t)atop_kernel(addr-zone_map_min_address);
-       j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address);
+       i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address);
+       j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address);
+
+       for (; i <= j; i++) {
+               zp = zone_page_table_lookup(i);
 
-       for (zp = zone_page_table + i; i <= j; zp++, i++) {
                if (zp->collect_count > 0)
                        --zp->collect_count;
                if (--zp->alloc_count == 0) {
+                       vm_address_t        free_page_address;
+
                        zp->alloc_count  = ZONE_PAGE_UNUSED;
                        zp->collect_count = 0;
 
-                       zp->link = *free_pages;
-                       *free_pages = zp;
+
+                       /*
+                        * This element was the last one on this page;
+                        * re-use the page's storage to link it into
+                        * the free-page list.
+                        */
+                       free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)i);
+                       *(zone_page_index_t *)free_page_address = *free_page_list;
+                       *free_page_list = i;
                }
        }
 }
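
The free-page list built here is index-linked rather than pointer-linked: once a page holds no live elements, its first word is free to store the index of the next reclaimable page. The matching pop, as performed by the reclaim loop in zone_gc() below, looks like:

    /* Pop the head index and recover the page's VA for kmem_free();
     * the next link lives in the first word of the page itself.
     */
    zone_page_index_t head = zone_free_page_head;
    vm_address_t      addr = zone_map_min_address +
                             PAGE_SIZE * (vm_size_t)head;
    zone_free_page_head = *(zone_page_index_t *)addr;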
@@ -2451,7 +2518,7 @@ zone_gc(void)
        unsigned int    max_zones;
        zone_t                  z;
        unsigned int    i;
-       struct zone_page_table_entry    *zp, *zone_free_pages;
+       zone_page_index_t zone_free_page_head;
 
        lck_mtx_lock(&zone_gc_lock);
 
@@ -2461,11 +2528,15 @@ zone_gc(void)
        simple_unlock(&all_zones_lock);
 
 #if MACH_ASSERT
-       for (i = 0; i < zone_pages; i++)
-               assert(zone_page_table[i].collect_count == 0);
+       for (i = 0; i < zone_pages; i++) {
+               struct zone_page_table_entry    *zp;
+       
+               zp = zone_page_table_lookup(i);
+               assert(!zp || (zp->collect_count == 0));
+       }
 #endif /* MACH_ASSERT */
 
-       zone_free_pages = NULL;
+       zone_free_page_head = ZONE_PAGE_INDEX_INVALID;
 
        for (i = 0; i < max_zones; i++, z = z->next_zone) {
                unsigned int                            n, m;
@@ -2546,6 +2617,7 @@ zone_gc(void)
 
                        if (++n >= 50) {
                                if (z->waiting == TRUE) {
+                                       /* z->waiting checked without lock held, rechecked below after locking */
                                        lock_zone(z);
 
                                        if (keep != NULL) {
@@ -2586,6 +2658,11 @@ zone_gc(void)
 
                        ADD_LIST_TO_ZONE(z, keep, tail);
 
+                       if (z->waiting) {
+                               z->waiting = FALSE;
+                               zone_wakeup(z);
+                       }
+
                        unlock_zone(z);
                }
 
@@ -2601,11 +2678,20 @@ zone_gc(void)
                n = 0; tail = keep = NULL;
                while (elt != NULL) {
                        if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
+                               struct zone_free_element *next_elt = elt->next;
+
                                size_freed += elt_size;
-                               zone_page_free_element(&zone_free_pages,
+
+                               /*
+                                * If this is the last allocation on the page(s),
+                                * we may use their storage to maintain the linked
+                                * list of free-able pages. So store elt->next because
+                                * "elt" may be scribbled over.
+                                */
+                               zone_page_free_element(&zone_free_page_head,
                                                                                (vm_offset_t)elt, elt_size);
 
-                               elt = elt->next;
+                               elt = next_elt;
 
                                ++zgc_stats.elems_freed;
                        }
@@ -2680,13 +2766,17 @@ zone_gc(void)
         * Reclaim the pages we are freeing.
         */
 
-       while ((zp = zone_free_pages) != NULL) {
-               zone_free_pages = zp->link;
+       while (zone_free_page_head != ZONE_PAGE_INDEX_INVALID) {
+               zone_page_index_t       zind = zone_free_page_head;
+               vm_address_t            free_page_address;
 #if    ZONE_ALIAS_ADDR
                z = (zone_t)zone_virtual_addr((vm_map_address_t)z);
 #endif
-               kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
-                                                                               (zp - zone_page_table), PAGE_SIZE);
+               /* Use the first word of the page about to be freed to find the next free page */
+               free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)zind);
+               zone_free_page_head = *(zone_page_index_t *)free_page_address;
+
+               kmem_free(zone_map, free_page_address, PAGE_SIZE);
                ++zgc_stats.pgs_freed;
        }
 
index d7d72223903be9913151b92160551c75c25b4718..81322fd9f92bba554bc85f82e4e92b36597ead57 100644 (file)
@@ -114,7 +114,9 @@ struct zone {
 #endif /* ZONE_DEBUG */
        /* boolean_t */ caller_acct: 1, /* do we account allocation/free to the caller? */  
        /* boolean_t */ doing_gc :1,    /* garbage collect in progress? */
-       /* boolean_t */ noencrypt :1;
+       /* boolean_t */ noencrypt :1,
+       /* boolean_t */ no_callout:1,
+       /* boolean_t */ async_prio_refill:1;
        int             index;          /* index into zone_info arrays for this zone */
        struct zone *   next_zone;      /* Link for all-zones list */
        call_entry_data_t       call_async_alloc;       /* callout for asynchronous alloc */
@@ -128,6 +130,8 @@ struct zone {
        uint32_t num_frees;             /* free stats for zleak benchmarks */
        uint32_t zleak_capture; /* per-zone counter for capturing every N allocations */
 #endif /* CONFIG_ZLEAKS */
+       vm_size_t       prio_refill_watermark;
+       thread_t        zone_replenish_thread;
 };
 
 /*
@@ -232,7 +236,7 @@ extern void *       zget(
 /* Fill zone with memory */
 extern void            zcram(
                                        zone_t          zone,
-                                       void            *newmem,
+                                       vm_offset_t     newmem,
                                        vm_size_t       size);
 
 /* Initially fill zone with specified number of elements */
@@ -245,7 +249,7 @@ extern void         zone_change(
                                        zone_t                  zone,
                                        unsigned int    item,
                                        boolean_t               value);
-
+extern void            zone_prio_refill_configure(zone_t, vm_size_t);
 /* Item definitions */
 #define Z_EXHAUST      1       /* Make zone exhaustible        */
 #define Z_COLLECT      2       /* Make zone collectable        */
@@ -253,7 +257,9 @@ extern void         zone_change(
 #define        Z_FOREIGN       4       /* Allow collectable zone to contain foreign elements */
 #define Z_CALLERACCT   5       /* Account alloc/free against the caller */
 #define Z_NOENCRYPT    6       /* Don't encrypt zone during hibernation */
-
+#define Z_NOCALLOUT    7       /* Don't asynchronously replenish the zone via
+                                * callouts
+                                */
 /* Preallocate space for zone from zone map */
 extern void            zprealloc(
                                        zone_t          zone,
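
Taken together, the new Z_NOCALLOUT flag and zone_prio_refill_configure() let a zone opt out of the asynchronous callout mechanism and instead keep a reserve that a dedicated replenish thread tops up whenever the free count falls below a watermark. A hedged usage sketch against these declarations; the element type, sizes, and zone name are invented, and it assumes an xnu kernel build with kern/zalloc.h available:

    #include <kern/zalloc.h>

    struct my_elem { long a, b; };       /* hypothetical element type */

    static zone_t my_reserved_zone;

    void my_zone_setup(void)
    {
        my_reserved_zone = zinit(sizeof(struct my_elem), /* element size */
                                 64 * 1024,              /* max memory   */
                                 PAGE_SIZE,              /* alloc chunk  */
                                 "my reserved elems");
        zone_change(my_reserved_zone, Z_NOCALLOUT, TRUE);   /* no async callout */
        /* keep roughly a page's worth of elements in reserve; the replenish
         * thread refills when the free count dips below this watermark */
        zone_prio_refill_configure(my_reserved_zone,
                                   PAGE_SIZE / sizeof(struct my_elem));
    }

vm_map.c in this same commit uses exactly this pairing for its reserved map-entry zone.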
index dba05b94788e172f73a309ce40e068d0844ca1ad..2d542d3dd4424482193d7ec6a5afcb454edbb7ed 100644 (file)
@@ -33,7 +33,7 @@
 #define _MACH_I386_THREAD_STATE_H_
 
 /* Size of maximum exported thread state in words */
-#define I386_THREAD_STATE_MAX  (144)    /* Size of biggest state possible */
+#define I386_THREAD_STATE_MAX  (224)    /* Size of biggest state possible */
 
 #if defined (__i386__) || defined(__x86_64__)
 #define THREAD_STATE_MAX       I386_THREAD_STATE_MAX
index 06beca0ee664a20fef184d24e44064a06e23fe39..b405952d191768ecd6cf364edab2e2152eb9344d 100644 (file)
@@ -34,6 +34,6 @@
 
 extern void vm_mem_bootstrap(void) __attribute__((section("__TEXT, initcode")));
 extern void vm_mem_init(void) __attribute__((section("__TEXT, initcode")));
-extern void vm_map_steal_memory(void);
+extern void vm_map_steal_memory(void) __attribute__((section("__TEXT, initcode")));
 
 #endif /* VM_INIT_H */
index 604bc202f79afb47653d6f44027974565c72259e..0ce07a4d9243d29bd08b5e03e27695a0273c7402 100644 (file)
@@ -121,7 +121,7 @@ static boolean_t    vm_map_range_check(
        vm_map_entry_t  *entry);
 
 static vm_map_entry_t  _vm_map_entry_create(
-       struct vm_map_header    *map_header);
+       struct vm_map_header    *map_header, boolean_t map_locked);
 
 static void            _vm_map_entry_dispose(
        struct vm_map_header    *map_header,
@@ -303,8 +303,9 @@ __private_extern__ void  default_freezer_mapping_free(void**, boolean_t all);
  * wire count; it's used for map splitting and zone changing in
  * vm_map_copyout.
  */
-#define vm_map_entry_copy(NEW,OLD) \
-MACRO_BEGIN                                     \
+#define vm_map_entry_copy(NEW,OLD)     \
+MACRO_BEGIN                            \
+boolean_t _vmec_reserved = (NEW)->from_reserved_zone;  \
        *(NEW) = *(OLD);                \
        (NEW)->is_shared = FALSE;       \
        (NEW)->needs_wakeup = FALSE;    \
@@ -312,9 +313,15 @@ MACRO_BEGIN                                     \
        (NEW)->wired_count = 0;         \
        (NEW)->user_wired_count = 0;    \
        (NEW)->permanent = FALSE;       \
+       (NEW)->from_reserved_zone = _vmec_reserved;                     \
 MACRO_END
 
-#define vm_map_entry_copy_full(NEW,OLD)        (*(NEW) = *(OLD))
+#define vm_map_entry_copy_full(NEW,OLD)                        \
+MACRO_BEGIN                                            \
+boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
+(*(NEW) = *(OLD));                                     \
+(NEW)->from_reserved_zone = _vmecf_reserved;                   \
+MACRO_END
 
 /*
  *     Decide if we want to allow processes to execute from their data or stack areas.
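
Both copy macros now save and restore from_reserved_zone around the wholesale structure assignment, because that bit describes the destination entry's own allocation, not the source's; copying it across would misroute the entry at free time. The same idea as a standalone function (types simplified):

    #include <stdbool.h>

    struct entry {
        int  payload;                /* stands in for the copied fields */
        bool from_reserved_zone;     /* describes this entry's own storage */
    };

    static void entry_copy_full(struct entry *dst, const struct entry *src)
    {
        bool reserved = dst->from_reserved_zone;  /* save: property of dst */
        *dst = *src;                              /* wholesale copy */
        dst->from_reserved_zone = reserved;       /* restore afterwards */
    }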
@@ -419,7 +426,8 @@ override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
 
 static zone_t  vm_map_zone;            /* zone for vm_map structures */
 static zone_t  vm_map_entry_zone;      /* zone for vm_map_entry structures */
-static zone_t  vm_map_kentry_zone;     /* zone for kernel entry structures */
+static zone_t  vm_map_entry_reserved_zone;     /* zone with reserve for non-blocking
+                                        * allocations */
 static zone_t  vm_map_copy_zone;       /* zone for vm_map_copy structures */
 
 
@@ -435,7 +443,6 @@ static void         *map_data;
 static vm_size_t       map_data_size;
 static void            *kentry_data;
 static vm_size_t       kentry_data_size;
-static int             kentry_count = 2048;            /* to init kentry_data_size */
 
 #if CONFIG_EMBEDDED
 #define                NO_COALESCE_LIMIT  0
@@ -603,7 +610,7 @@ lck_attr_t          vm_map_lck_attr;
  *
  *     vm_map_zone:            used to allocate maps.
  *     vm_map_entry_zone:      used to allocate map entries.
- *     vm_map_kentry_zone:     used to allocate map entries for the kernel.
+ *     vm_map_entry_reserved_zone:     fallback zone for kernel map entries
  *
  *     The kernel allocates map entries from a special zone that is initially
  *     "crammed" with memory.  It would be difficult (perhaps impossible) for
@@ -615,37 +622,46 @@ void
 vm_map_init(
        void)
 {
+       vm_size_t entry_zone_alloc_size;
        vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
                            PAGE_SIZE, "maps");
        zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
-
+#if    defined(__LP64__)
+       entry_zone_alloc_size = PAGE_SIZE * 5;
+#else
+       entry_zone_alloc_size = PAGE_SIZE * 6;
+#endif
+       
        vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
-                                 1024*1024, PAGE_SIZE*5,
-                                 "non-kernel map entries");
+                                 1024*1024, entry_zone_alloc_size,
+                                 "VM map entries");
        zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
+       zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
 
-       vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
-                                  kentry_data_size, kentry_data_size,
-                                  "kernel map entries");
-       zone_change(vm_map_kentry_zone, Z_NOENCRYPT, TRUE);
+       vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
+                                  kentry_data_size * 64, kentry_data_size,
+                                  "Reserved VM map entries");
+       zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
 
        vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
-                                16*1024, PAGE_SIZE, "map copies");
+                                16*1024, PAGE_SIZE, "VM map copies");
        zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
 
        /*
         *      Cram the map and kentry zones with initial data.
-        *      Set kentry_zone non-collectible to aid zone_gc().
+        *      Set reserved_zone non-collectible to aid zone_gc().
         */
        zone_change(vm_map_zone, Z_COLLECT, FALSE);
-       zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
-       zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
-       zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE);
-       zone_change(vm_map_kentry_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
+
+       zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
+       zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
+       zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
+       zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
+       zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
        zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 
-       zcram(vm_map_zone, map_data, map_data_size);
-       zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
+       zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
+       zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
        
        lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
        lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
@@ -656,26 +672,30 @@ void
 vm_map_steal_memory(
        void)
 {
+       uint32_t kentry_initial_pages;
+
        map_data_size = round_page(10 * sizeof(struct _vm_map));
        map_data = pmap_steal_memory(map_data_size);
 
-#if 0
        /*
-        * Limiting worst case: vm_map_kentry_zone needs to map each "available"
-        * physical page (i.e. that beyond the kernel image and page tables)
-        * individually; we guess at most one entry per eight pages in the
-        * real world. This works out to roughly .1 of 1% of physical memory,
-        * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
+        * kentry_initial_pages is the number of pages of kernel map entries
+        * set aside for bootstrap, until the asynchronous replenishment
+        * scheme is activated and/or entries become available from the
+        * general map entry pool.
         */
+#if    defined(__LP64__)
+       kentry_initial_pages = 10;
+#else
+       kentry_initial_pages = 6;
 #endif
-       kentry_count = pmap_free_pages() / 8;
-
-
-       kentry_data_size =
-               round_page(kentry_count * sizeof(struct vm_map_entry));
+       kentry_data_size = kentry_initial_pages * PAGE_SIZE;
        kentry_data = pmap_steal_memory(kentry_data_size);
 }
 
+void vm_kernel_reserved_entry_init(void) {
+       zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
+}
+
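
For scale, the watermark computed above comes to a few hundred entries. A worked example only; the entry size is an assumption, not taken from this diff:

    /* With 4 KiB pages and a vm_map_entry of roughly 80 bytes on LP64:
     *
     *     (6 * PAGE_SIZE) / sizeof(struct vm_map_entry)
     *         = (6 * 4096) / 80
     *         = 307 entries
     *
     * so the replenish thread starts refilling while several pages' worth
     * of reserved entries still remain. */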
 /*
  *     vm_map_create:
  *
@@ -742,27 +762,41 @@ vm_map_create(
  *     Allocates a VM map entry for insertion in the
  *     given map (or map copy).  No fields are filled.
  */
-#define        vm_map_entry_create(map) \
-       _vm_map_entry_create(&(map)->hdr)
+#define        vm_map_entry_create(map, map_locked)    _vm_map_entry_create(&(map)->hdr, map_locked)
 
-#define        vm_map_copy_entry_create(copy) \
-       _vm_map_entry_create(&(copy)->cpy_hdr)
+#define        vm_map_copy_entry_create(copy, map_locked)                                      \
+       _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
+unsigned reserved_zalloc_count, nonreserved_zalloc_count;
 
 static vm_map_entry_t
 _vm_map_entry_create(
-       register struct vm_map_header   *map_header)
+       struct vm_map_header    *map_header, boolean_t __unused map_locked)
 {
-       register zone_t zone;
-       register vm_map_entry_t entry;
+       zone_t  zone;
+       vm_map_entry_t  entry;
 
-       if (map_header->entries_pageable)
-               zone = vm_map_entry_zone;
-       else
-               zone = vm_map_kentry_zone;
+       zone = vm_map_entry_zone;
+
+       assert(map_header->entries_pageable ? !map_locked : TRUE);
+
+       if (map_header->entries_pageable) {
+               entry = (vm_map_entry_t) zalloc(zone);
+       }
+       else {
+               entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
+
+               if (entry == VM_MAP_ENTRY_NULL) {
+                       zone = vm_map_entry_reserved_zone;
+                       entry = (vm_map_entry_t) zalloc(zone);
+                       OSAddAtomic(1, &reserved_zalloc_count);
+               } else
+                       OSAddAtomic(1, &nonreserved_zalloc_count);
+       }
 
-       entry = (vm_map_entry_t) zalloc(zone);
        if (entry == VM_MAP_ENTRY_NULL)
                panic("vm_map_entry_create");
+       entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
+
        vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
 
        return(entry);
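
_vm_map_entry_create() now tries a non-blocking allocation from the general entry zone first and dips into the reserved zone only when that fails, with atomic counters tracking each path. The two-tier shape in isolation (both allocators are hypothetical stand-ins):

    #include <stddef.h>

    void *general_alloc_nonblocking(void);  /* hypothetical: may return NULL */
    void *reserved_alloc(void);             /* hypothetical: watermark-refilled pool */

    void *kernel_entry_alloc(void)
    {
        void *e = general_alloc_nonblocking();  /* cheap path, never blocks */
        if (e == NULL)
            e = reserved_alloc();               /* fall back to the reserve */
        return e;
    }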
@@ -791,10 +825,17 @@ _vm_map_entry_dispose(
 {
        register zone_t         zone;
 
-       if (map_header->entries_pageable)
+       if (map_header->entries_pageable || !(entry->from_reserved_zone))
                zone = vm_map_entry_zone;
        else
-               zone = vm_map_kentry_zone;
+               zone = vm_map_entry_reserved_zone;
+
+       if (!map_header->entries_pageable) {
+               if (zone == vm_map_entry_zone)
+                       OSAddAtomic(-1, &nonreserved_zalloc_count);
+               else
+                       OSAddAtomic(-1, &reserved_zalloc_count);
+       }
 
        zfree(zone, entry);
 }
@@ -1160,7 +1201,7 @@ vm_map_find_space(
                size += PAGE_SIZE_64;
        }
 
-       new_entry = vm_map_entry_create(map);
+       new_entry = vm_map_entry_create(map, FALSE);
 
        /*
         *      Look for the first possible address; if there's already
@@ -3036,7 +3077,7 @@ _vm_map_clip_start(
         *      address.
         */
 
-       new_entry = _vm_map_entry_create(map_header);
+       new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
        vm_map_entry_copy_full(new_entry, entry);
 
        new_entry->vme_end = start;
@@ -3129,7 +3170,7 @@ _vm_map_clip_end(
         *      AFTER the specified entry
         */
 
-       new_entry = _vm_map_entry_create(map_header);
+       new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
        vm_map_entry_copy_full(new_entry, entry);
 
        assert(entry->vme_start < end);
@@ -7227,9 +7268,7 @@ StartAgain: ;
                /*
                 * Find the zone that the copies were allocated from
                 */
-               old_zone = (copy->cpy_hdr.entries_pageable)
-                       ? vm_map_entry_zone
-                       : vm_map_kentry_zone;
+
                entry = vm_map_copy_first_entry(copy);
 
                /*
@@ -7243,13 +7282,14 @@ StartAgain: ;
                 * Copy each entry.
                 */
                while (entry != vm_map_copy_to_entry(copy)) {
-                       new = vm_map_copy_entry_create(copy);
+                       new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
                        vm_map_entry_copy_full(new, entry);
                        new->use_pmap = FALSE;  /* clr address space specifics */
                        vm_map_copy_entry_link(copy,
                                               vm_map_copy_last_entry(copy),
                                               new);
                        next = entry->vme_next;
+                       old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
                        zfree(old_zone, entry);
                        entry = next;
                }
@@ -7511,7 +7551,7 @@ vm_map_copyin_common(
        copy->offset = src_addr;
        copy->size = len;
        
-       new_entry = vm_map_copy_entry_create(copy);
+       new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
 
 #define        RETURN(x)                                               \
        MACRO_BEGIN                                             \
@@ -7633,7 +7673,7 @@ vm_map_copyin_common(
                        version.main_timestamp = src_map->timestamp;
                        vm_map_unlock(src_map);
 
-                       new_entry = vm_map_copy_entry_create(copy);
+                       new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
 
                        vm_map_lock(src_map);
                        if ((version.main_timestamp + 1) != src_map->timestamp) {
@@ -8221,7 +8261,8 @@ vm_map_fork_share(
         *      Mark both entries as shared.
         */
        
-       new_entry = vm_map_entry_create(new_map);
+       new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
+                                                         * map or descendants */
        vm_map_entry_copy(new_entry, old_entry);
        old_entry->is_shared = TRUE;
        new_entry->is_shared = TRUE;
@@ -8394,7 +8435,7 @@ vm_map_fork(
                                goto slow_vm_map_fork_copy;
                        }
 
-                       new_entry = vm_map_entry_create(new_map);
+                       new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
                        vm_map_entry_copy(new_entry, old_entry);
                        /* clear address space specifics */
                        new_entry->use_pmap = FALSE;
@@ -11146,7 +11187,7 @@ vm_map_entry_insert(
 
        assert(insp_entry != (vm_map_entry_t)0);
 
-       new_entry = vm_map_entry_create(map);
+       new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
 
        new_entry->vme_start = start;
        new_entry->vme_end = end;
@@ -11349,7 +11390,7 @@ vm_map_remap_extract(
 
                offset = src_entry->offset + (src_start - src_entry->vme_start);
 
-               new_entry = _vm_map_entry_create(map_header);
+               new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
                vm_map_entry_copy(new_entry, src_entry);
                new_entry->use_pmap = FALSE; /* clr address space specifics */
 
index d8ab731e9157db01391f5424417b6ef21687647e..f88bd545d91f33ca51269712ccfb76bdd17616c3 100644 (file)
@@ -237,7 +237,8 @@ struct vm_map_entry {
        /* boolean_t */         superpage_size:3,/* use superpages of a certain size */
        /* boolean_t */         zero_wired_pages:1, /* zero out the wired pages of this entry it is being deleted without unwiring them */
        /* boolean_t */         used_for_jit:1,
-       /* unsigned char */     pad:1;          /* available bits */
+       /* boolean_t */ from_reserved_zone:1;   /* Allocated from
+                                                        * kernel reserved zone  */
        unsigned short          wired_count;    /* can be paged if = 0 */
        unsigned short          user_wired_count; /* for vm_wire */
 };
@@ -458,6 +459,8 @@ struct vm_map_copy {
 /* Initialize the module */
 extern void            vm_map_init(void) __attribute__((section("__TEXT, initcode")));
 
+extern void            vm_kernel_reserved_entry_init(void) __attribute__((section("__TEXT, initcode")));
+
 /* Allocate a range in the specified virtual address map and
  * return the entry allocated for that range. */
 extern kern_return_t vm_map_find_space(
index 0761db5ef7d56dff14dcfa13d52c2d5144980662..28b3cb17293ee8ad45028258f043a169ef6fc36e 100644 (file)
@@ -4724,6 +4724,9 @@ process_upl_to_commit:
                                else {
                                        m->absent = FALSE;
                                        dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
+
+                                       if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
+                                               dwp->dw_mask |= DW_vm_page_activate;
                                }
                        } else
                                dwp->dw_mask |= DW_vm_page_unwire;
index 397914b0c9c683ff305e734b810519476bb4bc98..0c0a34e04d1bd4a3e78b7cc3331a33b462e2c097 100644 (file)
@@ -136,7 +136,7 @@ static vm_page_t    vm_page_grab_fictitious_common(ppnum_t phys_addr);
 
 vm_offset_t virtual_space_start;
 vm_offset_t virtual_space_end;
-int    vm_page_pages;
+uint32_t       vm_page_pages;
 
 /*
  *     The vm_page_lookup() routine, which provides for fast
@@ -1473,6 +1473,14 @@ vm_page_init(
        boolean_t       lopage)
 {
        assert(phys_page);
+
+#if    DEBUG
+       if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
+               if (!(pmap_valid_page(phys_page))) {
+                       panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
+               }
+       }
+#endif
        *mem = vm_page_template;
        mem->phys_page = phys_page;
 #if 0
@@ -1628,7 +1636,7 @@ void vm_page_more_fictitious(void)
                vm_page_wait(THREAD_UNINT);
                return;
        }
-       zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
+       zcram(vm_page_zone, addr, PAGE_SIZE);
 
        lck_mtx_unlock(&vm_page_alloc_lock);
 }
@@ -1717,6 +1725,7 @@ vm_page_grablo(void)
        assert(!mem->free);
        assert(!mem->pmapped);
        assert(!mem->wpmapped);
+       assert(!pmap_is_noencrypt(mem->phys_page));
 
        mem->pageq.next = NULL;
        mem->pageq.prev = NULL;
@@ -1779,6 +1788,7 @@ return_page_from_cpu_list:
                assert(!mem->inactive);
                assert(!mem->throttled);
                assert(!mem->speculative);
+               assert(!pmap_is_noencrypt(mem->phys_page));
 
                return mem;
        }
@@ -1895,6 +1905,7 @@ return_page_from_cpu_list:
                        assert(!mem->encrypted);
                        assert(!mem->pmapped);
                        assert(!mem->wpmapped);
+                       assert(!pmap_is_noencrypt(mem->phys_page));
                }
                PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
                PROCESSOR_DATA(current_processor(), start_color) = color;
@@ -1954,6 +1965,8 @@ vm_page_release(
 //     dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);      /* (TEST/DEBUG) */
 
 
+       pmap_clear_noencrypt(mem->phys_page);
+
        lck_mtx_lock_spin(&vm_page_queue_free_lock);
 #if DEBUG
        if (mem->free)
@@ -2372,6 +2385,8 @@ vm_page_free_list(
                                                  vm_page_t,
                                                  pageq);
                                pg_count++;
+
+                               pmap_clear_noencrypt(mem->phys_page);
                        }
                } else {
                        assert(mem->phys_page == vm_page_fictitious_addr ||
@@ -4776,7 +4791,7 @@ hibernate_flush_memory()
                        
                        sync_internal();
                        (void)(*consider_buffer_cache_collect)(1);
-                       consider_zone_gc(1);
+                       consider_zone_gc(TRUE);
 
                        KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
                }
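
The vm_resident.c changes enforce one invariant at both ends of the page lifecycle: pmap_clear_noencrypt() scrubs the attribute when a page returns to the free list, and the new assert(!pmap_is_noencrypt(...)) checks it when a page is handed out, so the flag can never leak between successive owners of a page. The clear-on-free / assert-on-alloc shape in miniature (sketch, illustrative names; the caller must keep the freelist non-empty):

    #include <assert.h>
    #include <stdbool.h>

    #define NPAGES 64
    static bool noencrypt[NPAGES];  /* stands in for pmap_phys_attributes */
    static int  freelist[NPAGES], nfree;

    static void page_release(int pn)
    {
        noencrypt[pn] = false;      /* scrub the attribute on the way out */
        freelist[nfree++] = pn;
    }

    static int page_grab(void)
    {
        int pn = freelist[--nfree];
        assert(!noencrypt[pn]);     /* invariant: free pages carry no flag */
        return pn;
    }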
index 50bc8b99110389c4b88b3dd2f63b371f99044d03..fe6cb1295e553ec76693a639bcdd33164778ed28 100644 (file)
@@ -268,13 +268,14 @@ L_32bit_dispatch: /* 32-bit user task */
        mov     %eax, R32_EIP(%rsp)
        mov     ISC32_RFLAGS(%rsp), %eax
        mov     %eax, R32_EFLAGS(%rsp)
+       mov     ISC32_CS(%rsp), %esi            /* %esi := %cs for later */
+
+       mov     %esi, R32_CS(%rsp)
        mov     ISC32_RSP(%rsp), %eax
        mov     %eax, R32_UESP(%rsp)
        mov     ISC32_SS(%rsp), %eax
        mov     %eax, R32_SS(%rsp)
 L_32bit_dispatch_after_fault:
-       mov     ISC32_CS(%rsp), %esi            /* %esi := %cs for later */
-       mov     %esi, R32_CS(%rsp)
        mov     ISC32_TRAPNO(%rsp), %ebx        /* %ebx := trapno for later */
        mov     %ebx, R32_TRAPNO(%rsp)
        mov     ISC32_ERR(%rsp), %eax
index 3d75d8eab4573081aaea2ad4c706d8297e7e3fad..10a086542bcc289180d046c080b899689bf14380 100644 (file)
@@ -248,30 +248,36 @@ ml_phys_read_data(pmap_paddr_t paddr, int size)
 {
        unsigned int result;
 
+       if (!physmap_enclosed(paddr))
+               panic("%s: 0x%llx out of bounds\n", __FUNCTION__, paddr);
+
         switch (size) {
-            unsigned char s1;
-            unsigned short s2;
+               unsigned char s1;
+               unsigned short s2;
         case 1:
-            s1 = *(unsigned char *)PHYSMAP_PTOV(paddr);
-            result = s1;
-            break;
+               s1 = *(volatile unsigned char *)PHYSMAP_PTOV(paddr);
+               result = s1;
+               break;
         case 2:
-            s2 = *(unsigned short *)PHYSMAP_PTOV(paddr);
-            result = s2;
-            break;
+               s2 = *(volatile unsigned short *)PHYSMAP_PTOV(paddr);
+               result = s2;
+               break;
         case 4:
-        default:
-            result = *(unsigned int *)PHYSMAP_PTOV(paddr);
-            break;
+               result = *(volatile unsigned int *)PHYSMAP_PTOV(paddr);
+               break;
+       default:
+               panic("Invalid size %d for ml_phys_read_data\n", size);
+               break;
         }
-
         return result;
 }
 
 static unsigned long long
 ml_phys_read_long_long(pmap_paddr_t paddr )
 {
-       return *(unsigned long long *)PHYSMAP_PTOV(paddr);
+       if (!physmap_enclosed(paddr))
+               panic("%s: 0x%llx out of bounds\n", __FUNCTION__, paddr);
+       return *(volatile unsigned long long *)PHYSMAP_PTOV(paddr);
 }
 
 unsigned int ml_phys_read( vm_offset_t paddr)
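
The rewritten accessors add three hardening steps: a physmap bounds check before any dereference, volatile qualifiers so the compiler emits exactly one access of the requested width, and a panic on unexpected sizes rather than silently defaulting to a 4-byte access. A standalone model of the same shape (the window bounds and die() are illustrative):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define WIN_BASE 0x1000u
    #define WIN_END  0x2000u

    static uint8_t window[WIN_END - WIN_BASE];  /* stands in for the physmap */

    static void die(const char *msg) { fprintf(stderr, "%s\n", msg); exit(1); }

    static unsigned read_phys(uintptr_t paddr, int size)
    {
        if (paddr < WIN_BASE || paddr + (uintptr_t)size > WIN_END)
            die("read_phys: address out of bounds");      /* cf. physmap_enclosed */

        volatile uint8_t *p = &window[paddr - WIN_BASE];
        switch (size) {
        case 1: return *(volatile uint8_t  *)p;
        case 2: return *(volatile uint16_t *)p;
        case 4: return *(volatile uint32_t *)p;
        default: die("read_phys: invalid size");          /* no silent default */
        }
        return 0;  /* not reached */
    }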
@@ -333,24 +339,32 @@ unsigned long long ml_phys_read_double_64(addr64_t paddr64)
 static inline void
 ml_phys_write_data(pmap_paddr_t paddr, unsigned long data, int size)
 {
+       if (!physmap_enclosed(paddr))
+               panic("%s: 0x%llx out of bounds\n", __FUNCTION__, paddr);
+
         switch (size) {
         case 1:
-           *(unsigned char *)PHYSMAP_PTOV(paddr) = (unsigned char)data;
+           *(volatile unsigned char *)PHYSMAP_PTOV(paddr) = (unsigned char)data;
             break;
         case 2:
-           *(unsigned short *)PHYSMAP_PTOV(paddr) = (unsigned short)data;
+           *(volatile unsigned short *)PHYSMAP_PTOV(paddr) = (unsigned short)data;
             break;
         case 4:
-        default:
-           *(unsigned int *)PHYSMAP_PTOV(paddr) = (unsigned int)data;
+           *(volatile unsigned int *)PHYSMAP_PTOV(paddr) = (unsigned int)data;
             break;
+       default:
+               panic("Invalid size %d for ml_phys_write_data\n", size);
+               break;
         }
 }
 
 static void
 ml_phys_write_long_long(pmap_paddr_t paddr, unsigned long long data)
 {
-       *(unsigned long long *)PHYSMAP_PTOV(paddr) = data;
+       if (!physmap_enclosed(paddr))
+               panic("%s: 0x%llx out of bounds\n", __FUNCTION__, paddr);
+
+       *(volatile unsigned long long *)PHYSMAP_PTOV(paddr) = data;
 }
 
 void ml_phys_write_byte(vm_offset_t paddr, unsigned int data)
@@ -408,9 +422,8 @@ void ml_phys_write_double_64(addr64_t paddr64, unsigned long long data)
  *
  *
  *      Read the memory location at physical address paddr.
- *  This is a part of a device probe, so there is a good chance we will
- *  have a machine check here. So we have to be able to handle that.
- *  We assume that machine checks are enabled both in MSR and HIDs
+ * *Does not* recover from machine checks, unlike the PowerPC implementation.
+ * Should probably be deprecated.
  */
 
 boolean_t
index 2dadb540c5b61807ba756fccd32a0f7e13d52345..014df101f9d14c218211c1604c1968b9dfe693a2 100644 (file)
@@ -286,55 +286,6 @@ pmap_map(
        return(virt);
 }
 
-/*
- *     Back-door routine for mapping kernel VM at initialization.  
- *     Useful for mapping memory outside the range
- *      Sets no-cache, A, D.
- *     Otherwise like pmap_map.
- */
-vm_offset_t
-pmap_map_bd(
-       vm_offset_t     virt,
-       vm_map_offset_t start_addr,
-       vm_map_offset_t end_addr,
-       vm_prot_t       prot,
-       unsigned int    flags)
-{
-       pt_entry_t      template;
-       pt_entry_t      *pte;
-       spl_t           spl;
-       vm_offset_t     base = virt;
-       template = pa_to_pte(start_addr)
-               | INTEL_PTE_REF
-               | INTEL_PTE_MOD
-               | INTEL_PTE_WIRED
-               | INTEL_PTE_VALID;
-
-       if (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) {
-               template |= INTEL_PTE_NCACHE;
-               if (!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT)))
-                       template |= INTEL_PTE_PTA;
-       }
-       if (prot & VM_PROT_WRITE)
-               template |= INTEL_PTE_WRITE;
-
-       while (start_addr < end_addr) {
-               spl = splhigh();
-               pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
-               if (pte == PT_ENTRY_NULL) {
-                       panic("pmap_map_bd: Invalid kernel address\n");
-               }
-               pmap_store_pte(pte, template);
-               splx(spl);
-               pte_increment_pa(template);
-               virt += PAGE_SIZE;
-               start_addr += PAGE_SIZE;
-       }
-       (void)base;
-       PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
-       return(virt);
-}
-
 extern char                    *first_avail;
 extern vm_offset_t             virtual_avail, virtual_end;
 extern pmap_paddr_t            avail_start, avail_end;
@@ -472,6 +423,7 @@ pmap_bootstrap(
        if (pmap_pcid_ncpus)
                printf("PMAP: PCID enabled\n");
 
+
        boot_args *args = (boot_args *)PE_state.bootArgs;
        if (args->efiMode == kBootArgsEfiMode32) {
                printf("EFI32: kernel virtual space limited to 4GB\n");
@@ -597,9 +549,7 @@ pmap_init(void)
                                if (pn > last_managed_page)
                                        last_managed_page = pn;
 
-                               if (pn < lowest_lo)
-                                       pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
-                               else if (pn >= lowest_hi && pn <= highest_hi)
+                               if (pn >= lowest_hi && pn <= highest_hi)
                                        pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
                        }
                }
index fc29c1a6525034ef70f35e9a7c9b2529668ed5b0..80335544f161ceb0f0192063f2dadeda632c7092 100644 (file)
 
 /* extern references */
 extern void pe_identify_machine(void * args);
+extern int
+vc_display_lzss_icon(uint32_t dst_x,       uint32_t dst_y,
+                     uint32_t image_width, uint32_t image_height,
+                     const uint8_t *compressed_image,
+                     uint32_t       compressed_size, 
+                     const uint8_t *clut);
 
 /* private globals */
 PE_state_t  PE_state;
@@ -54,6 +60,9 @@ clock_frequency_info_t gPEClockFrequencyInfo;
 void *gPEEFISystemTable;
 void *gPEEFIRuntimeServices;
 
+static boot_icon_element* norootIcon_lzss;
+static const uint8_t*     norootClut_lzss;
+
 int PE_initialize_console( PE_Video * info, int op )
 {
     static int   last_console = -1;
@@ -104,12 +113,16 @@ void PE_init_iokit(void)
     } DriversPackageProp;
 
     boolean_t bootClutInitialized = FALSE;
-    boolean_t norootInitialized = FALSE;
+    boolean_t noroot_rle_Initialized = FALSE;
+
     DTEntry             entry;
     unsigned int       size;
     uint32_t           *map;
        boot_progress_element *bootPict;
 
+    norootIcon_lzss = NULL;
+    norootClut_lzss = NULL;
+
     PE_init_kprintf(TRUE);
     PE_init_printf(TRUE);
 
@@ -120,34 +133,45 @@ void PE_init_iokit(void)
      */
 
     if( kSuccess == DTLookupEntry(NULL, "/chosen/memory-map", &entry)) {
-       if( kSuccess == DTGetProperty(entry, "BootCLUT", (void **) &map, &size)) {
-           if (sizeof(appleClut8) <= map[1]) {
-               bcopy( (void *)ml_static_ptovirt(map[0]), appleClut8, sizeof(appleClut8) );
-               bootClutInitialized = TRUE;
-           }
-       }
-
-       if( kSuccess == DTGetProperty(entry, "Pict-FailedBoot", (void **) &map, &size)) {
-           bootPict = (boot_progress_element *) ml_static_ptovirt(map[0]);
-           default_noroot.width  = bootPict->width;
-           default_noroot.height = bootPict->height;
-           default_noroot.dx     = 0;
-           default_noroot.dy     = bootPict->yOffset;
-           default_noroot_data   = &bootPict->data[0];
-            norootInitialized = TRUE;
-       }
+        if( kSuccess == DTGetProperty(entry, "BootCLUT", (void **) &map, &size)) {
+            if (sizeof(appleClut8) <= map[1]) {
+                bcopy( (void *)ml_static_ptovirt(map[0]), appleClut8, sizeof(appleClut8) );
+                bootClutInitialized = TRUE;
+            }
+        }
+
+        if( kSuccess == DTGetProperty(entry, "Pict-FailedBoot", (void **) &map, &size)) {
+            bootPict = (boot_progress_element *) ml_static_ptovirt(map[0]);
+            default_noroot.width  = bootPict->width;
+            default_noroot.height = bootPict->height;
+            default_noroot.dx     = 0;
+            default_noroot.dy     = bootPict->yOffset;
+            default_noroot_data   = &bootPict->data[0];
+            noroot_rle_Initialized = TRUE;
+        }
+
+        if( kSuccess == DTGetProperty(entry, "FailedCLUT", (void **) &map, &size)) {
+               norootClut_lzss = (uint8_t*) ml_static_ptovirt(map[0]);
+        }
+
+        if( kSuccess == DTGetProperty(entry, "FailedImage", (void **) &map, &size)) {
+            norootIcon_lzss = (boot_icon_element *) ml_static_ptovirt(map[0]);
+            if (norootClut_lzss == NULL) {
+                    printf("ERROR: No FailedCLUT provided for noroot icon!\n");
+            }
+        }
     }
 
     if (!bootClutInitialized) {
-    bcopy( (void *) (uintptr_t) bootClut, (void *) appleClut8, sizeof(appleClut8) );
+        bcopy( (void *) (uintptr_t) bootClut, (void *) appleClut8, sizeof(appleClut8) );
     }
 
-    if (!norootInitialized) {
-    default_noroot.width  = kFailedBootWidth;
-    default_noroot.height = kFailedBootHeight;
-    default_noroot.dx     = 0;
-    default_noroot.dy     = kFailedBootOffset;
-    default_noroot_data   = failedBootPict;
+    if (!noroot_rle_Initialized) {
+        default_noroot.width  = kFailedBootWidth;
+        default_noroot.height = kFailedBootHeight;
+        default_noroot.dx     = 0;
+        default_noroot.dy     = kFailedBootOffset;
+        default_noroot_data   = failedBootPict;
     }
     
     /*
@@ -214,8 +238,22 @@ int PE_current_console( PE_Video * info )
 
 void PE_display_icon( __unused unsigned int flags, __unused const char * name )
 {
-    if ( default_noroot_data )
+    if ( norootIcon_lzss && norootClut_lzss ) {
+        uint32_t width  = norootIcon_lzss->width;
+        uint32_t height = norootIcon_lzss->height;
+        uint32_t x = ((PE_state.video.v_width  - width) / 2);
+        uint32_t y = ((PE_state.video.v_height - height) / 2) + norootIcon_lzss->y_offset_from_center;
+
+        vc_display_lzss_icon(x, y, width, height,
+                             &norootIcon_lzss->data[0],
+                             norootIcon_lzss->data_size,
+                             norootClut_lzss);
+    }
+    else if ( default_noroot_data ) {
         vc_display_icon( &default_noroot, default_noroot_data );
+    } else {
+        printf("ERROR: No data found for noroot icon!\n");
+    }
 }
 
 boolean_t
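
The centering math in PE_display_icon is worth a worked example (all values hypothetical): a 128x128 icon on a 1024x768 panel with y_offset_from_center = 60 gives

    x = (1024 - 128) / 2      = 448
    y = (768 - 128) / 2 + 60  = 380

so a positive y_offset_from_center pushes the icon below dead center.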
index 6533908ebea67eecc2a767703c99990f4b389b00..68d5fc2eb1f631011008927d5ba0a3ba30fd1b58 100644 (file)
@@ -36,6 +36,7 @@
 #include <kern/simple_lock.h>
 #include <i386/mp.h>
 #include <machine/pal_routines.h>
+#include <i386/proc_reg.h>
 
 /* Globals */
 void (*PE_kputc)(char c);
@@ -105,10 +106,13 @@ void kprintf(const char *fmt, ...)
        boolean_t state;
 
        if (!disable_serial_output) {
-
+               boolean_t early = FALSE;
+               if (rdmsr64(MSR_IA32_GS_BASE) == 0) {
+                       early = TRUE;
+               }
                /* If PE_kputc has not yet been initialized, don't
                 * take any locks, just dump to serial */
-               if (!PE_kputc) {
+               if (!PE_kputc || early) {
                        va_start(listp, fmt);
                        _doprnt(fmt, &listp, pal_serial_putc, 16);
                        va_end(listp);
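
The new early test reads IA32_GS_BASE: until the per-CPU segment is programmed that MSR is still zero, so lock operations and %gs-relative accesses are unsafe and output must take the raw serial path. A user-space sketch of the same gate (read_gs_base() and console_ready are hypothetical stand-ins for rdmsr64(MSR_IA32_GS_BASE) and PE_kputc):

    #include <stdarg.h>
    #include <stdbool.h>
    #include <stdio.h>

    extern unsigned long long read_gs_base(void);  /* 0 => per-CPU not yet live */
    extern bool console_ready;                     /* models PE_kputc != NULL */

    void early_safe_log(const char *fmt, ...)
    {
        bool early = (read_gs_base() == 0);
        va_list ap;

        va_start(ap, fmt);
        if (!console_ready || early)
            vfprintf(stderr, fmt, ap);  /* raw, lock-free fallback path */
        else
            vfprintf(stdout, fmt, ap);  /* normal path would take a lock here */
        va_end(ap);
    }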
index 18e65d40642797890ace15f76fd4a1ec2f3ae512..f911e64bc352667808dc931b791e5e041af613fd 100644 (file)
@@ -96,6 +96,17 @@ typedef struct Boot_Video    Boot_Video;
 #define GRAPHICS_MODE         1
 #define FB_TEXT_MODE          2
 
+/* Struct describing an image passed in by the booter */
+struct boot_icon_element {
+    unsigned int    width;
+    unsigned int    height;
+    int             y_offset_from_center;
+    unsigned int    data_size;
+    unsigned int    __reserved1[4];
+    unsigned char   data[0];
+};
+typedef struct boot_icon_element boot_icon_element;
+
 /* Boot argument structure - passed into Mach kernel at boot time.
  * "Revision" can be incremented for compatible changes
  */
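
boot_icon_element carries its pixels as a trailing zero-length array, so a complete blob is the fixed header plus data_size bytes. A consumer validating a booter-supplied region might size-check it like this (sketch; uses the C99 flexible-array spelling, and region_len is whatever bound the caller trusts):

    #include <stddef.h>

    struct boot_icon_element_like {
        unsigned int  width;
        unsigned int  height;
        int           y_offset_from_center;
        unsigned int  data_size;
        unsigned int  reserved[4];
        unsigned char data[];        /* C99 spelling of data[0] */
    };

    /* Nonzero when the header plus payload fit inside the mapped region. */
    static int icon_fits(const struct boot_icon_element_like *icon,
                         size_t region_len)
    {
        return sizeof(*icon) + (size_t)icon->data_size <= region_len;
    }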
@@ -149,7 +160,10 @@ typedef struct boot_args {
     uint64_t   bootMemSize;
     uint64_t    PhysicalMemorySize;
     uint64_t    FSBFrequency;
-    uint32_t    __reserved4[734];
+    uint64_t    pciConfigSpaceBaseAddress;
+    uint32_t    pciConfigSpaceStartBusNumber;
+    uint32_t    pciConfigSpaceEndBusNumber;
+    uint32_t    __reserved4[730];
 
 } boot_args;